summaryrefslogtreecommitdiff
path: root/src/mesa/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/dri/Makefile.template9
-rw-r--r--src/mesa/drivers/dri/common/dri_util.c13
-rw-r--r--src/mesa/drivers/dri/common/dri_util.h5
-rw-r--r--src/mesa/drivers/dri/i915/i830_context.c1
-rw-r--r--src/mesa/drivers/dri/i915/i915_context.c2
-rw-r--r--src/mesa/drivers/dri/i915/i915_fragprog.c61
-rw-r--r--src/mesa/drivers/dri/i965/Makefile5
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.c9
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h26
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_disasm.c80
-rw-r--r--src/mesa/drivers/dri/i965/brw_draw_upload.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h10
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu_emit.c63
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs.cpp1924
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp365
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp391
-rw-r--r--src/mesa/drivers/dri/i965/brw_misc_state.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_optimize.c81
-rw-r--r--src/mesa/drivers/dri/i965/brw_program.c36
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h1
-rw-r--r--src/mesa/drivers/dri/i965/brw_state_upload.c7
-rw-r--r--src/mesa/drivers/dri/i965/brw_structs.h11
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c134
-rw-r--r--src/mesa/drivers/dri/i965/brw_vtbl.c1
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.c27
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm.h21
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_debug.c8
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_emit.c138
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_fp.c11
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_glsl.c26
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_iz.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass0.c2
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass1.c6
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_pass2.c4
-rw-r--r--src/mesa/drivers/dri/i965/brw_wm_state.c18
-rw-r--r--src/mesa/drivers/dri/i965/gen6_cc.c2
-rw-r--r--src/mesa/drivers/dri/i965/gen6_wm_state.c71
-rw-r--r--src/mesa/drivers/dri/intel/intel_buffer_objects.c6
-rw-r--r--src/mesa/drivers/dri/intel/intel_chipset.h4
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c5
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.h14
-rw-r--r--src/mesa/drivers/dri/intel/intel_extensions.c4
-rw-r--r--src/mesa/drivers/dri/intel/intel_fbo.c18
-rw-r--r--src/mesa/drivers/dri/intel/intel_mipmap_tree.c3
-rw-r--r--src/mesa/drivers/dri/intel/intel_regions.c67
-rw-r--r--src/mesa/drivers/dri/intel/intel_regions.h7
-rw-r--r--src/mesa/drivers/dri/intel/intel_screen.c73
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex.h2
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_driver.c2
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_fbo.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile1
-rwxr-xr-xsrc/mesa/drivers/dri/r300/compiler/SConscript1
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c18
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c231
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h5
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c31
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c17
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h6
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c30
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c117
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c128
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h36
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.c1
-rw-r--r--src/mesa/drivers/dri/r600/Makefile9
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_chip.c1289
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_chip.h516
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_context.c106
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_context.h38
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_diff.h335
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_fragprog.c817
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_fragprog.h77
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_ioctl.c53
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_ioctl.h36
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_off.h881
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_oglprog.c193
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_oglprog.h33
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_render.c937
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_sq.h735
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_state.c1878
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_state.h47
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_tex.c1551
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_tex.h38
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_vertprog.c736
-rw-r--r--src/mesa/drivers/dri/r600/evergreen_vertprog.h109
-rw-r--r--src/mesa/drivers/dri/r600/r600_blit.c9
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.c9
-rw-r--r--src/mesa/drivers/dri/r600/r600_cmdbuf.h40
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.c89
-rw-r--r--src/mesa/drivers/dri/r600/r600_context.h50
-rw-r--r--src/mesa/drivers/dri/r600/r600_emit.c65
-rw-r--r--src/mesa/drivers/dri/r600/r600_emit.h10
-rw-r--r--src/mesa/drivers/dri/r600/r600_texstate.c26
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c1700
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.h31
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.c135
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.h7
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.c33
-rw-r--r--src/mesa/drivers/dri/r600/r700_fragprog.h5
-rw-r--r--src/mesa/drivers/dri/r600/r700_oglprog.c21
-rw-r--r--src/mesa/drivers/dri/r600/r700_render.c1
-rw-r--r--src/mesa/drivers/dri/r600/r700_state.c16
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.c55
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.h5
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_chipset.h45
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.c5
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.h2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_fbo.c4
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c6
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.c57
-rw-r--r--src/mesa/drivers/glslcompiler/Makefile43
-rw-r--r--src/mesa/drivers/glslcompiler/glslcompiler.c436
-rw-r--r--src/mesa/drivers/osmesa/Makefile7
-rw-r--r--src/mesa/drivers/x11/Makefile5
118 files changed, 16693 insertions, 1073 deletions
diff --git a/src/mesa/drivers/dri/Makefile.template b/src/mesa/drivers/dri/Makefile.template
index 8cb25439e4..a00018cafa 100644
--- a/src/mesa/drivers/dri/Makefile.template
+++ b/src/mesa/drivers/dri/Makefile.template
@@ -17,6 +17,7 @@ COMMON_SOURCES = $(COMMON_GALLIUM_SOURCES) \
INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES)
OBJECTS = $(C_SOURCES:.c=.o) \
+ $(CXX_SOURCES:.cpp=.o) \
$(ASM_SOURCES:.S=.o)
@@ -33,12 +34,16 @@ SHARED_INCLUDES = \
$(LIBDRM_CFLAGS)
CFLAGS += $(API_DEFINES)
+CXXFLAGS += $(API_DEFINES)
##### RULES #####
.c.o:
$(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
+.cpp.o:
+ $(CC) -c $(INCLUDES) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@
+
.S.o:
$(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@
@@ -54,9 +59,9 @@ lib: symlinks subdirs depend
$(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(EXTRA_MODULES) Makefile \
$(TOP)/src/mesa/drivers/dri/Makefile.template $(TOP)/src/mesa/drivers/dri/common/dri_test.o
- $(MKLIB) -o $@.tmp -noprefix -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+ $(MKLIB) -o $@.tmp -noprefix -linker '$(CXX)' -ldflags '$(LDFLAGS)' \
$(OBJECTS) $(MESA_MODULES) $(EXTRA_MODULES) $(DRI_LIB_DEPS)
- $(CC) $(CFLAGS) -o $@.test $(TOP)/src/mesa/drivers/dri/common/dri_test.o $@.tmp $(DRI_LIB_DEPS)
+ $(CXX) $(CFLAGS) -o $@.test $(TOP)/src/mesa/drivers/dri/common/dri_test.o $@.tmp $(DRI_LIB_DEPS)
@rm -f $@.test
mv -f $@.tmp $@
diff --git a/src/mesa/drivers/dri/common/dri_util.c b/src/mesa/drivers/dri/common/dri_util.c
index dce84ef0de..a581c6663f 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -32,6 +32,7 @@
#include "drm_sarea.h"
#include "utils.h"
#include "xmlpool.h"
+#include "../glsl/glsl_parser_extras.h"
PUBLIC const char __dri2ConfigOptions[] =
DRI_CONF_BEGIN
@@ -707,6 +708,8 @@ static void driDestroyScreen(__DRIscreen *psp)
* stream open to the X-server anymore.
*/
+ _mesa_destroy_shader_compiler();
+
if (psp->DriverAPI.DestroyScreen)
(*psp->DriverAPI.DestroyScreen)(psp);
@@ -714,6 +717,9 @@ static void driDestroyScreen(__DRIscreen *psp)
(void)drmUnmap((drmAddress)psp->pSAREA, SAREA_MAX);
(void)drmUnmap((drmAddress)psp->pFB, psp->fbSize);
(void)drmCloseOnce(psp->fd);
+ } else {
+ driDestroyOptionCache(&psp->optionCache);
+ driDestroyOptionInfo(&psp->optionInfo);
}
free(psp);
@@ -839,7 +845,6 @@ dri2CreateNewScreen(int scrn, int fd,
static const __DRIextension *emptyExtensionList[] = { NULL };
__DRIscreen *psp;
drmVersionPtr version;
- driOptionCache options;
if (driDriverAPI.InitScreen2 == NULL)
return NULL;
@@ -873,8 +878,10 @@ dri2CreateNewScreen(int scrn, int fd,
psp->DriverAPI = driDriverAPI;
- driParseOptionInfo(&options, __dri2ConfigOptions, __dri2NConfigOptions);
- driParseConfigFiles(&psp->optionCache, &options, psp->myNum, "dri2");
+ driParseOptionInfo(&psp->optionInfo, __dri2ConfigOptions,
+ __dri2NConfigOptions);
+ driParseConfigFiles(&psp->optionCache, &psp->optionInfo, psp->myNum,
+ "dri2");
return psp;
}
diff --git a/src/mesa/drivers/dri/common/dri_util.h b/src/mesa/drivers/dri/common/dri_util.h
index bc647ff813..5096d22cad 100644
--- a/src/mesa/drivers/dri/common/dri_util.h
+++ b/src/mesa/drivers/dri/common/dri_util.h
@@ -513,7 +513,11 @@ struct __DRIscreenRec {
*
* This pointer is never touched by the DRI layer.
*/
+#ifdef __cplusplus
+ void *priv;
+#else
void *private;
+#endif
/* Extensions provided by the loader. */
const __DRIgetDrawableInfoExtension *getDrawableInfo;
@@ -532,6 +536,7 @@ struct __DRIscreenRec {
/* The lock actually in use, old sarea or DRI2 */
drmLock *lock;
+ driOptionCache optionInfo;
driOptionCache optionCache;
unsigned int api_mask;
};
diff --git a/src/mesa/drivers/dri/i915/i830_context.c b/src/mesa/drivers/dri/i915/i830_context.c
index d52ea9812f..8ddce6d82a 100644
--- a/src/mesa/drivers/dri/i915/i830_context.c
+++ b/src/mesa/drivers/dri/i915/i830_context.c
@@ -27,7 +27,6 @@
#include "i830_context.h"
#include "main/imports.h"
-#include "texmem.h"
#include "tnl/tnl.h"
#include "tnl/t_vertex.h"
#include "tnl/t_context.h"
diff --git a/src/mesa/drivers/dri/i915/i915_context.c b/src/mesa/drivers/dri/i915/i915_context.c
index b3fe1c05d6..d8715cf026 100644
--- a/src/mesa/drivers/dri/i915/i915_context.c
+++ b/src/mesa/drivers/dri/i915/i915_context.c
@@ -174,6 +174,8 @@ i915CreateContext(int api,
ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+ ctx->Shader.EmitNoIfs = GL_TRUE;
+
ctx->Const.MaxDrawBuffers = 1;
_tnl_init_vertices(ctx, ctx->Const.MaxArrayLockSize + 12,
diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c
index f1505dc5e7..4a2e6209d0 100644
--- a/src/mesa/drivers/dri/i915/i915_fragprog.c
+++ b/src/mesa/drivers/dri/i915/i915_fragprog.c
@@ -143,6 +143,20 @@ src_vector(struct i915_fragment_program *p,
}
break;
+ case PROGRAM_OUTPUT:
+ switch (source->Index) {
+ case FRAG_RESULT_COLOR:
+ src = UREG(REG_TYPE_OC, 0);
+ break;
+ case FRAG_RESULT_DEPTH:
+ src = UREG(REG_TYPE_OD, 0);
+ break;
+ default:
+ i915_program_error(p, "Bad source->Index: %d", source->Index);
+ return 0;
+ }
+ break;
+
/* Various paramters and env values. All emitted to
* hardware as program constants.
*/
@@ -472,6 +486,18 @@ upload_program(struct i915_fragment_program *p)
swizzle(tmp, X, X, X, X));
break;
+ case OPCODE_DP2:
+ src0 = src_vector(p, &inst->SrcReg[0], program);
+ src1 = src_vector(p, &inst->SrcReg[1], program);
+ i915_emit_arith(p,
+ A0_DP3,
+ get_result_vector(p, inst),
+ get_result_flags(inst), 0,
+ swizzle(src0, X, Y, ZERO, ZERO),
+ swizzle(src1, X, Y, ZERO, ZERO),
+ 0);
+ break;
+
case OPCODE_DP3:
EMIT_2ARG_ARITH(A0_DP3);
break;
@@ -957,6 +983,41 @@ upload_program(struct i915_fragment_program *p)
0);
break;
+ case OPCODE_SSG:
+ dst = get_result_vector(p, inst);
+ flags = get_result_flags(inst);
+ src0 = src_vector(p, &inst->SrcReg[0], program);
+ tmp = i915_get_utemp(p);
+
+ /* tmp = (src < 0.0) */
+ i915_emit_arith(p,
+ A0_SLT,
+ tmp,
+ flags, 0,
+ src0,
+ swizzle(src0, ZERO, ZERO, ZERO, ZERO),
+ 0);
+
+ /* dst = (0.0 < src) */
+ i915_emit_arith(p,
+ A0_SLT,
+ dst,
+ flags, 0,
+ swizzle(src0, ZERO, ZERO, ZERO, ZERO),
+ src0,
+ 0);
+
+ /* dst = (src > 0.0) - (src < 0.0) */
+ i915_emit_arith(p,
+ A0_ADD,
+ dst,
+ flags, 0,
+ dst,
+ negate(tmp, 1, 1, 1, 1),
+ 0);
+
+ break;
+
case OPCODE_SUB:
src0 = src_vector(p, &inst->SrcReg[0], program);
src1 = src_vector(p, &inst->SrcReg[1], program);
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index e381a5c714..bea48e1313 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -104,6 +104,11 @@ C_SOURCES = \
$(COMMON_SOURCES) \
$(DRIVER_SOURCES)
+CXX_SOURCES = \
+ brw_fs.cpp \
+ brw_fs_channel_expressions.cpp \
+ brw_fs_vector_splitting.cpp
+
ASM_SOURCES =
DRIVER_DEFINES = -I../intel
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 6d064b822e..d2b20165f9 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -144,7 +144,8 @@ GLboolean brwCreateContext( int api,
brw->CMD_VF_STATISTICS = CMD_VF_STATISTICS_GM45;
brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
brw->has_surface_tile_offset = GL_TRUE;
- brw->has_compr4 = GL_TRUE;
+ if (intel->gen < 6)
+ brw->has_compr4 = GL_TRUE;
brw->has_aa_line_parameters = GL_TRUE;
brw->has_pln = GL_TRUE;
} else {
@@ -153,7 +154,11 @@ GLboolean brwCreateContext( int api,
}
/* WM maximum threads is number of EUs times number of threads per EU. */
- if (intel->gen == 5) {
+ if (intel->gen >= 6) {
+ brw->urb.size = 1024;
+ brw->vs_max_threads = 60;
+ brw->wm_max_threads = 80;
+ } else if (intel->gen == 5) {
brw->urb.size = 1024;
brw->vs_max_threads = 72;
brw->wm_max_threads = 12 * 6;
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index cc4e6638e8..703a7de78d 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -179,6 +179,16 @@ struct brw_fragment_program {
GLbitfield tex_units_used;
};
+struct brw_shader {
+ struct gl_shader base;
+
+ /** Shader IR transformed for native compile, at link time. */
+ struct exec_list *ir;
+};
+
+struct brw_shader_program {
+ struct gl_shader_program base;
+};
/* Data about a particular attempt to compile a program. Note that
* there can be many of these, each in a different GL state
@@ -654,7 +664,13 @@ struct brw_context
drm_intel_bo *prog_bo;
drm_intel_bo *state_bo;
- drm_intel_bo *const_bo;
+ drm_intel_bo *const_bo; /* pull constant buffer. */
+ /**
+ * This is the push constant BO on gen6.
+ *
+ * Pre-gen6, push constants live in the CURBE.
+ */
+ drm_intel_bo *push_const_bo;
} wm;
@@ -686,7 +702,13 @@ struct brw_context
#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a)
-
+struct brw_instruction_info {
+ char *name;
+ int nsrc;
+ int ndst;
+ GLboolean is_arith;
+};
+extern const struct brw_instruction_info brw_opcodes[128];
/*======================================================================
* brw_vtbl.c
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index f7a68cead7..6b8e9e05d0 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -686,6 +686,9 @@
#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5 1
#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_GEN5 2
#define BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5 3
+#define BRW_SAMPLER_MESSAGE_SAMPLE_DERIVS_GEN5 4
+#define BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5 5
+#define BRW_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE_GEN5 6
/* for GEN5 only */
#define BRW_SAMPLER_SIMD_MODE_SIMD4X2 0
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c
index d230714536..f74a236834 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -159,6 +159,11 @@ char *saturate[2] = {
[1] = ".sat"
};
+char *accwr[2] = {
+ [0] = "",
+ [1] = "AccWrEnable"
+};
+
char *exec_size[8] = {
[0] = "1",
[1] = "2",
@@ -206,6 +211,7 @@ char *compr_ctrl[4] = {
[0] = "",
[1] = "sechalf",
[2] = "compr",
+ [3] = "compr4",
};
char *dep_ctrl[4] = {
@@ -235,6 +241,16 @@ char *reg_encoding[8] = {
[7] = "F"
};
+int reg_type_size[8] = {
+ [0] = 4,
+ [1] = 4,
+ [2] = 2,
+ [3] = 2,
+ [4] = 1,
+ [5] = 1,
+ [7] = 4
+};
+
char *imm_encoding[8] = {
[0] = "UD",
[1] = "D",
@@ -423,6 +439,11 @@ static int print_opcode (FILE *file, int id)
static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
{
int err = 0;
+
+ /* Clear the Compr4 instruction compression bit. */
+ if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
+ _reg_nr &= ~(1 << 7);
+
if (_reg_file == BRW_ARCHITECTURE_REGISTER_FILE) {
switch (_reg_nr & 0xf0) {
case BRW_ARF_NULL:
@@ -476,7 +497,8 @@ static int dest (FILE *file, struct brw_instruction *inst)
if (err == -1)
return 0;
if (inst->bits1.da1.dest_subreg_nr)
- format (file, ".%d", inst->bits1.da1.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.da1.dest_subreg_nr /
+ reg_type_size[inst->bits1.da1.dest_reg_type]);
format (file, "<%d>", inst->bits1.da1.dest_horiz_stride);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
}
@@ -484,7 +506,8 @@ static int dest (FILE *file, struct brw_instruction *inst)
{
string (file, "g[a0");
if (inst->bits1.ia1.dest_subreg_nr)
- format (file, ".%d", inst->bits1.ia1.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.ia1.dest_subreg_nr /
+ reg_type_size[inst->bits1.ia1.dest_reg_type]);
if (inst->bits1.ia1.dest_indirect_offset)
format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
string (file, "]");
@@ -500,7 +523,8 @@ static int dest (FILE *file, struct brw_instruction *inst)
if (err == -1)
return 0;
if (inst->bits1.da16.dest_subreg_nr)
- format (file, ".%d", inst->bits1.da16.dest_subreg_nr);
+ format (file, ".%d", inst->bits1.da16.dest_subreg_nr /
+ reg_type_size[inst->bits1.da16.dest_reg_type]);
string (file, "<1>");
err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
@@ -541,7 +565,7 @@ static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
if (err == -1)
return 0;
if (sub_reg_num)
- format (file, ".%d", sub_reg_num);
+ format (file, ".%d", sub_reg_num / reg_type_size[type]); /* use formal style like spec */
src_align1_region (file, _vert_stride, _width, _horiz_stride);
err |= control (file, "src reg encoding", reg_encoding, type, NULL);
return err;
@@ -595,11 +619,12 @@ static int src_da16 (FILE *file,
if (err == -1)
return 0;
if (_subreg_nr)
- format (file, ".%d", _subreg_nr);
+ /* bit4 for subreg number byte addressing. Make this same meaning as
+ in da1 case, so output looks consistent. */
+ format (file, ".%d", 16 / reg_type_size[_reg_type]);
string (file, "<");
err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
string (file, ",4,1>");
- err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
/*
* Three kinds of swizzle display:
* identity - nothing printed
@@ -863,12 +888,25 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
inst->bits3.math.precision, &space);
break;
case BRW_MESSAGE_TARGET_SAMPLER:
- format (file, " (%d, %d, ",
- inst->bits3.sampler.binding_table_index,
- inst->bits3.sampler.sampler);
- err |= control (file, "sampler target format", sampler_target_format,
- inst->bits3.sampler.return_format, NULL);
- string (file, ")");
+ if (gen >= 5) {
+ format (file, " (%d, %d, %d, %d)",
+ inst->bits3.sampler_gen5.binding_table_index,
+ inst->bits3.sampler_gen5.sampler,
+ inst->bits3.sampler_gen5.msg_type,
+ inst->bits3.sampler_gen5.simd_mode);
+ } else if (0 /* FINISHME: is_g4x */) {
+ format (file, " (%d, %d)",
+ inst->bits3.sampler_g4x.binding_table_index,
+ inst->bits3.sampler_g4x.sampler);
+ } else {
+ format (file, " (%d, %d, ",
+ inst->bits3.sampler.binding_table_index,
+ inst->bits3.sampler.sampler);
+ err |= control (file, "sampler target format",
+ sampler_target_format,
+ inst->bits3.sampler.return_format, NULL);
+ string (file, ")");
+ }
break;
case BRW_MESSAGE_TARGET_DATAPORT_READ:
if (gen >= 6) {
@@ -929,6 +967,11 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
inst->bits3.urb.used, &space);
err |= control (file, "urb complete", urb_complete,
inst->bits3.urb.complete, &space);
+ if (gen >= 5) {
+ format (file, " mlen %d, rlen %d\n",
+ inst->bits3.urb_gen5.msg_length,
+ inst->bits3.urb_gen5.response_length);
+ }
break;
case BRW_MESSAGE_TARGET_THREAD_SPAWNER:
break;
@@ -957,8 +1000,19 @@ int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
- err |= control (file, "compression control", compr_ctrl, inst->header.compression_control, &space);
+
+ if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED &&
+ opcode[inst->header.opcode].ndst > 0 &&
+ inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE &&
+ inst->bits1.da1.dest_reg_nr & (1 << 7)) {
+ format (file, " compr4");
+ } else {
+ err |= control (file, "compression control", compr_ctrl,
+ inst->header.compression_control, &space);
+ }
err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+ if (gen >= 6)
+ err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space);
if (inst->header.opcode == BRW_OPCODE_SEND)
err |= control (file, "end of thread", end_of_thread,
inst->bits3.generic.end_of_thread, &space);
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index f07aab86e9..249e874ab1 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -476,7 +476,7 @@ static void brw_emit_vertices(struct brw_context *brw)
if (brw->vb.nr_enabled == 0) {
BEGIN_BATCH(3);
OUT_BATCH((CMD_VERTEX_ELEMENT << 16) | 1);
- if (IS_GEN6(intel->intelScreen->deviceID)) {
+ if (intel->gen >= 6) {
OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
GEN6_VE0_VALID |
(BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
@@ -553,7 +553,7 @@ static void brw_emit_vertices(struct brw_context *brw)
break;
}
- if (IS_GEN6(intel->intelScreen->deviceID)) {
+ if (intel->gen >= 6) {
OUT_BATCH((i << GEN6_VE0_INDEX_SHIFT) |
GEN6_VE0_VALID |
(format << BRW_VE0_FORMAT_SHIFT) |
diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c
index 4e7c1226ad..2ff39e8e64 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -85,6 +85,12 @@ void brw_set_saturate( struct brw_compile *p, GLuint value )
p->current->header.saturate = value;
}
+void brw_set_acc_write_control(struct brw_compile *p, GLuint value)
+{
+ if (p->brw->intel.gen >= 6)
+ p->current->header.acc_wr_control = value;
+}
+
void brw_push_insn_state( struct brw_compile *p )
{
assert(p->current != &p->stack[BRW_EU_MAX_INSN_STACK-1]);
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index ffdddd0a38..c63db16460 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -633,6 +633,8 @@ static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
GLuint z,
GLuint w)
{
+ assert(reg.file != BRW_IMMEDIATE_VALUE);
+
reg.dw1.bits.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(reg.dw1.bits.swizzle, x),
BRW_GET_SWZ(reg.dw1.bits.swizzle, y),
BRW_GET_SWZ(reg.dw1.bits.swizzle, z),
@@ -650,6 +652,7 @@ static INLINE struct brw_reg brw_swizzle1( struct brw_reg reg,
static INLINE struct brw_reg brw_writemask( struct brw_reg reg,
GLuint mask )
{
+ assert(reg.file != BRW_IMMEDIATE_VALUE);
reg.dw1.bits.writemask &= mask;
return reg;
}
@@ -657,6 +660,7 @@ static INLINE struct brw_reg brw_writemask( struct brw_reg reg,
static INLINE struct brw_reg brw_set_writemask( struct brw_reg reg,
GLuint mask )
{
+ assert(reg.file != BRW_IMMEDIATE_VALUE);
reg.dw1.bits.writemask = mask;
return reg;
}
@@ -766,6 +770,7 @@ void brw_set_compression_control( struct brw_compile *p, GLboolean control );
void brw_set_predicate_control_flag_value( struct brw_compile *p, GLuint value );
void brw_set_predicate_control( struct brw_compile *p, GLuint pc );
void brw_set_conditionalmod( struct brw_compile *p, GLuint conditional );
+void brw_set_acc_write_control(struct brw_compile *p, GLuint value);
void brw_init_compile( struct brw_context *, struct brw_compile *p );
const GLuint *brw_get_program( struct brw_compile *p, GLuint *sz );
@@ -840,6 +845,7 @@ void brw_ff_sync(struct brw_compile *p,
GLboolean eot);
void brw_fb_WRITE(struct brw_compile *p,
+ int dispatch_width,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
@@ -925,8 +931,8 @@ struct brw_instruction *brw_DO(struct brw_compile *p,
struct brw_instruction *brw_WHILE(struct brw_compile *p,
struct brw_instruction *patch_insn);
-struct brw_instruction *brw_BREAK(struct brw_compile *p);
-struct brw_instruction *brw_CONT(struct brw_compile *p);
+struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count);
+struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count);
/* Forward jumps:
*/
void brw_land_fwd_jump(struct brw_compile *p,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 0d5d17f501..0906150613 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -75,6 +75,8 @@ static void brw_set_dest( struct brw_instruction *insn,
else {
insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
+ /* even ignored in da16, still need to set as '01' */
+ insn->bits1.da16.dest_horiz_stride = 1;
}
}
else {
@@ -90,6 +92,8 @@ static void brw_set_dest( struct brw_instruction *insn,
}
else {
insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
+ /* even ignored in da16, still need to set as '01' */
+ insn->bits1.ia16.dest_horiz_stride = 1;
}
}
@@ -368,9 +372,23 @@ static void brw_set_dp_write_message( struct brw_context *brw,
GLuint send_commit_msg)
{
struct intel_context *intel = &brw->intel;
- brw_set_src1(insn, brw_imm_d(0));
+ brw_set_src1(insn, brw_imm_ud(0));
- if (intel->gen == 5) {
+ if (intel->gen >= 6) {
+ insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
+ insn->bits3.dp_render_cache.msg_control = msg_control;
+ insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.dp_render_cache.msg_type = msg_type;
+ insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
+ insn->bits3.dp_render_cache.header_present = 0; /* XXX */
+ insn->bits3.dp_render_cache.response_length = response_length;
+ insn->bits3.dp_render_cache.msg_length = msg_length;
+ insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
+ insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ /* XXX really need below? */
+ insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
+ insn->bits2.send_gen5.end_of_thread = end_of_thread;
+ } else if (intel->gen == 5) {
insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_write_gen5.msg_control = msg_control;
insn->bits3.dp_write_gen5.pixel_scoreboard_clear = pixel_scoreboard_clear;
@@ -759,7 +777,7 @@ void brw_ENDIF(struct brw_compile *p,
}
}
-struct brw_instruction *brw_BREAK(struct brw_compile *p)
+struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
struct brw_instruction *insn;
insn = next_insn(p, BRW_OPCODE_BREAK);
@@ -770,10 +788,11 @@ struct brw_instruction *brw_BREAK(struct brw_compile *p)
insn->header.execution_size = BRW_EXECUTE_8;
/* insn->header.mask_control = BRW_MASK_DISABLE; */
insn->bits3.if_else.pad0 = 0;
+ insn->bits3.if_else.pop_count = pop_count;
return insn;
}
-struct brw_instruction *brw_CONT(struct brw_compile *p)
+struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count)
{
struct brw_instruction *insn;
insn = next_insn(p, BRW_OPCODE_CONTINUE);
@@ -784,6 +803,7 @@ struct brw_instruction *brw_CONT(struct brw_compile *p)
insn->header.execution_size = BRW_EXECUTE_8;
/* insn->header.mask_control = BRW_MASK_DISABLE; */
insn->bits3.if_else.pad0 = 0;
+ insn->bits3.if_else.pop_count = pop_count;
return insn;
}
@@ -1332,6 +1352,7 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p,
void brw_fb_WRITE(struct brw_compile *p,
+ int dispatch_width,
struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
@@ -1340,22 +1361,40 @@ void brw_fb_WRITE(struct brw_compile *p,
GLuint response_length,
GLboolean eot)
{
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
-
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_instruction *insn;
+ GLuint msg_control, msg_type;
+
+ insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0; /* XXX */
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.destreg__conditionalmod = msg_reg_nr;
-
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+
+ if (intel->gen >= 6) {
+ /* headerless version, just submit color payload */
+ src0 = brw_message_reg(msg_reg_nr);
+
+ msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
+ } else {
+ insn->header.destreg__conditionalmod = msg_reg_nr;
+
+ msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
+ }
+
+ if (dispatch_width == 16)
+ msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
+ else
+ msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
+
brw_set_dest(insn, dest);
brw_set_src0(insn, src0);
brw_set_dp_write_message(p->brw,
insn,
binding_table_index,
- BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
- BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
+ msg_control,
+ msg_type,
msg_length,
1, /* pixel scoreboard */
- response_length,
+ response_length,
eot,
0 /* send_commit_msg */);
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
new file mode 100644
index 0000000000..34c5d5262f
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -0,0 +1,1924 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Eric Anholt <eric@anholt.net>
+ *
+ */
+
+extern "C" {
+
+#include <sys/types.h>
+
+#include "main/macros.h"
+#include "main/shaderobj.h"
+#include "program/prog_parameter.h"
+#include "program/prog_print.h"
+#include "program/prog_optimize.h"
+#include "program/hash_table.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+#include "brw_wm.h"
+#include "talloc.h"
+}
+#include "../glsl/glsl_types.h"
+#include "../glsl/ir_optimization.h"
+#include "../glsl/ir_print_visitor.h"
+
+enum register_file {
+ ARF = BRW_ARCHITECTURE_REGISTER_FILE,
+ GRF = BRW_GENERAL_REGISTER_FILE,
+ MRF = BRW_MESSAGE_REGISTER_FILE,
+ IMM = BRW_IMMEDIATE_VALUE,
+ FIXED_HW_REG, /* a struct brw_reg */
+ UNIFORM, /* prog_data->params[hw_reg] */
+ BAD_FILE
+};
+
+enum fs_opcodes {
+ FS_OPCODE_FB_WRITE = 256,
+ FS_OPCODE_RCP,
+ FS_OPCODE_RSQ,
+ FS_OPCODE_SQRT,
+ FS_OPCODE_EXP2,
+ FS_OPCODE_LOG2,
+ FS_OPCODE_POW,
+ FS_OPCODE_SIN,
+ FS_OPCODE_COS,
+ FS_OPCODE_DDX,
+ FS_OPCODE_DDY,
+ FS_OPCODE_LINTERP,
+ FS_OPCODE_TEX,
+ FS_OPCODE_TXB,
+ FS_OPCODE_TXL,
+ FS_OPCODE_DISCARD,
+};
+
+static int using_new_fs = -1;
+
+struct gl_shader *
+brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
+{
+ struct brw_shader *shader;
+
+ shader = talloc_zero(NULL, struct brw_shader);
+ if (shader) {
+ shader->base.Type = type;
+ shader->base.Name = name;
+ _mesa_init_shader(ctx, &shader->base);
+ }
+
+ return &shader->base;
+}
+
+struct gl_shader_program *
+brw_new_shader_program(GLcontext *ctx, GLuint name)
+{
+ struct brw_shader_program *prog;
+ prog = talloc_zero(NULL, struct brw_shader_program);
+ if (prog) {
+ prog->base.Name = name;
+ _mesa_init_shader_program(ctx, &prog->base);
+ }
+ return &prog->base;
+}
+
+GLboolean
+brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
+{
+ if (!_mesa_ir_compile_shader(ctx, shader))
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+GLboolean
+brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
+{
+ if (using_new_fs == -1)
+ using_new_fs = getenv("INTEL_NEW_FS") != NULL;
+
+ for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
+ struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];
+
+ if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
+ void *mem_ctx = talloc_new(NULL);
+ bool progress;
+
+ if (shader->ir)
+ talloc_free(shader->ir);
+ shader->ir = new(shader) exec_list;
+ clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
+
+ do_mat_op_to_vec(shader->ir);
+ do_mod_to_fract(shader->ir);
+ do_div_to_mul_rcp(shader->ir);
+ do_sub_to_add_neg(shader->ir);
+ do_explog_to_explog2(shader->ir);
+
+ brw_do_channel_expressions(shader->ir);
+ brw_do_vector_splitting(shader->ir);
+
+ do {
+ progress = false;
+
+ progress = do_common_optimization(shader->ir, true) || progress;
+ } while (progress);
+
+ validate_ir_tree(shader->ir);
+
+ reparent_ir(shader->ir, shader->ir);
+ talloc_free(mem_ctx);
+ }
+ }
+
+ if (!_mesa_ir_link_shader(ctx, prog))
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+static int
+type_size(const struct glsl_type *type)
+{
+ unsigned int size, i;
+
+ switch (type->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ return type->components();
+ case GLSL_TYPE_ARRAY:
+ /* FINISHME: uniform/varying arrays. */
+ return type_size(type->fields.array) * type->length;
+ case GLSL_TYPE_STRUCT:
+ size = 0;
+ for (i = 0; i < type->length; i++) {
+ size += type_size(type->fields.structure[i].type);
+ }
+ return size;
+ case GLSL_TYPE_SAMPLER:
+ /* Samplers take up no register space, since they're baked in at
+ * link time.
+ */
+ return 0;
+ default:
+ assert(!"not reached");
+ return 0;
+ }
+}
+
+class fs_reg {
+public:
+ /* Callers of this talloc-based new need not call delete. It's
+ * easier to just talloc_free 'ctx' (or any of its ancestors). */
+ static void* operator new(size_t size, void *ctx)
+ {
+ void *node;
+
+ node = talloc_size(ctx, size);
+ assert(node != NULL);
+
+ return node;
+ }
+
+ /** Generic unset register constructor. */
+ fs_reg()
+ {
+ this->file = BAD_FILE;
+ this->reg = 0;
+ this->reg_offset = 0;
+ this->hw_reg = -1;
+ this->negate = 0;
+ this->abs = 0;
+ }
+
+ /** Immediate value constructor. */
+ fs_reg(float f)
+ {
+ this->file = IMM;
+ this->reg = 0;
+ this->hw_reg = 0;
+ this->type = BRW_REGISTER_TYPE_F;
+ this->imm.f = f;
+ this->negate = 0;
+ this->abs = 0;
+ }
+
+ /** Immediate value constructor. */
+ fs_reg(int32_t i)
+ {
+ this->file = IMM;
+ this->reg = 0;
+ this->hw_reg = 0;
+ this->type = BRW_REGISTER_TYPE_D;
+ this->imm.i = i;
+ this->negate = 0;
+ this->abs = 0;
+ }
+
+ /** Immediate value constructor. */
+ fs_reg(uint32_t u)
+ {
+ this->file = IMM;
+ this->reg = 0;
+ this->hw_reg = 0;
+ this->type = BRW_REGISTER_TYPE_UD;
+ this->imm.u = u;
+ this->negate = 0;
+ this->abs = 0;
+ }
+
+ /** Fixed brw_reg Immediate value constructor. */
+ fs_reg(struct brw_reg fixed_hw_reg)
+ {
+ this->file = FIXED_HW_REG;
+ this->fixed_hw_reg = fixed_hw_reg;
+ this->reg = 0;
+ this->hw_reg = 0;
+ this->type = fixed_hw_reg.type;
+ this->negate = 0;
+ this->abs = 0;
+ }
+
+ fs_reg(enum register_file file, int hw_reg);
+ fs_reg(class fs_visitor *v, const struct glsl_type *type);
+
+ /** Register file: ARF, GRF, MRF, IMM. */
+ enum register_file file;
+ /** Abstract register number. 0 = fixed hw reg */
+ int reg;
+ /** Offset within the abstract register. */
+ int reg_offset;
+ /** HW register number. Generally unset until register allocation. */
+ int hw_reg;
+ /** Register type. BRW_REGISTER_TYPE_* */
+ int type;
+ bool negate;
+ bool abs;
+ struct brw_reg fixed_hw_reg;
+
+ /** Value for file == BRW_IMMMEDIATE_FILE */
+ union {
+ int32_t i;
+ uint32_t u;
+ float f;
+ } imm;
+};
+
+static const fs_reg reg_undef;
+static const fs_reg reg_null(ARF, BRW_ARF_NULL);
+
+class fs_inst : public exec_node {
+public:
+ /* Callers of this talloc-based new need not call delete. It's
+ * easier to just talloc_free 'ctx' (or any of its ancestors). */
+ static void* operator new(size_t size, void *ctx)
+ {
+ void *node;
+
+ node = talloc_zero_size(ctx, size);
+ assert(node != NULL);
+
+ return node;
+ }
+
+ void init()
+ {
+ this->opcode = BRW_OPCODE_NOP;
+ this->saturate = false;
+ this->conditional_mod = BRW_CONDITIONAL_NONE;
+ this->predicated = false;
+ this->sampler = 0;
+ this->shadow_compare = false;
+ }
+
+ fs_inst()
+ {
+ init();
+ }
+
+ fs_inst(int opcode)
+ {
+ init();
+ this->opcode = opcode;
+ }
+
+ fs_inst(int opcode, fs_reg dst, fs_reg src0)
+ {
+ init();
+ this->opcode = opcode;
+ this->dst = dst;
+ this->src[0] = src0;
+ }
+
+ fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+ {
+ init();
+ this->opcode = opcode;
+ this->dst = dst;
+ this->src[0] = src0;
+ this->src[1] = src1;
+ }
+
+ fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
+ {
+ init();
+ this->opcode = opcode;
+ this->dst = dst;
+ this->src[0] = src0;
+ this->src[1] = src1;
+ this->src[2] = src2;
+ }
+
+ int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
+ fs_reg dst;
+ fs_reg src[3];
+ bool saturate;
+ bool predicated;
+ int conditional_mod; /**< BRW_CONDITIONAL_* */
+
+ int mlen; /** SEND message length */
+ int sampler;
+ bool shadow_compare;
+
+ /** @{
+ * Annotation for the generated IR. One of the two can be set.
+ */
+ ir_instruction *ir;
+ const char *annotation;
+ /** @} */
+};
+
+class fs_visitor : public ir_visitor
+{
+public:
+
+ fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
+ {
+ this->c = c;
+ this->p = &c->func;
+ this->brw = p->brw;
+ this->intel = &brw->intel;
+ this->ctx = &intel->ctx;
+ this->mem_ctx = talloc_new(NULL);
+ this->shader = shader;
+ this->fail = false;
+ this->next_abstract_grf = 1;
+ this->variable_ht = hash_table_ctor(0,
+ hash_table_pointer_hash,
+ hash_table_pointer_compare);
+
+ this->frag_color = NULL;
+ this->frag_data = NULL;
+ this->frag_depth = NULL;
+ this->first_non_payload_grf = 0;
+
+ this->current_annotation = NULL;
+ this->annotation_string = NULL;
+ this->annotation_ir = NULL;
+ }
+ ~fs_visitor()
+ {
+ talloc_free(this->mem_ctx);
+ hash_table_dtor(this->variable_ht);
+ }
+
+ fs_reg *variable_storage(ir_variable *var);
+
+ void visit(ir_variable *ir);
+ void visit(ir_assignment *ir);
+ void visit(ir_dereference_variable *ir);
+ void visit(ir_dereference_record *ir);
+ void visit(ir_dereference_array *ir);
+ void visit(ir_expression *ir);
+ void visit(ir_texture *ir);
+ void visit(ir_if *ir);
+ void visit(ir_constant *ir);
+ void visit(ir_swizzle *ir);
+ void visit(ir_return *ir);
+ void visit(ir_loop *ir);
+ void visit(ir_loop_jump *ir);
+ void visit(ir_discard *ir);
+ void visit(ir_call *ir);
+ void visit(ir_function *ir);
+ void visit(ir_function_signature *ir);
+
+ fs_inst *emit(fs_inst inst);
+ void assign_curb_setup();
+ void assign_urb_setup();
+ void assign_regs();
+ void generate_code();
+ void generate_fb_write(fs_inst *inst);
+ void generate_linterp(fs_inst *inst, struct brw_reg dst,
+ struct brw_reg *src);
+ void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
+ void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
+ void generate_discard(fs_inst *inst);
+
+ void emit_dummy_fs();
+ void emit_interpolation();
+ void emit_pinterp(int location);
+ void emit_fb_writes();
+
+ struct brw_reg interp_reg(int location, int channel);
+
+ struct brw_context *brw;
+ struct intel_context *intel;
+ GLcontext *ctx;
+ struct brw_wm_compile *c;
+ struct brw_compile *p;
+ struct brw_shader *shader;
+ void *mem_ctx;
+ exec_list instructions;
+ int next_abstract_grf;
+ struct hash_table *variable_ht;
+ ir_variable *frag_color, *frag_data, *frag_depth;
+ int first_non_payload_grf;
+
+ /** @{ debug annotation info */
+ const char *current_annotation;
+ ir_instruction *base_ir;
+ const char **annotation_string;
+ ir_instruction **annotation_ir;
+ /** @} */
+
+ bool fail;
+
+ /* Result of last visit() method. */
+ fs_reg result;
+
+ fs_reg pixel_x;
+ fs_reg pixel_y;
+ fs_reg pixel_w;
+ fs_reg delta_x;
+ fs_reg delta_y;
+ fs_reg interp_attrs[64];
+
+ int grf_used;
+
+};
+
+/** Fixed HW reg constructor. */
+fs_reg::fs_reg(enum register_file file, int hw_reg)
+{
+ this->file = file;
+ this->reg = 0;
+ this->reg_offset = 0;
+ this->hw_reg = hw_reg;
+ this->type = BRW_REGISTER_TYPE_F;
+ this->negate = 0;
+ this->abs = 0;
+}
+
+/** Automatic reg constructor. */
+fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
+{
+ this->file = GRF;
+ this->reg = v->next_abstract_grf;
+ this->reg_offset = 0;
+ v->next_abstract_grf += type_size(type);
+ this->hw_reg = -1;
+ this->negate = 0;
+ this->abs = 0;
+
+ switch (type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ this->type = BRW_REGISTER_TYPE_F;
+ break;
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_BOOL:
+ this->type = BRW_REGISTER_TYPE_D;
+ break;
+ case GLSL_TYPE_UINT:
+ this->type = BRW_REGISTER_TYPE_UD;
+ break;
+ default:
+ assert(!"not reached");
+ this->type = BRW_REGISTER_TYPE_F;
+ break;
+ }
+}
+
+fs_reg *
+fs_visitor::variable_storage(ir_variable *var)
+{
+ return (fs_reg *)hash_table_find(this->variable_ht, var);
+}
+
+void
+fs_visitor::visit(ir_variable *ir)
+{
+ fs_reg *reg = NULL;
+
+ if (strcmp(ir->name, "gl_FragColor") == 0) {
+ this->frag_color = ir;
+ } else if (strcmp(ir->name, "gl_FragData") == 0) {
+ this->frag_data = ir;
+ } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
+ this->frag_depth = ir;
+ assert(!"FINISHME: this hangs currently.");
+ }
+
+ if (ir->mode == ir_var_in) {
+ reg = &this->interp_attrs[ir->location];
+ }
+
+ if (ir->mode == ir_var_uniform) {
+ const float *vec_values;
+ int param_index = c->prog_data.nr_params;
+
+ /* FINISHME: This is wildly incomplete. */
+ assert(ir->type->is_scalar() || ir->type->is_vector() ||
+ ir->type->is_sampler());
+
+ const struct gl_program *fp = &this->brw->fragment_program->Base;
+ /* Our support for uniforms is piggy-backed on the struct
+ * gl_fragment_program, because that's where the values actually
+ * get stored, rather than in some global gl_shader_program uniform
+ * store.
+ */
+ vec_values = fp->Parameters->ParameterValues[ir->location];
+ for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
+ c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
+ }
+
+ reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
+ }
+
+ if (!reg)
+ reg = new(this->mem_ctx) fs_reg(this, ir->type);
+
+ hash_table_insert(this->variable_ht, reg, ir);
+}
+
+void
+fs_visitor::visit(ir_dereference_variable *ir)
+{
+ fs_reg *reg = variable_storage(ir->var);
+ this->result = *reg;
+}
+
+void
+fs_visitor::visit(ir_dereference_record *ir)
+{
+ assert(!"FINISHME");
+}
+
+void
+fs_visitor::visit(ir_dereference_array *ir)
+{
+ ir_constant *index;
+ int element_size;
+
+ ir->array->accept(this);
+ index = ir->array_index->as_constant();
+
+ if (ir->type->is_matrix()) {
+ element_size = ir->type->vector_elements;
+ } else {
+ element_size = type_size(ir->type);
+ }
+
+ if (index) {
+ assert(this->result.file == UNIFORM ||
+ (this->result.file == GRF &&
+ this->result.reg != 0));
+ this->result.reg_offset += index->value.i[0] * element_size;
+ } else {
+ assert(!"FINISHME: non-constant matrix column");
+ }
+}
+
+void
+fs_visitor::visit(ir_expression *ir)
+{
+ unsigned int operand;
+ fs_reg op[2], temp;
+ fs_reg result;
+ fs_inst *inst;
+
+ for (operand = 0; operand < ir->get_num_operands(); operand++) {
+ ir->operands[operand]->accept(this);
+ if (this->result.file == BAD_FILE) {
+ ir_print_visitor v;
+ printf("Failed to get tree for expression operand:\n");
+ ir->operands[operand]->accept(&v);
+ this->fail = true;
+ }
+ op[operand] = this->result;
+
+ /* Matrix expression operands should have been broken down to vector
+ * operations already.
+ */
+ assert(!ir->operands[operand]->type->is_matrix());
+ /* And then those vector operands should have been broken down to scalar.
+ */
+ assert(!ir->operands[operand]->type->is_vector());
+ }
+
+ /* Storage for our result. If our result goes into an assignment, it will
+ * just get copy-propagated out, so no worries.
+ */
+ this->result = fs_reg(this, ir->type);
+
+ switch (ir->operation) {
+ case ir_unop_logic_not:
+ emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
+ break;
+ case ir_unop_neg:
+ op[0].negate = ~op[0].negate;
+ this->result = op[0];
+ break;
+ case ir_unop_abs:
+ op[0].abs = true;
+ this->result = op[0];
+ break;
+ case ir_unop_sign:
+ temp = fs_reg(this, ir->type);
+
+ emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
+
+ inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
+ inst->conditional_mod = BRW_CONDITIONAL_G;
+ inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
+ inst->predicated = true;
+
+ inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+ inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
+ inst->predicated = true;
+
+ break;
+ case ir_unop_rcp:
+ emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
+ break;
+
+ case ir_unop_exp2:
+ emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
+ break;
+ case ir_unop_log2:
+ emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
+ break;
+ case ir_unop_exp:
+ case ir_unop_log:
+ assert(!"not reached: should be handled by ir_explog_to_explog2");
+ break;
+ case ir_unop_sin:
+ emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
+ break;
+ case ir_unop_cos:
+ emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
+ break;
+
+ case ir_unop_dFdx:
+ emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
+ break;
+ case ir_unop_dFdy:
+ emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
+ break;
+
+ case ir_binop_add:
+ emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
+ break;
+ case ir_binop_sub:
+ assert(!"not reached: should be handled by ir_sub_to_add_neg");
+ break;
+
+ case ir_binop_mul:
+ emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
+ break;
+ case ir_binop_div:
+ assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+ break;
+ case ir_binop_mod:
+ assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+ break;
+
+ case ir_binop_less:
+ inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+ emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
+ break;
+ case ir_binop_greater:
+ inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
+ inst->conditional_mod = BRW_CONDITIONAL_G;
+ emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
+ break;
+ case ir_binop_lequal:
+ inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
+ inst->conditional_mod = BRW_CONDITIONAL_LE;
+ emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
+ break;
+ case ir_binop_gequal:
+ inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
+ inst->conditional_mod = BRW_CONDITIONAL_GE;
+ emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
+ break;
+ case ir_binop_equal:
+ inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+ emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
+ break;
+ case ir_binop_nequal:
+ inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
+ break;
+
+ case ir_binop_logic_xor:
+ emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
+ break;
+
+ case ir_binop_logic_or:
+ emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
+ break;
+
+ case ir_binop_logic_and:
+ emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
+ break;
+
+ case ir_binop_dot:
+ case ir_binop_cross:
+ case ir_unop_any:
+ assert(!"not reached: should be handled by brw_channel_expressions");
+ break;
+
+ case ir_unop_sqrt:
+ emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
+ break;
+
+ case ir_unop_rsq:
+ emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
+ break;
+
+ case ir_unop_i2f:
+ case ir_unop_b2f:
+ case ir_unop_b2i:
+ emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
+ break;
+ case ir_unop_f2i:
+ emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0]));
+ break;
+ case ir_unop_f2b:
+ case ir_unop_i2b:
+ inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+ case ir_unop_trunc:
+ emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
+ break;
+ case ir_unop_ceil:
+ op[0].negate = ~op[0].negate;
+ inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
+ this->result.negate = true;
+ break;
+ case ir_unop_floor:
+ inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
+ break;
+ case ir_unop_fract:
+ inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
+ break;
+
+ case ir_binop_min:
+ inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+
+ inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
+ inst->predicated = true;
+ break;
+ case ir_binop_max:
+ inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
+ inst->conditional_mod = BRW_CONDITIONAL_G;
+
+ inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
+ inst->predicated = true;
+ break;
+
+ case ir_binop_pow:
+ inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
+ break;
+
+ case ir_unop_bit_not:
+ case ir_unop_u2f:
+ case ir_binop_lshift:
+ case ir_binop_rshift:
+ case ir_binop_bit_and:
+ case ir_binop_bit_xor:
+ case ir_binop_bit_or:
+ assert(!"GLSL 1.30 features unsupported");
+ break;
+ }
+}
+
+void
+fs_visitor::visit(ir_assignment *ir)
+{
+ struct fs_reg l, r;
+ int i;
+ int write_mask;
+ fs_inst *inst;
+
+ /* FINISHME: arrays on the lhs */
+ ir->lhs->accept(this);
+ l = this->result;
+
+ ir->rhs->accept(this);
+ r = this->result;
+
+ /* FINISHME: This should really set to the correct maximal writemask for each
+ * FINISHME: component written (in the loops below). This case can only
+ * FINISHME: occur for matrices, arrays, and structures.
+ */
+ if (ir->write_mask == 0) {
+ assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
+ write_mask = WRITEMASK_XYZW;
+ } else {
+ assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
+ write_mask = ir->write_mask;
+ }
+
+ assert(l.file != BAD_FILE);
+ assert(r.file != BAD_FILE);
+
+ if (ir->condition) {
+ /* Get the condition bool into the predicate. */
+ ir->condition->accept(this);
+ inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, fs_reg(0)));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ }
+
+ for (i = 0; i < type_size(ir->lhs->type); i++) {
+ if (i >= 4 || (write_mask & (1 << i))) {
+ inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
+ if (ir->condition)
+ inst->predicated = true;
+ }
+ l.reg_offset++;
+ r.reg_offset++;
+ }
+}
+
+void
+fs_visitor::visit(ir_texture *ir)
+{
+ int base_mrf = 2;
+ fs_inst *inst = NULL;
+ unsigned int mlen = 0;
+
+ ir->coordinate->accept(this);
+ fs_reg coordinate = this->result;
+
+ if (ir->projector) {
+ fs_reg inv_proj = fs_reg(this, glsl_type::float_type);
+
+ ir->projector->accept(this);
+ emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result));
+
+ fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type);
+ for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) {
+ emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj));
+ coordinate.reg_offset++;
+ proj_coordinate.reg_offset++;
+ }
+ proj_coordinate.reg_offset = 0;
+
+ coordinate = proj_coordinate;
+ }
+
+ for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
+ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
+ coordinate.reg_offset++;
+ }
+
+ /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
+ if (intel->gen < 5)
+ mlen = 3;
+
+ if (ir->shadow_comparitor) {
+ /* For shadow comparisons, we have to supply u,v,r. */
+ mlen = 3;
+
+ ir->shadow_comparitor->accept(this);
+ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
+ mlen++;
+ }
+
+ /* Do we ever want to handle writemasking on texture samples? Is it
+ * performance relevant?
+ */
+ fs_reg dst = fs_reg(this, glsl_type::vec4_type);
+
+ switch (ir->op) {
+ case ir_tex:
+ inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
+ break;
+ case ir_txb:
+ ir->lod_info.bias->accept(this);
+ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
+ mlen++;
+
+ inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
+ break;
+ case ir_txl:
+ ir->lod_info.lod->accept(this);
+ emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
+ mlen++;
+
+ inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
+ break;
+ case ir_txd:
+ case ir_txf:
+ assert(!"GLSL 1.30 features unsupported");
+ break;
+ }
+
+ this->result = dst;
+
+ if (ir->shadow_comparitor)
+ inst->shadow_compare = true;
+ inst->mlen = mlen;
+}
+
+void
+fs_visitor::visit(ir_swizzle *ir)
+{
+ ir->val->accept(this);
+ fs_reg val = this->result;
+
+ fs_reg result = fs_reg(this, ir->type);
+ this->result = result;
+
+ for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
+ fs_reg channel = val;
+ int swiz = 0;
+
+ switch (i) {
+ case 0:
+ swiz = ir->mask.x;
+ break;
+ case 1:
+ swiz = ir->mask.y;
+ break;
+ case 2:
+ swiz = ir->mask.z;
+ break;
+ case 3:
+ swiz = ir->mask.w;
+ break;
+ }
+
+ channel.reg_offset += swiz;
+ emit(fs_inst(BRW_OPCODE_MOV, result, channel));
+ result.reg_offset++;
+ }
+}
+
+void
+fs_visitor::visit(ir_discard *ir)
+{
+ assert(ir->condition == NULL); /* FINISHME */
+
+ emit(fs_inst(FS_OPCODE_DISCARD));
+}
+
+void
+fs_visitor::visit(ir_constant *ir)
+{
+ fs_reg reg(this, ir->type);
+ this->result = reg;
+
+ for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
+ break;
+ case GLSL_TYPE_UINT:
+ emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
+ break;
+ case GLSL_TYPE_INT:
+ emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
+ break;
+ case GLSL_TYPE_BOOL:
+ emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
+ break;
+ default:
+ assert(!"Non-float/uint/int/bool constant");
+ }
+ reg.reg_offset++;
+ }
+}
+
+void
+fs_visitor::visit(ir_if *ir)
+{
+ fs_inst *inst;
+
+ /* Don't point the annotation at the if statement, because then it plus
+ * the then and else blocks get printed.
+ */
+ this->base_ir = ir->condition;
+
+ /* Generate the condition into the condition code. */
+ ir->condition->accept(this);
+ inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+ inst = emit(fs_inst(BRW_OPCODE_IF));
+ inst->predicated = true;
+
+ foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
+ ir_instruction *ir = (ir_instruction *)iter.get();
+ this->base_ir = ir;
+
+ ir->accept(this);
+ }
+
+ if (!ir->else_instructions.is_empty()) {
+ emit(fs_inst(BRW_OPCODE_ELSE));
+
+ foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
+ ir_instruction *ir = (ir_instruction *)iter.get();
+ this->base_ir = ir;
+
+ ir->accept(this);
+ }
+ }
+
+ emit(fs_inst(BRW_OPCODE_ENDIF));
+}
+
+void
+fs_visitor::visit(ir_loop *ir)
+{
+ assert(!ir->from);
+ assert(!ir->to);
+ assert(!ir->increment);
+ assert(!ir->counter);
+
+ emit(fs_inst(BRW_OPCODE_DO));
+
+ /* Start a safety counter. If the user messed up their loop
+ * counting, we don't want to hang the GPU.
+ */
+ fs_reg max_iter = fs_reg(this, glsl_type::int_type);
+ emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));
+
+ foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
+ ir_instruction *ir = (ir_instruction *)iter.get();
+ fs_inst *inst;
+
+ this->base_ir = ir;
+ ir->accept(this);
+
+ /* Check the maximum loop iters counter. */
+ inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1)));
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+ inst = emit(fs_inst(BRW_OPCODE_BREAK));
+ inst->predicated = true;
+ }
+
+ emit(fs_inst(BRW_OPCODE_WHILE));
+}
+
+void
+fs_visitor::visit(ir_loop_jump *ir)
+{
+ switch (ir->mode) {
+ case ir_loop_jump::jump_break:
+ emit(fs_inst(BRW_OPCODE_BREAK));
+ break;
+ case ir_loop_jump::jump_continue:
+ emit(fs_inst(BRW_OPCODE_CONTINUE));
+ break;
+ }
+}
+
+void
+fs_visitor::visit(ir_call *ir)
+{
+ assert(!"FINISHME");
+}
+
+void
+fs_visitor::visit(ir_return *ir)
+{
+ assert(!"FINISHME");
+}
+
+void
+fs_visitor::visit(ir_function *ir)
+{
+ /* Ignore function bodies other than main() -- we shouldn't see calls to
+ * them since they should all be inlined before we get to ir_to_mesa.
+ */
+ if (strcmp(ir->name, "main") == 0) {
+ const ir_function_signature *sig;
+ exec_list empty;
+
+ sig = ir->matching_signature(&empty);
+
+ assert(sig);
+
+ foreach_iter(exec_list_iterator, iter, sig->body) {
+ ir_instruction *ir = (ir_instruction *)iter.get();
+ this->base_ir = ir;
+
+ ir->accept(this);
+ }
+ }
+}
+
+void
+fs_visitor::visit(ir_function_signature *ir)
+{
+ assert(!"not reached");
+ (void)ir;
+}
+
+fs_inst *
+fs_visitor::emit(fs_inst inst)
+{
+ fs_inst *list_inst = new(mem_ctx) fs_inst;
+ *list_inst = inst;
+
+ list_inst->annotation = this->current_annotation;
+ list_inst->ir = this->base_ir;
+
+ this->instructions.push_tail(list_inst);
+
+ return list_inst;
+}
+
+/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
+void
+fs_visitor::emit_dummy_fs()
+{
+ /* Everyone's favorite color. */
+ emit(fs_inst(BRW_OPCODE_MOV,
+ fs_reg(MRF, 2),
+ fs_reg(1.0f)));
+ emit(fs_inst(BRW_OPCODE_MOV,
+ fs_reg(MRF, 3),
+ fs_reg(0.0f)));
+ emit(fs_inst(BRW_OPCODE_MOV,
+ fs_reg(MRF, 4),
+ fs_reg(1.0f)));
+ emit(fs_inst(BRW_OPCODE_MOV,
+ fs_reg(MRF, 5),
+ fs_reg(0.0f)));
+
+ fs_inst *write;
+ write = emit(fs_inst(FS_OPCODE_FB_WRITE,
+ fs_reg(0),
+ fs_reg(0)));
+}
+
+/* The register location here is relative to the start of the URB
+ * data. It will get adjusted to be a real location before
+ * generate_code() time.
+ */
+struct brw_reg
+fs_visitor::interp_reg(int location, int channel)
+{
+ int regnr = location * 2 + channel / 2;
+ int stride = (channel & 1) * 4;
+
+ return brw_vec1_grf(regnr, stride);
+}
+
+/** Emits the interpolation for the varying inputs. */
+void
+fs_visitor::emit_interpolation()
+{
+ struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
+ /* For now, the source regs for the setup URB data will be unset,
+ * since we don't know until codegen how many push constants we'll
+ * use, and therefore what the setup URB offset is.
+ */
+ fs_reg src_reg = reg_undef;
+
+ this->current_annotation = "compute pixel centers";
+ this->pixel_x = fs_reg(this, glsl_type::uint_type);
+ this->pixel_y = fs_reg(this, glsl_type::uint_type);
+ emit(fs_inst(BRW_OPCODE_ADD,
+ this->pixel_x,
+ fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
+ fs_reg(brw_imm_v(0x10101010))));
+ emit(fs_inst(BRW_OPCODE_ADD,
+ this->pixel_y,
+ fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
+ fs_reg(brw_imm_v(0x11001100))));
+
+ this->current_annotation = "compute pixel deltas from v0";
+ this->delta_x = fs_reg(this, glsl_type::float_type);
+ this->delta_y = fs_reg(this, glsl_type::float_type);
+ emit(fs_inst(BRW_OPCODE_ADD,
+ this->delta_x,
+ this->pixel_x,
+ fs_reg(negate(brw_vec1_grf(1, 0)))));
+ emit(fs_inst(BRW_OPCODE_ADD,
+ this->delta_y,
+ this->pixel_y,
+ fs_reg(brw_vec1_grf(1, 1))));
+
+ this->current_annotation = "compute pos.w and 1/pos.w";
+ /* Compute wpos. Unlike many other varying inputs, we usually need it
+ * to produce 1/w, and the varying variable wouldn't show up.
+ */
+ fs_reg wpos = fs_reg(this, glsl_type::vec4_type);
+ this->interp_attrs[FRAG_ATTRIB_WPOS] = wpos;
+ emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); /* FINISHME: ARB_fcc */
+ wpos.reg_offset++;
+ emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); /* FINISHME: ARB_fcc */
+ wpos.reg_offset++;
+ emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
+ interp_reg(FRAG_ATTRIB_WPOS, 2)));
+ wpos.reg_offset++;
+ emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
+ interp_reg(FRAG_ATTRIB_WPOS, 3)));
+ /* Compute the pixel W value from wpos.w. */
+ this->pixel_w = fs_reg(this, glsl_type::float_type);
+ emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos));
+
+ /* FINISHME: gl_FrontFacing */
+
+ foreach_iter(exec_list_iterator, iter, *this->shader->ir) {
+ ir_instruction *ir = (ir_instruction *)iter.get();
+ ir_variable *var = ir->as_variable();
+
+ if (!var)
+ continue;
+
+ if (var->mode != ir_var_in)
+ continue;
+
+ /* If it's already set up (WPOS), skip. */
+ if (var->location == 0)
+ continue;
+
+ this->current_annotation = talloc_asprintf(this->mem_ctx,
+ "interpolate %s "
+ "(FRAG_ATTRIB[%d])",
+ var->name,
+ var->location);
+ emit_pinterp(var->location);
+ }
+ this->current_annotation = NULL;
+}
+
+void
+fs_visitor::emit_pinterp(int location)
+{
+ fs_reg interp_attr = fs_reg(this, glsl_type::vec4_type);
+ this->interp_attrs[location] = interp_attr;
+
+ for (unsigned int i = 0; i < 4; i++) {
+ struct brw_reg interp = interp_reg(location, i);
+ emit(fs_inst(FS_OPCODE_LINTERP,
+ interp_attr,
+ this->delta_x,
+ this->delta_y,
+ fs_reg(interp)));
+ interp_attr.reg_offset++;
+ }
+ interp_attr.reg_offset -= 4;
+
+ for (unsigned int i = 0; i < 4; i++) {
+ emit(fs_inst(BRW_OPCODE_MUL,
+ interp_attr,
+ interp_attr,
+ this->pixel_w));
+ interp_attr.reg_offset++;
+ }
+}
+
+void
+fs_visitor::emit_fb_writes()
+{
+ this->current_annotation = "FB write";
+
+ assert(this->frag_color || !"FINISHME: MRT");
+ fs_reg color = *(variable_storage(this->frag_color));
+
+ for (int i = 0; i < 4; i++) {
+ emit(fs_inst(BRW_OPCODE_MOV,
+ fs_reg(MRF, 2 + i),
+ color));
+ color.reg_offset++;
+ }
+
+ emit(fs_inst(FS_OPCODE_FB_WRITE,
+ fs_reg(0),
+ fs_reg(0)));
+
+ this->current_annotation = NULL;
+}
+
+void
+fs_visitor::generate_fb_write(fs_inst *inst)
+{
+ GLboolean eot = 1; /* FINISHME: MRT */
+ /* FINISHME: AADS */
+
+ /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
+ * move, here's g1.
+ */
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p,
+ brw_message_reg(1),
+ brw_vec8_grf(1, 0));
+ brw_pop_insn_state(p);
+
+ int nr = 2 + 4;
+
+ brw_fb_WRITE(p,
+ 8, /* dispatch_width */
+ retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+ 0, /* base MRF */
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ 0, /* FINISHME: MRT target */
+ nr,
+ 0,
+ eot);
+}
+
+void
+fs_visitor::generate_linterp(fs_inst *inst,
+ struct brw_reg dst, struct brw_reg *src)
+{
+ struct brw_reg delta_x = src[0];
+ struct brw_reg delta_y = src[1];
+ struct brw_reg interp = src[2];
+
+ if (brw->has_pln &&
+ delta_y.nr == delta_x.nr + 1 &&
+ (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
+ brw_PLN(p, dst, interp, delta_x);
+ } else {
+ brw_LINE(p, brw_null_reg(), interp, delta_x);
+ brw_MAC(p, dst, suboffset(interp, 1), delta_y);
+ }
+}
+
+void
+fs_visitor::generate_math(fs_inst *inst,
+ struct brw_reg dst, struct brw_reg *src)
+{
+ int op;
+
+ switch (inst->opcode) {
+ case FS_OPCODE_RCP:
+ op = BRW_MATH_FUNCTION_INV;
+ break;
+ case FS_OPCODE_RSQ:
+ op = BRW_MATH_FUNCTION_RSQ;
+ break;
+ case FS_OPCODE_SQRT:
+ op = BRW_MATH_FUNCTION_SQRT;
+ break;
+ case FS_OPCODE_EXP2:
+ op = BRW_MATH_FUNCTION_EXP;
+ break;
+ case FS_OPCODE_LOG2:
+ op = BRW_MATH_FUNCTION_LOG;
+ break;
+ case FS_OPCODE_POW:
+ op = BRW_MATH_FUNCTION_POW;
+ break;
+ case FS_OPCODE_SIN:
+ op = BRW_MATH_FUNCTION_SIN;
+ break;
+ case FS_OPCODE_COS:
+ op = BRW_MATH_FUNCTION_COS;
+ break;
+ default:
+ assert(!"not reached: unknown math function");
+ op = 0;
+ break;
+ }
+
+ if (inst->opcode == FS_OPCODE_POW) {
+ brw_MOV(p, brw_message_reg(3), src[1]);
+ }
+
+ brw_math(p, dst,
+ op,
+ inst->saturate ? BRW_MATH_SATURATE_SATURATE :
+ BRW_MATH_SATURATE_NONE,
+ 2, src[0],
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
+}
+
+void
+fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+{
+ int msg_type = -1;
+ int rlen = 4;
+
+ if (intel->gen == 5) {
+ switch (inst->opcode) {
+ case FS_OPCODE_TEX:
+ if (inst->shadow_compare) {
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
+ } else {
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
+ }
+ break;
+ case FS_OPCODE_TXB:
+ if (inst->shadow_compare) {
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
+ } else {
+ msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
+ }
+ break;
+ }
+ } else {
+ switch (inst->opcode) {
+ case FS_OPCODE_TEX:
+ /* Note that G45 and older determines shadow compare and dispatch width
+ * from message length for most messages.
+ */
+ if (inst->shadow_compare) {
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
+ } else {
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
+ }
+ case FS_OPCODE_TXB:
+ if (inst->shadow_compare) {
+ assert(!"FINISHME: shadow compare with bias.");
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ } else {
+ msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
+ rlen = 8;
+ }
+ break;
+ }
+ }
+ assert(msg_type != -1);
+
+ /* g0 header. */
+ src.nr--;
+
+ brw_SAMPLE(p,
+ retype(dst, BRW_REGISTER_TYPE_UW),
+ src.nr,
+ retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+ SURF_INDEX_TEXTURE(inst->sampler),
+ inst->sampler,
+ WRITEMASK_XYZW,
+ msg_type,
+ rlen,
+ inst->mlen + 1,
+ 0,
+ 1,
+ BRW_SAMPLER_SIMD_MODE_SIMD8);
+}
+
+void
+fs_visitor::generate_discard(fs_inst *inst)
+{
+ struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
+ brw_AND(p, g0, c->emit_mask_reg, g0);
+ brw_pop_insn_state(p);
+}
+
+static void
+trivial_assign_reg(int header_size, fs_reg *reg)
+{
+ if (reg->file == GRF && reg->reg != 0) {
+ reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
+ reg->reg = 0;
+ }
+}
+
+void
+fs_visitor::assign_curb_setup()
+{
+ c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
+ c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
+
+ /* Map the offsets in the UNIFORM file to fixed HW regs. */
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ fs_inst *inst = (fs_inst *)iter.get();
+
+ for (unsigned int i = 0; i < 3; i++) {
+ if (inst->src[i].file == UNIFORM) {
+ int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
+ struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
+ constant_nr / 8,
+ constant_nr % 8);
+
+ inst->src[i].file = FIXED_HW_REG;
+ inst->src[i].fixed_hw_reg = brw_reg;
+ }
+ }
+ }
+}
+
+void
+fs_visitor::assign_urb_setup()
+{
+ int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
+ int interp_reg_nr[FRAG_ATTRIB_MAX];
+
+ c->prog_data.urb_read_length = 0;
+
+ /* Figure out where each of the incoming setup attributes lands. */
+ for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
+ interp_reg_nr[i] = -1;
+
+ if (i != FRAG_ATTRIB_WPOS &&
+ !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
+ continue;
+
+ /* Each attribute is 4 setup channels, each of which is half a reg. */
+ interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
+ c->prog_data.urb_read_length += 2;
+ }
+
+ /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
+ * the correct setup input.
+ */
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ fs_inst *inst = (fs_inst *)iter.get();
+
+ if (inst->opcode != FS_OPCODE_LINTERP)
+ continue;
+
+ assert(inst->src[2].file == FIXED_HW_REG);
+
+ int location = inst->src[2].fixed_hw_reg.nr / 2;
+ assert(interp_reg_nr[location] != -1);
+ inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
+ (inst->src[2].fixed_hw_reg.nr & 1));
+ }
+
+ this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
+}
+
+void
+fs_visitor::assign_regs()
+{
+ int header_size = this->first_non_payload_grf;
+ int last_grf = 0;
+
+ /* FINISHME: trivial assignment of register numbers */
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ fs_inst *inst = (fs_inst *)iter.get();
+
+ trivial_assign_reg(header_size, &inst->dst);
+ trivial_assign_reg(header_size, &inst->src[0]);
+ trivial_assign_reg(header_size, &inst->src[1]);
+
+ last_grf = MAX2(last_grf, inst->dst.hw_reg);
+ last_grf = MAX2(last_grf, inst->src[0].hw_reg);
+ last_grf = MAX2(last_grf, inst->src[1].hw_reg);
+ }
+
+ this->grf_used = last_grf + 1;
+}
+
+static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
+{
+ struct brw_reg brw_reg;
+
+ switch (reg->file) {
+ case GRF:
+ case ARF:
+ case MRF:
+ brw_reg = brw_vec8_reg(reg->file,
+ reg->hw_reg, 0);
+ brw_reg = retype(brw_reg, reg->type);
+ break;
+ case IMM:
+ switch (reg->type) {
+ case BRW_REGISTER_TYPE_F:
+ brw_reg = brw_imm_f(reg->imm.f);
+ break;
+ case BRW_REGISTER_TYPE_D:
+ brw_reg = brw_imm_d(reg->imm.i);
+ break;
+ case BRW_REGISTER_TYPE_UD:
+ brw_reg = brw_imm_ud(reg->imm.u);
+ break;
+ default:
+ assert(!"not reached");
+ break;
+ }
+ break;
+ case FIXED_HW_REG:
+ brw_reg = reg->fixed_hw_reg;
+ break;
+ case BAD_FILE:
+ /* Probably unused. */
+ brw_reg = brw_null_reg();
+ break;
+ case UNIFORM:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
+ }
+ if (reg->abs)
+ brw_reg = brw_abs(brw_reg);
+ if (reg->negate)
+ brw_reg = negate(brw_reg);
+
+ return brw_reg;
+}
+
+void
+fs_visitor::generate_code()
+{
+ unsigned int annotation_len = 0;
+ int last_native_inst = 0;
+ struct brw_instruction *if_stack[16], *loop_stack[16];
+ int if_stack_depth = 0, loop_stack_depth = 0;
+ int if_depth_in_loop[16];
+
+ if_depth_in_loop[loop_stack_depth] = 0;
+
+ memset(&if_stack, 0, sizeof(if_stack));
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ fs_inst *inst = (fs_inst *)iter.get();
+ struct brw_reg src[3], dst;
+
+ for (unsigned int i = 0; i < 3; i++) {
+ src[i] = brw_reg_from_fs_reg(&inst->src[i]);
+ }
+ dst = brw_reg_from_fs_reg(&inst->dst);
+
+ brw_set_conditionalmod(p, inst->conditional_mod);
+ brw_set_predicate_control(p, inst->predicated);
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_MOV:
+ brw_MOV(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_ADD:
+ brw_ADD(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_MUL:
+ brw_MUL(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_FRC:
+ brw_FRC(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDD:
+ brw_RNDD(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDZ:
+ brw_RNDZ(p, dst, src[0]);
+ break;
+
+ case BRW_OPCODE_AND:
+ brw_AND(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_OR:
+ brw_OR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_XOR:
+ brw_XOR(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_CMP:
+ brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SEL:
+ brw_SEL(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_IF:
+ assert(if_stack_depth < 16);
+ if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
+ if_stack_depth++;
+ break;
+ case BRW_OPCODE_ELSE:
+ if_stack[if_stack_depth - 1] =
+ brw_ELSE(p, if_stack[if_stack_depth - 1]);
+ break;
+ case BRW_OPCODE_ENDIF:
+ if_stack_depth--;
+ brw_ENDIF(p , if_stack[if_stack_depth]);
+ break;
+
+ case BRW_OPCODE_DO:
+ loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ if_depth_in_loop[loop_stack_depth] = 0;
+ break;
+
+ case BRW_OPCODE_BREAK:
+ brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case BRW_OPCODE_CONTINUE:
+ brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+
+ case BRW_OPCODE_WHILE: {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ if (intel->gen == 5)
+ br = 2;
+
+ assert(loop_stack_depth > 0);
+ loop_stack_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_stack[loop_stack_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ }
+ }
+ }
+ break;
+
+ case FS_OPCODE_RCP:
+ case FS_OPCODE_RSQ:
+ case FS_OPCODE_SQRT:
+ case FS_OPCODE_EXP2:
+ case FS_OPCODE_LOG2:
+ case FS_OPCODE_POW:
+ case FS_OPCODE_SIN:
+ case FS_OPCODE_COS:
+ generate_math(inst, dst, src);
+ break;
+ case FS_OPCODE_LINTERP:
+ generate_linterp(inst, dst, src);
+ break;
+ case FS_OPCODE_TEX:
+ case FS_OPCODE_TXB:
+ case FS_OPCODE_TXL:
+ generate_tex(inst, dst, src[0]);
+ break;
+ case FS_OPCODE_DISCARD:
+ generate_discard(inst);
+ break;
+ case FS_OPCODE_FB_WRITE:
+ generate_fb_write(inst);
+ break;
+ default:
+ if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
+ _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
+ brw_opcodes[inst->opcode].name);
+ } else {
+ _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
+ }
+ this->fail = true;
+ }
+
+ if (annotation_len < p->nr_insn) {
+ annotation_len *= 2;
+ if (annotation_len < 16)
+ annotation_len = 16;
+
+ this->annotation_string = talloc_realloc(this->mem_ctx,
+ annotation_string,
+ const char *,
+ annotation_len);
+ this->annotation_ir = talloc_realloc(this->mem_ctx,
+ annotation_ir,
+ ir_instruction *,
+ annotation_len);
+ }
+
+ for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
+ this->annotation_string[i] = inst->annotation;
+ this->annotation_ir[i] = inst->ir;
+ }
+ last_native_inst = p->nr_insn;
+ }
+}
+
+GLboolean
+brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
+ struct brw_shader *shader = NULL;
+ struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
+
+ if (!prog)
+ return GL_FALSE;
+
+ if (!using_new_fs)
+ return GL_FALSE;
+
+ for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
+ if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
+ shader = (struct brw_shader *)prog->_LinkedShaders[i];
+ break;
+ }
+ }
+ if (!shader)
+ return GL_FALSE;
+
+ /* We always use 8-wide mode, at least for now. For one, flow
+ * control only works in 8-wide. Also, when we're fragment shader
+ * bound, we're almost always under register pressure as well, so
+ * 8-wide would save us from the performance cliff of spilling
+ * regs.
+ */
+ c->dispatch_width = 8;
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ printf("GLSL IR for native fragment shader %d:\n", prog->Name);
+ _mesa_print_ir(shader->ir, NULL);
+ printf("\n");
+ }
+
+ /* Now the main event: Visit the shader IR and generate our FS IR for it.
+ */
+ fs_visitor v(c, shader);
+
+ if (0) {
+ v.emit_dummy_fs();
+ } else {
+ v.emit_interpolation();
+
+ /* Generate FS IR for main(). (the visitor only descends into
+ * functions called "main").
+ */
+ foreach_iter(exec_list_iterator, iter, *shader->ir) {
+ ir_instruction *ir = (ir_instruction *)iter.get();
+ v.base_ir = ir;
+ ir->accept(&v);
+ }
+
+ if (v.fail)
+ return GL_FALSE;
+
+ v.emit_fb_writes();
+ v.assign_curb_setup();
+ v.assign_urb_setup();
+ v.assign_regs();
+ }
+
+ v.generate_code();
+
+ if (INTEL_DEBUG & DEBUG_WM) {
+ const char *last_annotation_string = NULL;
+ ir_instruction *last_annotation_ir = NULL;
+
+ printf("Native code for fragment shader %d:\n", prog->Name);
+ for (unsigned int i = 0; i < p->nr_insn; i++) {
+ if (last_annotation_ir != v.annotation_ir[i]) {
+ last_annotation_ir = v.annotation_ir[i];
+ if (last_annotation_ir) {
+ printf(" ");
+ last_annotation_ir->print();
+ printf("\n");
+ }
+ }
+ if (last_annotation_string != v.annotation_string[i]) {
+ last_annotation_string = v.annotation_string[i];
+ if (last_annotation_string)
+ printf(" %s\n", last_annotation_string);
+ }
+ brw_disasm(stdout, &p->store[i], intel->gen);
+ }
+ printf("\n");
+ }
+
+ c->prog_data.total_grf = v.grf_used;
+ c->prog_data.total_scratch = 0;
+
+ return GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
new file mode 100644
index 0000000000..d8d58a9467
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp
@@ -0,0 +1,365 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_wm_channel_expressions.cpp
+ *
+ * Breaks vector operations down into operations on each component.
+ *
+ * The 965 fragment shader receives 8 or 16 pixels at a time, so each
+ * channel of a vector is laid out as 1 or 2 8-float registers. Each
+ * ALU operation operates on one of those channel registers. As a
+ * result, there is no value to the 965 fragment shader in tracking
+ * "vector" expressions in the sense of GLSL fragment shaders, when
+ * doing a channel at a time may help in constant folding, algebraic
+ * simplification, and reducing the liveness of channel registers.
+ *
+ * The exception to the desire to break everything down to floats is
+ * texturing. The texture sampler returns a writemasked masked
+ * 4/8-register sequence containing the texture values. We don't want
+ * to dispatch to the sampler separately for each channel we need, so
+ * we do retain the vector types in that case.
+ */
+
+extern "C" {
+#include "main/core.h"
+#include "brw_wm.h"
+}
+#include "../glsl/ir.h"
+#include "../glsl/ir_expression_flattening.h"
+#include "../glsl/glsl_types.h"
+
+class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
+public:
+ ir_channel_expressions_visitor()
+ {
+ this->progress = false;
+ this->mem_ctx = NULL;
+ }
+
+ ir_visitor_status visit_leave(ir_assignment *);
+
+ ir_rvalue *get_element(ir_variable *var, unsigned int element);
+ void assign(ir_assignment *ir, int elem, ir_rvalue *val);
+
+ bool progress;
+ void *mem_ctx;
+};
+
+static bool
+channel_expressions_predicate(ir_instruction *ir)
+{
+ ir_expression *expr = ir->as_expression();
+ unsigned int i;
+
+ if (!expr)
+ return false;
+
+ for (i = 0; i < expr->get_num_operands(); i++) {
+ if (expr->operands[i]->type->is_vector())
+ return true;
+ }
+
+ return false;
+}
+
+extern "C" {
+GLboolean
+brw_do_channel_expressions(exec_list *instructions)
+{
+ ir_channel_expressions_visitor v;
+
+ /* Pull out any matrix expression to a separate assignment to a
+ * temp. This will make our handling of the breakdown to
+ * operations on the matrix's vector components much easier.
+ */
+ do_expression_flattening(instructions, channel_expressions_predicate);
+
+ visit_list_elements(&v, instructions);
+
+ return v.progress;
+}
+}
+
+ir_rvalue *
+ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
+{
+ ir_dereference *deref;
+
+ if (var->type->is_scalar())
+ return new(mem_ctx) ir_dereference_variable(var);
+
+ assert(elem < var->type->components());
+ deref = new(mem_ctx) ir_dereference_variable(var);
+ return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
+}
+
+void
+ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
+{
+ ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
+ ir_assignment *assign;
+ ir_swizzle *val_swiz;
+
+ /* This assign-of-expression should have been generated by the
+ * expression flattening visitor (since we never short circit to
+ * not flatten, even for plain assignments of variables), so the
+ * writemask is always full.
+ */
+ assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
+
+ /* Smear the float across all the channels for the masked write. */
+ val_swiz = new(mem_ctx) ir_swizzle(val, 0, 0, 0, 0,
+ ir->lhs->type->components());
+ assign = new(mem_ctx) ir_assignment(lhs, val_swiz, NULL, (1 << elem));
+ ir->insert_before(assign);
+}
+
+ir_visitor_status
+ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
+{
+ ir_expression *expr = ir->rhs->as_expression();
+ bool found_vector = false;
+ unsigned int i, vector_elements = 1;
+ ir_variable *op_var[2];
+
+ if (!expr)
+ return visit_continue;
+
+ if (!this->mem_ctx)
+ this->mem_ctx = talloc_parent(ir);
+
+ for (i = 0; i < expr->get_num_operands(); i++) {
+ if (expr->operands[i]->type->is_vector()) {
+ found_vector = true;
+ vector_elements = expr->operands[i]->type->vector_elements;
+ break;
+ }
+ }
+ if (!found_vector)
+ return visit_continue;
+
+ /* Store the expression operands in temps so we can use them
+ * multiple times.
+ */
+ for (i = 0; i < expr->get_num_operands(); i++) {
+ ir_assignment *assign;
+ ir_dereference *deref;
+
+ assert(!expr->operands[i]->type->is_matrix());
+
+ op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
+ "channel_expressions",
+ ir_var_temporary);
+ ir->insert_before(op_var[i]);
+
+ deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
+ assign = new(mem_ctx) ir_assignment(deref,
+ expr->operands[i],
+ NULL);
+ ir->insert_before(assign);
+ }
+
+ const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
+ 1, 1);
+
+ /* OK, time to break down this vector operation. */
+ switch (expr->operation) {
+ case ir_unop_bit_not:
+ case ir_unop_logic_not:
+ case ir_unop_neg:
+ case ir_unop_abs:
+ case ir_unop_sign:
+ case ir_unop_rcp:
+ case ir_unop_rsq:
+ case ir_unop_sqrt:
+ case ir_unop_exp:
+ case ir_unop_log:
+ case ir_unop_exp2:
+ case ir_unop_log2:
+ case ir_unop_f2i:
+ case ir_unop_i2f:
+ case ir_unop_f2b:
+ case ir_unop_b2f:
+ case ir_unop_i2b:
+ case ir_unop_b2i:
+ case ir_unop_u2f:
+ case ir_unop_trunc:
+ case ir_unop_ceil:
+ case ir_unop_floor:
+ case ir_unop_fract:
+ case ir_unop_sin:
+ case ir_unop_cos:
+ case ir_unop_dFdx:
+ case ir_unop_dFdy:
+ for (i = 0; i < vector_elements; i++) {
+ ir_rvalue *op0 = get_element(op_var[0], i);
+
+ assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
+ element_type,
+ op0,
+ NULL));
+ }
+ break;
+
+ case ir_binop_add:
+ case ir_binop_sub:
+ case ir_binop_mul:
+ case ir_binop_div:
+ case ir_binop_mod:
+ case ir_binop_min:
+ case ir_binop_max:
+ case ir_binop_pow:
+ case ir_binop_lshift:
+ case ir_binop_rshift:
+ case ir_binop_bit_and:
+ case ir_binop_bit_xor:
+ case ir_binop_bit_or:
+ for (i = 0; i < vector_elements; i++) {
+ ir_rvalue *op0 = get_element(op_var[0], i);
+ ir_rvalue *op1 = get_element(op_var[1], i);
+
+ assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
+ element_type,
+ op0,
+ op1));
+ }
+ break;
+
+ case ir_unop_any: {
+ ir_expression *temp;
+ temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
+ element_type,
+ get_element(op_var[0], 0),
+ get_element(op_var[0], 1));
+
+ for (i = 2; i < vector_elements; i++) {
+ temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
+ element_type,
+ get_element(op_var[0], i),
+ temp);
+ }
+ assign(ir, 0, temp);
+ break;
+ }
+
+ case ir_binop_dot: {
+ ir_expression *last = NULL;
+ for (i = 0; i < vector_elements; i++) {
+ ir_rvalue *op0 = get_element(op_var[0], i);
+ ir_rvalue *op1 = get_element(op_var[1], i);
+ ir_expression *temp;
+
+ temp = new(mem_ctx) ir_expression(ir_binop_mul,
+ element_type,
+ op0,
+ op1);
+ if (last) {
+ last = new(mem_ctx) ir_expression(ir_binop_add,
+ element_type,
+ temp,
+ last);
+ } else {
+ last = temp;
+ }
+ }
+ assign(ir, 0, last);
+ break;
+ }
+
+ case ir_binop_cross: {
+ for (i = 0; i < vector_elements; i++) {
+ int swiz0 = (i + 1) % 3;
+ int swiz1 = (i + 2) % 3;
+ ir_expression *temp1, *temp2;
+
+ temp1 = new(mem_ctx) ir_expression(ir_binop_mul,
+ element_type,
+ get_element(op_var[0], swiz0),
+ get_element(op_var[1], swiz1));
+
+ temp2 = new(mem_ctx) ir_expression(ir_binop_mul,
+ element_type,
+ get_element(op_var[1], swiz0),
+ get_element(op_var[0], swiz1));
+
+ temp2 = new(mem_ctx) ir_expression(ir_unop_neg,
+ element_type,
+ temp2,
+ NULL);
+
+ assign(ir, i, new(mem_ctx) ir_expression(ir_binop_add,
+ element_type,
+ temp1, temp2));
+ }
+ break;
+ }
+
+ case ir_binop_less:
+ case ir_binop_greater:
+ case ir_binop_lequal:
+ case ir_binop_gequal:
+ case ir_binop_logic_and:
+ case ir_binop_logic_xor:
+ case ir_binop_logic_or:
+ ir->print();
+ printf("\n");
+ assert(!"not reached: expression operates on scalars only");
+ break;
+ case ir_binop_equal:
+ case ir_binop_nequal: {
+ ir_expression *last = NULL;
+ for (i = 0; i < vector_elements; i++) {
+ ir_rvalue *op0 = get_element(op_var[0], i);
+ ir_rvalue *op1 = get_element(op_var[1], i);
+ ir_expression *temp;
+ ir_expression_operation join;
+
+ if (expr->operation == ir_binop_equal)
+ join = ir_binop_logic_and;
+ else
+ join = ir_binop_logic_or;
+
+ temp = new(mem_ctx) ir_expression(expr->operation,
+ element_type,
+ op0,
+ op1);
+ if (last) {
+ last = new(mem_ctx) ir_expression(join,
+ element_type,
+ temp,
+ last);
+ } else {
+ last = temp;
+ }
+ }
+ assign(ir, 0, last);
+ break;
+ }
+ }
+
+ ir->remove();
+ this->progress = true;
+
+ return visit_continue;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
new file mode 100644
index 0000000000..00d5c20248
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp
@@ -0,0 +1,391 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file brw_wm_vector_splitting.cpp
+ *
+ * If a vector is only ever referenced by its components, then
+ * split those components out to individual variables so they can be
+ * handled normally by other optimization passes.
+ *
+ * This skips vectors in uniforms and varyings, which need to be
+ * accessible as vectors for their access by the GL. Also, vector
+ * results of non-variable-derefs in assignments aren't handled
+ * because to do so we would have to store the vector result to a
+ * temporary in order to unload each channel, and to do so would just
+ * loop us back to where we started. For the 965, this is exactly the
+ * behavior we want for the results of texture lookups, but probably not for
+ */
+
+extern "C" {
+#include "main/core.h"
+#include "intel_context.h"
+}
+#include "../glsl/ir.h"
+#include "../glsl/ir_visitor.h"
+#include "../glsl/ir_print_visitor.h"
+#include "../glsl/ir_rvalue_visitor.h"
+#include "../glsl/glsl_types.h"
+
+static bool debug = false;
+
+class variable_entry : public exec_node
+{
+public:
+ variable_entry(ir_variable *var)
+ {
+ this->var = var;
+ this->whole_vector_access = 0;
+ this->declaration = false;
+ this->mem_ctx = NULL;
+ }
+
+ ir_variable *var; /* The key: the variable's pointer. */
+
+ /** Number of times the variable is referenced, including assignments. */
+ unsigned whole_vector_access;
+
+ bool declaration; /* If the variable had a decl in the instruction stream */
+
+ ir_variable *components[4];
+
+ /** talloc_parent(this->var) -- the shader's talloc context. */
+ void *mem_ctx;
+};
+
+class ir_vector_reference_visitor : public ir_hierarchical_visitor {
+public:
+ ir_vector_reference_visitor(void)
+ {
+ this->mem_ctx = talloc_new(NULL);
+ this->variable_list.make_empty();
+ }
+
+ ~ir_vector_reference_visitor(void)
+ {
+ talloc_free(mem_ctx);
+ }
+
+ virtual ir_visitor_status visit(ir_variable *);
+ virtual ir_visitor_status visit(ir_dereference_variable *);
+ virtual ir_visitor_status visit_enter(ir_swizzle *);
+ virtual ir_visitor_status visit_enter(ir_assignment *);
+ virtual ir_visitor_status visit_enter(ir_function_signature *);
+
+ variable_entry *get_variable_entry(ir_variable *var);
+
+ /* List of variable_entry */
+ exec_list variable_list;
+
+ void *mem_ctx;
+};
+
+variable_entry *
+ir_vector_reference_visitor::get_variable_entry(ir_variable *var)
+{
+ assert(var);
+
+ if (!var->type->is_vector())
+ return NULL;
+
+ switch (var->mode) {
+ case ir_var_uniform:
+ case ir_var_in:
+ case ir_var_out:
+ case ir_var_inout:
+ /* Can't split varyings or uniforms. Function in/outs won't get split
+ * either, so don't care about the ambiguity.
+ */
+ return NULL;
+ case ir_var_auto:
+ case ir_var_temporary:
+ break;
+ }
+
+ foreach_iter(exec_list_iterator, iter, this->variable_list) {
+ variable_entry *entry = (variable_entry *)iter.get();
+ if (entry->var == var)
+ return entry;
+ }
+
+ variable_entry *entry = new(mem_ctx) variable_entry(var);
+ this->variable_list.push_tail(entry);
+ return entry;
+}
+
+
+ir_visitor_status
+ir_vector_reference_visitor::visit(ir_variable *ir)
+{
+ variable_entry *entry = this->get_variable_entry(ir);
+
+ if (entry)
+ entry->declaration = true;
+
+ return visit_continue;
+}
+
+ir_visitor_status
+ir_vector_reference_visitor::visit(ir_dereference_variable *ir)
+{
+ ir_variable *const var = ir->var;
+ variable_entry *entry = this->get_variable_entry(var);
+
+ if (entry)
+ entry->whole_vector_access++;
+
+ return visit_continue;
+}
+
+ir_visitor_status
+ir_vector_reference_visitor::visit_enter(ir_swizzle *ir)
+{
+ /* Don't descend into a vector ir_dereference_variable below. */
+ if (ir->val->as_dereference_variable() && ir->type->is_scalar())
+ return visit_continue_with_parent;
+
+ return visit_continue;
+}
+
+ir_visitor_status
+ir_vector_reference_visitor::visit_enter(ir_assignment *ir)
+{
+ if (ir->lhs->as_dereference_variable() &&
+ ir->rhs->as_dereference_variable() &&
+ !ir->condition) {
+ /* We'll split copies of a vector to copies of channels, so don't
+ * descend to the ir_dereference_variables.
+ */
+ return visit_continue_with_parent;
+ }
+ if (ir->lhs->as_dereference_variable() &&
+ is_power_of_two(ir->write_mask) &&
+ !ir->condition) {
+ /* If we're writing just a channel, then channel-splitting the LHS is OK.
+ */
+ ir->rhs->accept(this);
+ return visit_continue_with_parent;
+ }
+ return visit_continue;
+}
+
+ir_visitor_status
+ir_vector_reference_visitor::visit_enter(ir_function_signature *ir)
+{
+ /* We don't want to descend into the function parameters and
+ * split them, so just accept the body here.
+ */
+ visit_list_elements(this, &ir->body);
+ return visit_continue_with_parent;
+}
+
+class ir_vector_splitting_visitor : public ir_rvalue_visitor {
+public:
+ ir_vector_splitting_visitor(exec_list *vars)
+ {
+ this->variable_list = vars;
+ }
+
+ virtual ir_visitor_status visit_leave(ir_assignment *);
+
+ void handle_rvalue(ir_rvalue **rvalue);
+ struct variable_entry *get_splitting_entry(ir_variable *var);
+
+ exec_list *variable_list;
+ void *mem_ctx;
+};
+
+struct variable_entry *
+ir_vector_splitting_visitor::get_splitting_entry(ir_variable *var)
+{
+ assert(var);
+
+ if (!var->type->is_vector())
+ return NULL;
+
+ foreach_iter(exec_list_iterator, iter, *this->variable_list) {
+ variable_entry *entry = (variable_entry *)iter.get();
+ if (entry->var == var) {
+ return entry;
+ }
+ }
+
+ return NULL;
+}
+
+void
+ir_vector_splitting_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+ if (!*rvalue)
+ return;
+
+ ir_swizzle *swiz = (*rvalue)->as_swizzle();
+ if (!swiz || !swiz->type->is_scalar())
+ return;
+
+ ir_dereference_variable *deref_var = swiz->val->as_dereference_variable();
+ if (!deref_var)
+ return;
+
+ variable_entry *entry = get_splitting_entry(deref_var->var);
+ if (!entry)
+ return;
+
+ ir_variable *var = entry->components[swiz->mask.x];
+ *rvalue = new(entry->mem_ctx) ir_dereference_variable(var);
+}
+
+ir_visitor_status
+ir_vector_splitting_visitor::visit_leave(ir_assignment *ir)
+{
+ ir_dereference_variable *lhs_deref = ir->lhs->as_dereference_variable();
+ ir_dereference_variable *rhs_deref = ir->rhs->as_dereference_variable();
+ variable_entry *lhs = lhs_deref ? get_splitting_entry(lhs_deref->var) : NULL;
+ variable_entry *rhs = rhs_deref ? get_splitting_entry(rhs_deref->var) : NULL;
+
+ if (lhs_deref && rhs_deref && (lhs || rhs) && !ir->condition) {
+ /* Straight assignment of vector variables. */
+ for (unsigned int i = 0; i < ir->rhs->type->vector_elements; i++) {
+ ir_dereference *new_lhs;
+ ir_rvalue *new_rhs;
+ void *mem_ctx = lhs ? lhs->mem_ctx : rhs->mem_ctx;
+ unsigned int writemask;
+
+ if (lhs) {
+ new_lhs = new(mem_ctx) ir_dereference_variable(lhs->components[i]);
+ writemask = (ir->write_mask >> i) & 1;
+ } else {
+ new_lhs = ir->lhs->clone(mem_ctx, NULL);
+ writemask = ir->write_mask & (1 << i);
+ }
+
+ if (rhs) {
+ new_rhs = new(mem_ctx) ir_dereference_variable(rhs->components[i]);
+ /* If we're writing into a writemask, smear it out to that channel. */
+ if (!lhs)
+ new_rhs = new(mem_ctx) ir_swizzle(new_rhs, i, i, i, i, i + 1);
+ } else {
+ new_rhs = new(mem_ctx) ir_swizzle(ir->rhs->clone(mem_ctx, NULL),
+ i, i, i, i, 1);
+ }
+
+ ir->insert_before(new(mem_ctx) ir_assignment(new_lhs,
+ new_rhs,
+ NULL, writemask));
+ }
+ ir->remove();
+ } else if (lhs) {
+ int elem = -1;
+
+ switch (ir->write_mask) {
+ case (1 << 0):
+ elem = 0;
+ break;
+ case (1 << 1):
+ elem = 1;
+ break;
+ case (1 << 2):
+ elem = 2;
+ break;
+ case (1 << 3):
+ elem = 3;
+ break;
+ default:
+ ir->print();
+ assert(!"not reached: non-channelwise dereference of LHS.");
+ }
+
+ ir->lhs = new(mem_ctx) ir_dereference_variable(lhs->components[elem]);
+ ir->write_mask = (1 << 0);
+
+ handle_rvalue(&ir->rhs);
+ ir->rhs = new(mem_ctx) ir_swizzle(ir->rhs,
+ elem, elem, elem, elem, 1);
+ } else {
+ handle_rvalue(&ir->rhs);
+ }
+
+ handle_rvalue(&ir->condition);
+
+ return visit_continue;
+}
+
+extern "C" {
+bool
+brw_do_vector_splitting(exec_list *instructions)
+{
+ ir_vector_reference_visitor refs;
+
+ visit_list_elements(&refs, instructions);
+
+ /* Trim out variables we can't split. */
+ foreach_iter(exec_list_iterator, iter, refs.variable_list) {
+ variable_entry *entry = (variable_entry *)iter.get();
+
+ if (debug) {
+ printf("vector %s@%p: decl %d, whole_access %d\n",
+ entry->var->name, (void *) entry->var, entry->declaration,
+ entry->whole_vector_access);
+ }
+
+ if (!entry->declaration || entry->whole_vector_access) {
+ entry->remove();
+ }
+ }
+
+ if (refs.variable_list.is_empty())
+ return false;
+
+ void *mem_ctx = talloc_new(NULL);
+
+ /* Replace the decls of the vectors to be split with their split
+ * components.
+ */
+ foreach_iter(exec_list_iterator, iter, refs.variable_list) {
+ variable_entry *entry = (variable_entry *)iter.get();
+ const struct glsl_type *type;
+ type = glsl_type::get_instance(entry->var->type->base_type, 1, 1);
+
+ entry->mem_ctx = talloc_parent(entry->var);
+
+ for (unsigned int i = 0; i < entry->var->type->vector_elements; i++) {
+ const char *name = talloc_asprintf(mem_ctx, "%s_%c",
+ entry->var->name,
+ "xyzw"[i]);
+
+ entry->components[i] = new(entry->mem_ctx) ir_variable(type, name,
+ ir_var_temporary);
+ entry->var->insert_before(entry->components[i]);
+ }
+
+ entry->var->remove();
+ }
+
+ ir_vector_splitting_visitor split(&refs.variable_list);
+ visit_list_elements(&split, instructions);
+
+ talloc_free(mem_ctx);
+
+ return true;
+}
+}
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 572175f463..6eeaba7772 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -281,7 +281,7 @@ static void emit_depthbuffer(struct brw_context *brw)
}
assert(region->tiling != I915_TILING_X);
- if (IS_GEN6(intel->intelScreen->deviceID))
+ if (intel->gen >= 6)
assert(region->tiling != I915_TILING_NONE);
BEGIN_BATCH(len);
@@ -295,7 +295,7 @@ static void emit_depthbuffer(struct brw_context *brw)
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
0);
OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) |
- ((region->pitch - 1) << 6) |
+ ((region->width - 1) << 6) |
((region->height - 1) << 19));
OUT_BATCH(0);
diff --git a/src/mesa/drivers/dri/i965/brw_optimize.c b/src/mesa/drivers/dri/i965/brw_optimize.c
index 8aa6fb6cc6..cbed2bd5cb 100644
--- a/src/mesa/drivers/dri/i965/brw_optimize.c
+++ b/src/mesa/drivers/dri/i965/brw_optimize.c
@@ -32,12 +32,7 @@
#include "brw_defines.h"
#include "brw_eu.h"
-static const struct {
- char *name;
- int nsrc;
- int ndst;
- GLboolean is_arith;
-} inst_opcode[128] = {
+const struct brw_instruction_info brw_opcodes[128] = {
[BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1, .is_arith = 1 },
[BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1, .is_arith = 1 },
[BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1, .is_arith = 1 },
@@ -94,7 +89,7 @@ static const struct {
static INLINE
GLboolean brw_is_arithmetic_inst(const struct brw_instruction *inst)
{
- return inst_opcode[inst->header.opcode].is_arith;
+ return brw_opcodes[inst->header.opcode].is_arith;
}
static const GLuint inst_stride[7] = {
@@ -122,7 +117,7 @@ brw_is_grf_written(const struct brw_instruction *inst,
int reg_index, int size,
int gen)
{
- if (inst_opcode[inst->header.opcode].ndst == 0)
+ if (brw_opcodes[inst->header.opcode].ndst == 0)
return GL_FALSE;
if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
@@ -161,20 +156,19 @@ brw_is_grf_written(const struct brw_instruction *inst,
return left < right;
}
-/* Specific path for message register since we need to handle the compr4 case */
-static INLINE GLboolean
-brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
+static GLboolean
+brw_is_mrf_written_alu(const struct brw_instruction *inst,
+ int reg_index, int size)
{
- if (inst_opcode[inst->header.opcode].ndst == 0)
+ if (brw_opcodes[inst->header.opcode].ndst == 0)
return GL_FALSE;
- if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
- if (inst->bits1.ia1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE)
- return GL_TRUE;
-
if (inst->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE)
return GL_FALSE;
+ if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
+ return GL_TRUE;
+
const int reg_start = reg_index * REG_SIZE;
const int reg_end = reg_start + size;
@@ -188,8 +182,6 @@ brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16)
return GL_TRUE;
- GLboolean is_written = GL_FALSE;
-
/* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */
if (is_compr4) {
const int length = 8 * type_size * inst->bits1.da1.dest_horiz_stride;
@@ -210,7 +202,8 @@ brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
const int left1 = MAX2(write_start1, reg_start);
const int right1 = MIN2(write_end1, reg_end);
- is_written = left0 < right0 || left1 < right1;
+ if (left0 < right0 || left1 < right1)
+ return GL_TRUE;
}
else {
int length;
@@ -223,25 +216,41 @@ brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
+ inst->bits1.da1.dest_subreg_nr;
const int write_end = write_start + length;
const int left = MAX2(write_start, reg_start);
- const int right = MIN2(write_end, reg_end);;
+ const int right = MIN2(write_end, reg_end);
- is_written = left < right;
+ if (left < right)
+ return GL_TRUE;
}
- /* SEND may perform an implicit mov to a mrf register */
- if (is_written == GL_FALSE &&
- inst->header.opcode == BRW_OPCODE_SEND &&
- inst->bits1.da1.src0_reg_file != 0) {
+ return GL_FALSE;
+}
- const int mrf_start = inst->header.destreg__conditionalmod;
- const int write_start = mrf_start * REG_SIZE;
- const int write_end = write_start + REG_SIZE;
- const int left = MAX2(write_start, reg_start);
- const int right = MIN2(write_end, reg_end);;
- is_written = left < right;
- }
+/* SEND may perform an implicit mov to a mrf register */
+static GLboolean brw_is_mrf_written_send(const struct brw_instruction *inst,
+ int reg_index, int size)
+{
+
+ const int reg_start = reg_index * REG_SIZE;
+ const int reg_end = reg_start + size;
+ const int mrf_start = inst->header.destreg__conditionalmod;
+ const int write_start = mrf_start * REG_SIZE;
+ const int write_end = write_start + REG_SIZE;
+ const int left = MAX2(write_start, reg_start);
+ const int right = MIN2(write_end, reg_end);
+
+ if (inst->header.opcode != BRW_OPCODE_SEND ||
+ inst->bits1.da1.src0_reg_file == 0)
+ return GL_FALSE;
- return is_written;
+ return left < right;
+}
+
+/* Specific path for message register since we need to handle the compr4 case */
+static INLINE GLboolean
+brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
+{
+ return (brw_is_mrf_written_alu(inst, reg_index, size) ||
+ brw_is_mrf_written_send(inst, reg_index, size));
}
static INLINE GLboolean
@@ -284,7 +293,7 @@ static INLINE GLboolean
brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size)
{
int i, j;
- if (inst_opcode[inst->header.opcode].nsrc == 0)
+ if (brw_opcodes[inst->header.opcode].nsrc == 0)
return GL_FALSE;
/* Look at first source. We must take into account register regions to
@@ -292,7 +301,7 @@ brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size)
* since we do not take into account the fact that some complete registers
* may be skipped
*/
- if (inst_opcode[inst->header.opcode].nsrc >= 1) {
+ if (brw_opcodes[inst->header.opcode].nsrc >= 1) {
if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
if (inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE)
@@ -327,7 +336,7 @@ brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size)
}
/* Second src register */
- if (inst_opcode[inst->header.opcode].nsrc >= 2) {
+ if (brw_opcodes[inst->header.opcode].nsrc >= 2) {
if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT)
if (inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE)
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 4b08d2599b..bc152204a4 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -36,6 +36,7 @@
#include "program/program.h"
#include "program/programopt.h"
#include "tnl/tnl.h"
+#include "talloc.h"
#include "brw_context.h"
#include "brw_wm.h"
@@ -114,10 +115,7 @@ shader_error(GLcontext *ctx, struct gl_program *prog, const char *msg)
shader = _mesa_lookup_shader_program(ctx, prog->Id);
if (shader) {
- if (shader->InfoLog) {
- free(shader->InfoLog);
- }
- shader->InfoLog = _mesa_strdup(msg);
+ shader->InfoLog = talloc_strdup_append(shader->InfoLog, msg);
shader->LinkStatus = GL_FALSE;
}
}
@@ -170,6 +168,9 @@ static GLboolean brwProgramStringNotify( GLcontext *ctx,
* See piglit glsl-{vs,fs}-functions-[23] tests.
*/
for (i = 0; i < prog->NumInstructions; i++) {
+ struct prog_instruction *inst = prog->Instructions + i;
+ int r;
+
if (prog->Instructions[i].Opcode == OPCODE_CAL) {
shader_error(ctx, prog,
"i965 driver doesn't yet support uninlined function "
@@ -177,16 +178,28 @@ static GLboolean brwProgramStringNotify( GLcontext *ctx,
"the end of the function to work around it.\n");
return GL_FALSE;
}
- if (prog->Instructions[i].DstReg.RelAddr &&
- prog->Instructions[i].DstReg.File == PROGRAM_INPUT) {
+
+ if (prog->Instructions[i].Opcode == OPCODE_RET) {
shader_error(ctx, prog,
- "Variable indexing of shader inputs unsupported\n");
+ "i965 driver doesn't yet support \"return\" "
+ "from main().\n");
return GL_FALSE;
}
- if (prog->Instructions[i].DstReg.RelAddr &&
+
+ for (r = 0; r < _mesa_num_inst_src_regs(inst->Opcode); r++) {
+ if (prog->Instructions[i].SrcReg[r].RelAddr &&
+ prog->Instructions[i].SrcReg[r].File == PROGRAM_INPUT) {
+ shader_error(ctx, prog,
+ "Variable indexing of shader inputs unsupported\n");
+ return GL_FALSE;
+ }
+ }
+
+ if (target == GL_FRAGMENT_PROGRAM_ARB &&
+ prog->Instructions[i].DstReg.RelAddr &&
prog->Instructions[i].DstReg.File == PROGRAM_OUTPUT) {
shader_error(ctx, prog,
- "Variable indexing of shader outputs unsupported\n");
+ "Variable indexing of FS outputs unsupported\n");
return GL_FALSE;
}
if (target == GL_FRAGMENT_PROGRAM_ARB) {
@@ -218,5 +231,10 @@ void brwInitFragProgFuncs( struct dd_function_table *functions )
functions->DeleteProgram = brwDeleteProgram;
functions->IsProgramNative = brwIsProgramNative;
functions->ProgramStringNotify = brwProgramStringNotify;
+
+ functions->NewShader = brw_new_shader;
+ functions->NewShaderProgram = brw_new_shader_program;
+ functions->CompileShader = brw_compile_shader;
+ functions->LinkShader = brw_link_shader;
}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index af08446f2d..c5d296b129 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -107,6 +107,7 @@ extern const struct brw_tracked_state gen6_sf_vp;
extern const struct brw_tracked_state gen6_urb;
extern const struct brw_tracked_state gen6_viewport_state;
extern const struct brw_tracked_state gen6_vs_state;
+extern const struct brw_tracked_state gen6_wm_constants;
extern const struct brw_tracked_state gen6_wm_state;
/***********************************************************************
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index f92a19c2aa..a0c130557e 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -35,7 +35,6 @@
#include "brw_state.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
-#include "intel_chipset.h"
/* This is used to initialize brw->state.atoms[]. We could use this
* list directly except for a single atom, brw_constant_buffer, which
@@ -129,7 +128,7 @@ const struct brw_tracked_state *gen6_atoms[] =
&gen6_cc_state_pointers,
&brw_vs_constants, /* Before vs_surfaces and constant_buffer */
- &brw_wm_constants, /* Before wm_surfaces and constant_buffer */
+ &gen6_wm_constants, /* Before wm_surfaces and constant_buffer */
&brw_vs_surfaces, /* must do before unit */
&brw_wm_constant_surface, /* must do before wm surfaces/bind bo */
@@ -351,7 +350,7 @@ void brw_validate_state( struct brw_context *brw )
brw_add_validated_bo(brw, intel->batch->buf);
- if (IS_GEN6(intel->intelScreen->deviceID)) {
+ if (intel->gen >= 6) {
atoms = gen6_atoms;
num_atoms = ARRAY_SIZE(gen6_atoms);
} else {
@@ -425,7 +424,7 @@ void brw_upload_state(struct brw_context *brw)
const struct brw_tracked_state **atoms;
int num_atoms;
- if (IS_GEN6(intel->intelScreen->deviceID)) {
+ if (intel->gen >= 6) {
atoms = gen6_atoms;
num_atoms = ARRAY_SIZE(gen6_atoms);
} else {
diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h
index 2fde42a706..2a118e01c5 100644
--- a/src/mesa/drivers/dri/i965/brw_structs.h
+++ b/src/mesa/drivers/dri/i965/brw_structs.h
@@ -750,7 +750,7 @@ struct gen6_depth_stencil_state
} ds1;
struct {
- GLuint pad0:25;
+ GLuint pad0:26;
GLuint depth_write_enable:1;
GLuint depth_test_func:3;
GLuint pad1:1;
@@ -1305,13 +1305,14 @@ struct brw_instruction
GLuint access_mode:1;
GLuint mask_control:1;
GLuint dependency_control:2;
- GLuint compression_control:2;
+ GLuint compression_control:2; /* gen6: quater control */
GLuint thread_control:2;
GLuint predicate_control:4;
GLuint predicate_inverse:1;
GLuint execution_size:3;
GLuint destreg__conditionalmod:4; /* destreg - send, conditionalmod - others */
- GLuint pad0:2;
+ GLuint acc_wr_control:1;
+ GLuint cmpt_control:1;
GLuint debug_control:1;
GLuint saturate:1;
} header;
@@ -1359,7 +1360,7 @@ struct brw_instruction
GLuint dest_writemask:4;
GLuint dest_subreg_nr:1;
GLuint dest_reg_nr:8;
- GLuint pad1:2;
+ GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} da16;
@@ -1373,7 +1374,7 @@ struct brw_instruction
GLuint dest_writemask:4;
GLint dest_indirect_offset:6;
GLuint dest_subreg_nr:3;
- GLuint pad1:2;
+ GLuint dest_horiz_stride:2;
GLuint dest_address_mode:1;
} ia16;
} bits1;
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 9a832af9a9..9f90e1e5e5 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -75,10 +75,10 @@ static void do_vs_prog( struct brw_context *brw,
c.prog_data.outputs_written |= BITFIELD64_BIT(VERT_RESULT_TEX0 + i);
}
- if (0)
- _mesa_print_program(&c.vp->program.Base);
-
-
+ if (0) {
+ _mesa_fprint_program_opt(stdout, &c.vp->program.Base, PROG_PRINT_DEBUG,
+ GL_TRUE);
+ }
/* Emit GEN4 code.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index b6b558e9a6..1d88c6b5a4 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -47,6 +47,7 @@ brw_vs_arg_can_be_immediate(enum prog_opcode opcode, int arg)
[OPCODE_MOV] = 1,
[OPCODE_ADD] = 2,
[OPCODE_CMP] = 3,
+ [OPCODE_DP2] = 2,
[OPCODE_DP3] = 2,
[OPCODE_DP4] = 2,
[OPCODE_DPH] = 2,
@@ -97,6 +98,39 @@ static void release_tmps( struct brw_vs_compile *c )
c->last_tmp = c->first_tmp;
}
+static int
+get_first_reladdr_output(struct gl_vertex_program *vp)
+{
+ int i;
+ int first_reladdr_output = VERT_RESULT_MAX;
+
+ for (i = 0; i < vp->Base.NumInstructions; i++) {
+ struct prog_instruction *inst = vp->Base.Instructions + i;
+
+ if (inst->DstReg.File == PROGRAM_OUTPUT &&
+ inst->DstReg.RelAddr &&
+ inst->DstReg.Index < first_reladdr_output)
+ first_reladdr_output = inst->DstReg.Index;
+ }
+
+ return first_reladdr_output;
+}
+
+/* Clears the record of which vp_const_buffer elements have been
+ * loaded into our constant buffer registers, for the starts of new
+ * blocks after control flow.
+ */
+static void
+clear_current_const(struct brw_vs_compile *c)
+{
+ unsigned int i;
+
+ if (c->vp->use_const_buffer) {
+ for (i = 0; i < 3; i++) {
+ c->current_const[i].index = -1;
+ }
+ }
+}
/**
* Preallocate GRF register before code emit.
@@ -108,6 +142,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
struct intel_context *intel = &c->func.brw->intel;
GLuint i, reg = 0, mrf;
int attributes_in_vue;
+ int first_reladdr_output;
/* Determine whether to use a real constant buffer or use a block
* of GRF registers for constants. The later is faster but only
@@ -225,6 +260,7 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
else
mrf = 4;
+ first_reladdr_output = get_first_reladdr_output(&c->vp->program);
for (i = 0; i < VERT_RESULT_MAX; i++) {
if (c->prog_data.outputs_written & BITFIELD64_BIT(i)) {
c->nr_outputs++;
@@ -253,15 +289,16 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
* For attributes beyond the compute-to-MRF, we compute to
* GRFs and they will be written in the second URB_WRITE.
*/
- if (mrf < 15) {
+ if (first_reladdr_output > i && mrf < 15) {
c->regs[PROGRAM_OUTPUT][i] = brw_message_reg(mrf);
mrf++;
}
else {
- if (!c->first_overflow_output)
+ if (mrf >= 15 && !c->first_overflow_output)
c->first_overflow_output = i;
c->regs[PROGRAM_OUTPUT][i] = brw_vec8_grf(reg, 0);
reg++;
+ mrf++;
}
}
}
@@ -292,10 +329,10 @@ static void brw_vs_alloc_regs( struct brw_vs_compile *c )
if (c->vp->use_const_buffer) {
for (i = 0; i < 3; i++) {
- c->current_const[i].index = -1;
c->current_const[i].reg = brw_vec8_grf(reg, 0);
reg++;
}
+ clear_current_const(c);
}
for (i = 0; i < 128; i++) {
@@ -502,6 +539,23 @@ static void emit_cmp( struct brw_compile *p,
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
}
+static void emit_sign(struct brw_vs_compile *c,
+ struct brw_reg dst,
+ struct brw_reg arg0)
+{
+ struct brw_compile *p = &c->func;
+
+ brw_MOV(p, dst, brw_imm_f(0));
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, brw_imm_f(0));
+ brw_MOV(p, dst, brw_imm_f(-1.0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, arg0, brw_imm_f(0));
+ brw_MOV(p, dst, brw_imm_f(1.0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+}
+
static void emit_max( struct brw_compile *p,
struct brw_reg dst,
struct brw_reg arg0,
@@ -1010,13 +1064,11 @@ move_to_reladdr_dst(struct brw_vs_compile *c,
int reg_size = 32;
struct brw_reg addr_reg = c->regs[PROGRAM_ADDRESS][0];
struct brw_reg vp_address = retype(vec1(addr_reg), BRW_REGISTER_TYPE_D);
- struct brw_reg temp_base = c->regs[inst->DstReg.File][0];
- GLuint byte_offset = temp_base.nr * 32 + temp_base.subnr;
+ struct brw_reg base = c->regs[inst->DstReg.File][inst->DstReg.Index];
+ GLuint byte_offset = base.nr * 32 + base.subnr;
struct brw_reg indirect = brw_vec4_indirect(0,0);
struct brw_reg acc = retype(vec1(get_tmp(c)), BRW_REGISTER_TYPE_UW);
- byte_offset += inst->DstReg.Index * reg_size;
-
brw_push_insn_state(p);
brw_set_access_mode(p, BRW_ALIGN_1);
@@ -1162,10 +1214,12 @@ static struct brw_reg get_arg( struct brw_vs_compile *c,
/* Convert 3-bit swizzle to 2-bit.
*/
- reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
- GET_SWZ(src->Swizzle, 1),
- GET_SWZ(src->Swizzle, 2),
- GET_SWZ(src->Swizzle, 3));
+ if (reg.file != BRW_IMMEDIATE_VALUE) {
+ reg.dw1.bits.swizzle = BRW_SWIZZLE4(GET_SWZ(src->Swizzle, 0),
+ GET_SWZ(src->Swizzle, 1),
+ GET_SWZ(src->Swizzle, 2),
+ GET_SWZ(src->Swizzle, 3));
+ }
/* Note this is ok for non-swizzle instructions:
*/
@@ -1211,6 +1265,7 @@ static struct brw_reg get_dst( struct brw_vs_compile *c,
reg = brw_null_reg();
}
+ assert(reg.type != BRW_IMMEDIATE_VALUE);
reg.dw1.bits.writemask = dst.WriteMask;
return reg;
@@ -1299,6 +1354,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
struct brw_reg ndc;
int eot;
GLuint len_vertex_header = 2;
+ int next_mrf, i;
if (c->key.copy_edgeflag) {
brw_MOV(p,
@@ -1376,6 +1432,7 @@ static void emit_vertex_write( struct brw_vs_compile *c)
* of zeros followed by two sets of NDC coordinates:
*/
brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_set_acc_write_control(p, 0);
/* The VUE layout is documented in Volume 2a. */
if (intel->gen >= 6) {
@@ -1416,6 +1473,23 @@ static void emit_vertex_write( struct brw_vs_compile *c)
len_vertex_header = 2;
}
+ /* Move variable-addressed, non-overflow outputs to their MRFs. */
+ next_mrf = 2 + len_vertex_header;
+ for (i = 0; i < VERT_RESULT_MAX; i++) {
+ if (c->first_overflow_output > 0 && i >= c->first_overflow_output)
+ break;
+ if (!(c->prog_data.outputs_written & BITFIELD64_BIT(i)))
+ continue;
+
+ if (i >= VERT_RESULT_TEX0 &&
+ c->regs[PROGRAM_OUTPUT][i].file == BRW_GENERAL_REGISTER_FILE) {
+ brw_MOV(p, brw_message_reg(next_mrf), c->regs[PROGRAM_OUTPUT][i]);
+ next_mrf++;
+ } else if (c->regs[PROGRAM_OUTPUT][i].file == BRW_MESSAGE_REGISTER_FILE) {
+ next_mrf = c->regs[PROGRAM_OUTPUT][i].nr + 1;
+ }
+ }
+
eot = (c->first_overflow_output == 0);
brw_urb_WRITE(p,
@@ -1541,18 +1615,23 @@ void brw_vs_emit(struct brw_vs_compile *c )
const GLuint nr_insns = c->vp->program.Base.NumInstructions;
GLuint insn, if_depth = 0, loop_depth = 0;
struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH] = { 0 };
+ int if_depth_in_loop[MAX_LOOP_DEPTH];
const struct brw_indirect stack_index = brw_indirect(0, 0);
GLuint index;
GLuint file;
if (INTEL_DEBUG & DEBUG_VS) {
printf("vs-mesa:\n");
- _mesa_print_program(&c->vp->program.Base);
+ _mesa_fprint_program_opt(stdout, &c->vp->program.Base, PROG_PRINT_DEBUG,
+ GL_TRUE);
printf("\n");
}
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_access_mode(p, BRW_ALIGN_16);
+ if_depth_in_loop[loop_depth] = 0;
+
+ brw_set_acc_write_control(p, 1);
for (insn = 0; insn < nr_insns; insn++) {
GLuint i;
@@ -1591,7 +1670,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
const struct prog_instruction *inst = &c->vp->program.Base.Instructions[insn];
struct brw_reg args[3], dst;
GLuint i;
-
+
#if 0
printf("%d: ", insn);
_mesa_print_instruction(inst);
@@ -1636,6 +1715,9 @@ void brw_vs_emit(struct brw_vs_compile *c )
case OPCODE_COS:
emit_math1(c, BRW_MATH_FUNCTION_COS, dst, args[0], BRW_MATH_PRECISION_FULL);
break;
+ case OPCODE_DP2:
+ brw_DP2(p, dst, args[0], args[1]);
+ break;
case OPCODE_DP3:
brw_DP3(p, dst, args[0], args[1]);
break;
@@ -1732,6 +1814,9 @@ void brw_vs_emit(struct brw_vs_compile *c )
case OPCODE_SLE:
unalias2(c, dst, args[0], args[1], emit_sle);
break;
+ case OPCODE_SSG:
+ unalias1(c, dst, args[0], emit_sign);
+ break;
case OPCODE_SUB:
brw_ADD(p, dst, args[0], negate(args[1]));
break;
@@ -1753,31 +1838,38 @@ void brw_vs_emit(struct brw_vs_compile *c )
if_inst[if_depth] = brw_IF(p, BRW_EXECUTE_8);
/* Note that brw_IF smashes the predicate_control field. */
if_inst[if_depth]->header.predicate_control = get_predicate(inst);
+ if_depth_in_loop[loop_depth]++;
if_depth++;
break;
case OPCODE_ELSE:
+ clear_current_const(c);
assert(if_depth > 0);
if_inst[if_depth-1] = brw_ELSE(p, if_inst[if_depth-1]);
break;
case OPCODE_ENDIF:
+ clear_current_const(c);
assert(if_depth > 0);
brw_ENDIF(p, if_inst[--if_depth]);
+ if_depth_in_loop[loop_depth]--;
break;
case OPCODE_BGNLOOP:
+ clear_current_const(c);
loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ if_depth_in_loop[loop_depth] = 0;
break;
case OPCODE_BRK:
brw_set_predicate_control(p, get_predicate(inst));
- brw_BREAK(p);
+ brw_BREAK(p, if_depth_in_loop[loop_depth]);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_CONT:
brw_set_predicate_control(p, get_predicate(inst));
- brw_CONT(p);
+ brw_CONT(p, if_depth_in_loop[loop_depth]);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_ENDLOOP:
{
+ clear_current_const(c);
struct brw_instruction *inst0, *inst1;
GLuint br = 1;
@@ -1793,12 +1885,10 @@ void brw_vs_emit(struct brw_vs_compile *c )
if (inst0->header.opcode == BRW_OPCODE_BREAK &&
inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
- inst0->bits3.if_else.pop_count = 0;
}
else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
- inst0->bits3.if_else.pop_count = 0;
}
}
}
@@ -1883,11 +1973,9 @@ void brw_vs_emit(struct brw_vs_compile *c )
}
}
- if (inst->DstReg.RelAddr && inst->DstReg.File == PROGRAM_TEMPORARY) {
- /* We don't do RelAddr of PROGRAM_OUTPUT yet, because of the
- * compute-to-mrf and the fact that we are allocating
- * registers for only the used PROGRAM_OUTPUTs.
- */
+ if (inst->DstReg.RelAddr) {
+ assert(inst->DstReg.File == PROGRAM_TEMPORARY||
+ inst->DstReg.File == PROGRAM_OUTPUT);
move_to_reladdr_dst(c, inst, dst);
}
@@ -1903,7 +1991,7 @@ void brw_vs_emit(struct brw_vs_compile *c )
printf("vs-native:\n");
for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stderr, &p->store[i], intel->gen);
+ brw_disasm(stdout, &p->store[i], intel->gen);
printf("\n");
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_vtbl.c b/src/mesa/drivers/dri/i965/brw_vtbl.c
index 14227a5133..8f1601d10f 100644
--- a/src/mesa/drivers/dri/i965/brw_vtbl.c
+++ b/src/mesa/drivers/dri/i965/brw_vtbl.c
@@ -101,6 +101,7 @@ static void brw_destroy_context( struct intel_context *intel )
dri_bo_release(&brw->wm.prog_bo);
dri_bo_release(&brw->wm.state_bo);
dri_bo_release(&brw->wm.const_bo);
+ dri_bo_release(&brw->wm.push_const_bo);
dri_bo_release(&brw->cc.prog_bo);
dri_bo_release(&brw->cc.state_bo);
dri_bo_release(&brw->cc.vp_bo);
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index e182fc3202..d70be7bda2 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -32,7 +32,7 @@
#include "brw_context.h"
#include "brw_wm.h"
#include "brw_state.h"
-
+#include "main/formats.h"
/** Return number of src args for given instruction */
GLuint brw_wm_nr_args( GLuint opcode )
@@ -68,6 +68,7 @@ GLuint brw_wm_is_scalar_result( GLuint opcode )
case OPCODE_RCP:
case OPCODE_RSQ:
case OPCODE_SIN:
+ case OPCODE_DP2:
case OPCODE_DP3:
case OPCODE_DP4:
case OPCODE_DPH:
@@ -177,17 +178,19 @@ static void do_wm_prog( struct brw_context *brw,
/* temporary sanity check assertion */
ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
- /*
- * Shader which use GLSL features such as flow control are handled
- * differently from "simple" shaders.
- */
- if (fp->isGLSL) {
- c->dispatch_width = 8;
- brw_wm_glsl_emit(brw, c);
- }
- else {
- c->dispatch_width = 16;
- brw_wm_non_glsl_emit(brw, c);
+ if (!brw_wm_fs_emit(brw, c)) {
+ /*
+ * Shader which use GLSL features such as flow control are handled
+ * differently from "simple" shaders.
+ */
+ if (fp->isGLSL) {
+ c->dispatch_width = 8;
+ brw_wm_glsl_emit(brw, c);
+ }
+ else {
+ c->dispatch_width = 16;
+ brw_wm_non_glsl_emit(brw, c);
+ }
}
if (INTEL_DEBUG & DEBUG_WM)
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index f40977fab8..2639d4f26b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -61,7 +61,7 @@ struct brw_wm_prog_key {
GLuint source_depth_reg:3;
GLuint aa_dest_stencil_reg:3;
GLuint dest_depth_reg:3;
- GLuint nr_depth_regs:3;
+ GLuint nr_payload_regs:4;
GLuint computes_depth:1; /* could be derived from program string */
GLuint source_depth_to_render_target:1;
GLuint flat_shade:1;
@@ -306,6 +306,7 @@ void brw_wm_lookup_iz( GLuint line_aa,
GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
+GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c);
/* brw_wm_emit.c */
void emit_alu1(struct brw_compile *p,
@@ -343,6 +344,11 @@ void emit_delta_xy(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
const struct brw_reg *arg0);
+void emit_dp2(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1);
void emit_dp3(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
@@ -425,6 +431,10 @@ void emit_sop(struct brw_compile *p,
GLuint cond,
const struct brw_reg *arg0,
const struct brw_reg *arg1);
+void emit_sign(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0);
void emit_tex(struct brw_wm_compile *c,
struct brw_reg *dst,
GLuint dst_flags,
@@ -450,4 +460,13 @@ void emit_xpd(struct brw_compile *p,
const struct brw_reg *arg0,
const struct brw_reg *arg1);
+GLboolean brw_compile_shader(GLcontext *ctx,
+ struct gl_shader *shader);
+GLboolean brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog);
+struct gl_shader *brw_new_shader(GLcontext *ctx, GLuint name, GLuint type);
+struct gl_shader_program *brw_new_shader_program(GLcontext *ctx, GLuint name);
+
+GLboolean brw_do_channel_expressions(struct exec_list *instructions);
+GLboolean brw_do_vector_splitting(struct exec_list *instructions);
+
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_wm_debug.c b/src/mesa/drivers/dri/i965/brw_wm_debug.c
index a78cc8b54e..6a91251a80 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_debug.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_debug.c
@@ -44,16 +44,16 @@ void brw_wm_print_value( struct brw_wm_compile *c,
printf("undef");
else if( value - c->vreg >= 0 &&
value - c->vreg < BRW_WM_MAX_VREG)
- printf("r%d", value - c->vreg);
+ printf("r%ld", (long) (value - c->vreg));
else if (value - c->creg >= 0 &&
value - c->creg < BRW_WM_MAX_PARAM)
- printf("c%d", value - c->creg);
+ printf("c%ld", (long) (value - c->creg));
else if (value - c->payload.input_interp >= 0 &&
value - c->payload.input_interp < FRAG_ATTRIB_MAX)
- printf("i%d", value - c->payload.input_interp);
+ printf("i%ld", (long) (value - c->payload.input_interp));
else if (value - c->payload.depth >= 0 &&
value - c->payload.depth < FRAG_ATTRIB_MAX)
- printf("d%d", value - c->payload.depth);
+ printf("d%ld", (long) (value - c->payload.depth));
else
printf("?");
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_emit.c b/src/mesa/drivers/dri/i965/brw_wm_emit.c
index d9fa2e6335..f3ad01b3fe 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_emit.c
@@ -668,6 +668,28 @@ void emit_cmp(struct brw_compile *p,
}
}
+void emit_sign(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0)
+{
+ GLuint i;
+
+ for (i = 0; i < 4; i++) {
+ if (mask & (1<<i)) {
+ brw_MOV(p, dst[i], brw_imm_f(0.0));
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0[i], brw_imm_f(0));
+ brw_MOV(p, dst[i], brw_imm_f(-1.0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+ brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, arg0[i], brw_imm_f(0));
+ brw_MOV(p, dst[i], brw_imm_f(1.0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ }
+ }
+}
+
void emit_max(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
@@ -709,6 +731,27 @@ void emit_min(struct brw_compile *p,
}
+void emit_dp2(struct brw_compile *p,
+ const struct brw_reg *dst,
+ GLuint mask,
+ const struct brw_reg *arg0,
+ const struct brw_reg *arg1)
+{
+ int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
+
+ if (!(mask & WRITEMASK_XYZW))
+ return; /* Do not emit dead code */
+
+ assert(is_power_of_two(mask & WRITEMASK_XYZW));
+
+ brw_MUL(p, brw_null_reg(), arg0[0], arg1[0]);
+
+ brw_set_saturate(p, (mask & SATURATE) ? 1 : 0);
+ brw_MAC(p, dst[dst_chan], arg0[1], arg1[1]);
+ brw_set_saturate(p, 0);
+}
+
+
void emit_dp3(struct brw_compile *p,
const struct brw_reg *dst,
GLuint mask,
@@ -809,21 +852,28 @@ void emit_math1(struct brw_wm_compile *c,
const struct brw_reg *arg0)
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
int dst_chan = _mesa_ffs(mask & WRITEMASK_XYZW) - 1;
GLuint saturate = ((mask & SATURATE) ?
BRW_MATH_SATURATE_SATURATE :
BRW_MATH_SATURATE_NONE);
+ struct brw_reg src;
+
+ if (intel->gen >= 6 && arg0[0].hstride == BRW_HORIZONTAL_STRIDE_0) {
+ /* Gen6 math requires that source and dst horizontal stride be 1.
+ *
+ */
+ src = *dst;
+ brw_MOV(p, src, arg0[0]);
+ } else {
+ src = arg0[0];
+ }
if (!(mask & WRITEMASK_XYZW))
return; /* Do not emit dead code */
assert(is_power_of_two(mask & WRITEMASK_XYZW));
- /* If compressed, this will write message reg 2,3 from arg0.x's 16
- * channels.
- */
- brw_MOV(p, brw_message_reg(2), arg0[0]);
-
/* Send two messages to perform all 16 operations:
*/
brw_push_insn_state(p);
@@ -833,7 +883,7 @@ void emit_math1(struct brw_wm_compile *c,
function,
saturate,
2,
- brw_null_reg(),
+ src,
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
@@ -844,7 +894,7 @@ void emit_math1(struct brw_wm_compile *c,
function,
saturate,
3,
- brw_null_reg(),
+ sechalf(src),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
@@ -873,13 +923,6 @@ void emit_math2(struct brw_wm_compile *c,
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, brw_message_reg(2), arg0[0]);
- if (c->dispatch_width == 16) {
- brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
- brw_MOV(p, brw_message_reg(4), sechalf(arg0[0]));
- }
-
- brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, brw_message_reg(3), arg1[0]);
if (c->dispatch_width == 16) {
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
@@ -892,7 +935,7 @@ void emit_math2(struct brw_wm_compile *c,
function,
saturate,
2,
- brw_null_reg(),
+ arg0[0],
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
@@ -905,7 +948,7 @@ void emit_math2(struct brw_wm_compile *c,
function,
saturate,
4,
- brw_null_reg(),
+ sechalf(arg0[0]),
BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
@@ -1199,6 +1242,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
GLuint eot )
{
struct brw_compile *p = &c->func;
+ struct intel_context *intel = &p->brw->intel;
struct brw_reg dst;
if (c->dispatch_width == 16)
@@ -1209,6 +1253,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
/* Pass through control information:
*/
/* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
+ if (intel->gen < 6) /* gen6, use headerless for fb write */
{
brw_push_insn_state(p);
brw_set_mask_control(p, BRW_MASK_DISABLE); /* ? */
@@ -1222,6 +1267,7 @@ static void fire_fb_write( struct brw_wm_compile *c,
/* Send framebuffer write message: */
/* send (16) null.0<1>:uw m0 r0.0<8;8,1>:uw 0x85a04000:ud { Align1 EOT } */
brw_fb_WRITE(p,
+ c->dispatch_width,
dst,
base_reg,
retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
@@ -1263,8 +1309,10 @@ void emit_fb_write(struct brw_wm_compile *c,
{
struct brw_compile *p = &c->func;
struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
GLuint nr = 2;
GLuint channel;
+ int base_reg; /* For gen6 fb write with no header, starting from color payload directly!. */
/* Reserve a space for AA - may not be needed:
*/
@@ -1276,9 +1324,40 @@ void emit_fb_write(struct brw_wm_compile *c,
*/
brw_push_insn_state(p);
+ if (intel->gen >= 6)
+ base_reg = nr;
+ else
+ base_reg = 0;
+
for (channel = 0; channel < 4; channel++) {
- if (c->dispatch_width == 16 && brw->has_compr4) {
- /* By setting the high bit of the MRF register number, we indicate
+ if (intel->gen >= 6) {
+ /* gen6 SIMD16 single source DP write looks like:
+ * m + 0: r0
+ * m + 1: r1
+ * m + 2: g0
+ * m + 3: g1
+ * m + 4: b0
+ * m + 5: b1
+ * m + 6: a0
+ * m + 7: a1
+ */
+ if (c->dispatch_width == 16) {
+ brw_MOV(p, brw_message_reg(nr + channel * 2), arg0[channel]);
+ } else {
+ brw_MOV(p, brw_message_reg(nr + channel), arg0[channel]);
+ }
+ } else if (c->dispatch_width == 16 && brw->has_compr4) {
+ /* pre-gen6 SIMD16 single source DP write looks like:
+ * m + 0: r0
+ * m + 1: g0
+ * m + 2: b0
+ * m + 3: a0
+ * m + 4: r1
+ * m + 5: g1
+ * m + 6: b1
+ * m + 7: a1
+ *
+ * By setting the high bit of the MRF register number, we indicate
* that we want COMPR4 mode - instead of doing the usual destination
* + 1 for the second half we get destination + 4.
*/
@@ -1303,7 +1382,11 @@ void emit_fb_write(struct brw_wm_compile *c,
}
/* skip over the regs populated above:
*/
- nr += 8;
+ if (c->dispatch_width == 16)
+ nr += 8;
+ else
+ nr += 4;
+
brw_pop_insn_state(p);
if (c->key.source_depth_to_render_target)
@@ -1336,11 +1419,16 @@ void emit_fb_write(struct brw_wm_compile *c,
nr += 2;
}
+ if (intel->gen >= 6) {
+ /* Subtract off the message header, since we send headerless. */
+ nr -= 2;
+ }
+
if (!c->key.runtime_check_aads_emit) {
if (c->key.aa_dest_stencil_reg)
emit_aa(c, arg1, 2);
- fire_fb_write(c, 0, nr, target, eot);
+ fire_fb_write(c, base_reg, nr, target, eot);
}
else {
struct brw_reg v1_null_ud = vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
@@ -1562,6 +1650,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_ddxy(p, dst, dst_flags, GL_FALSE, args[0]);
break;
+ case OPCODE_DP2:
+ emit_dp2(p, dst, dst_flags, args[0], args[1]);
+ break;
+
case OPCODE_DP3:
emit_dp3(p, dst, dst_flags, args[0], args[1]);
break;
@@ -1673,6 +1765,10 @@ void brw_wm_emit( struct brw_wm_compile *c )
emit_sne(p, dst, dst_flags, args[0], args[1]);
break;
+ case OPCODE_SSG:
+ emit_sign(p, dst, dst_flags, args[0]);
+ break;
+
case OPCODE_LIT:
emit_lit(c, dst, dst_flags, args[0]);
break;
@@ -1724,7 +1820,7 @@ void brw_wm_emit( struct brw_wm_compile *c )
printf("wm-native:\n");
for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stderr, &p->store[i], p->brw->intel.gen);
+ brw_disasm(stdout, &p->store[i], p->brw->intel.gen);
printf("\n");
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_fp.c b/src/mesa/drivers/dri/i965/brw_wm_fp.c
index 0bef874b88..3870bf10fc 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_fp.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_fp.c
@@ -88,6 +88,7 @@ static struct prog_src_register src_reg(GLuint file, GLuint idx)
reg.RelAddr = 0;
reg.Negate = NEGATE_NONE;
reg.Abs = 0;
+ reg.HasIndex2 = 0;
return reg;
}
@@ -1036,13 +1037,12 @@ static void print_insns( const struct prog_instruction *insn,
for (i = 0; i < nr; i++, insn++) {
printf("%3d: ", i);
if (insn->Opcode < MAX_OPCODE)
- _mesa_print_instruction(insn);
+ _mesa_fprint_instruction_opt(stdout, insn, 0, PROG_PRINT_DEBUG, NULL);
else if (insn->Opcode < MAX_WM_OPCODE) {
GLuint idx = insn->Opcode - MAX_OPCODE;
- _mesa_print_alu_instruction(insn,
- wm_opcode_strings[idx],
- 3);
+ _mesa_fprint_alu_instruction(stdout, insn, wm_opcode_strings[idx],
+ 3, PROG_PRINT_DEBUG, NULL);
}
else
printf("965 Opcode %d\n", insn->Opcode);
@@ -1061,7 +1061,8 @@ void brw_wm_pass_fp( struct brw_wm_compile *c )
if (INTEL_DEBUG & DEBUG_WM) {
printf("pre-fp:\n");
- _mesa_print_program(&fp->program.Base);
+ _mesa_fprint_program_opt(stdout, &fp->program.Base, PROG_PRINT_DEBUG,
+ GL_TRUE);
printf("\n");
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_glsl.c b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
index 575f89b17f..c1083c5942 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_glsl.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_glsl.c
@@ -303,13 +303,13 @@ static void prealloc_reg(struct brw_wm_compile *c)
c->first_free_grf = 0;
for (i = 0; i < 4; i++) {
- if (i < c->key.nr_depth_regs)
+ if (i < (c->key.nr_payload_regs + 1) / 2)
reg = brw_vec8_grf(i * 2, 0);
else
reg = brw_vec8_grf(0, 0);
set_reg(c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i, reg);
}
- reg_index += 2 * c->key.nr_depth_regs;
+ reg_index += c->key.nr_payload_regs;
/* constants */
{
@@ -380,7 +380,7 @@ static void prealloc_reg(struct brw_wm_compile *c)
}
}
- c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+ c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
c->prog_data.urb_read_length = urb_read_length;
c->prog_data.curb_read_length = c->nr_creg;
c->emit_mask_reg = brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, reg_index, 0);
@@ -1803,12 +1803,15 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32
struct brw_instruction *if_inst[MAX_IF_DEPTH], *loop_inst[MAX_LOOP_DEPTH];
+ int if_depth_in_loop[MAX_LOOP_DEPTH];
GLuint i, if_depth = 0, loop_depth = 0;
struct brw_compile *p = &c->func;
struct brw_indirect stack_index = brw_indirect(0, 0);
c->out_of_regs = GL_FALSE;
+ if_depth_in_loop[loop_depth] = 0;
+
prealloc_reg(c);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
@@ -1903,6 +1906,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_SWZ:
emit_alu1(p, brw_MOV, dst, dst_flags, args[0]);
break;
+ case OPCODE_DP2:
+ emit_dp2(p, dst, dst_flags, args[0], args[1]);
+ break;
case OPCODE_DP3:
emit_dp3(p, dst, dst_flags, args[0], args[1]);
break;
@@ -1971,6 +1977,9 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
emit_sop(p, dst, dst_flags,
BRW_CONDITIONAL_NEQ, args[0], args[1]);
break;
+ case OPCODE_SSG:
+ emit_sign(p, dst, dst_flags, args[0]);
+ break;
case OPCODE_MUL:
emit_alu2(p, brw_MUL, dst, dst_flags, args[0], args[1]);
break;
@@ -2014,6 +2023,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_IF:
assert(if_depth < MAX_IF_DEPTH);
if_inst[if_depth++] = brw_IF(p, BRW_EXECUTE_8);
+ if_depth_in_loop[loop_depth]++;
break;
case OPCODE_ELSE:
assert(if_depth > 0);
@@ -2022,6 +2032,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_ENDIF:
assert(if_depth > 0);
brw_ENDIF(p, if_inst[--if_depth]);
+ if_depth_in_loop[loop_depth]--;
break;
case OPCODE_BGNSUB:
brw_save_label(p, inst->Comment, p->nr_insn);
@@ -2056,13 +2067,14 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
case OPCODE_BGNLOOP:
/* XXX may need to invalidate the current_constant regs */
loop_inst[loop_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ if_depth_in_loop[loop_depth] = 0;
break;
case OPCODE_BRK:
- brw_BREAK(p);
+ brw_BREAK(p, if_depth_in_loop[loop_depth]);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_CONT:
- brw_CONT(p);
+ brw_CONT(p, if_depth_in_loop[loop_depth]);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
break;
case OPCODE_ENDLOOP:
@@ -2082,12 +2094,10 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
if (inst0->header.opcode == BRW_OPCODE_BREAK &&
inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
- inst0->bits3.if_else.pop_count = 0;
}
else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
inst0->bits3.if_else.jump_count == 0) {
inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
- inst0->bits3.if_else.pop_count = 0;
}
}
}
@@ -2111,7 +2121,7 @@ static void brw_wm_emit_glsl(struct brw_context *brw, struct brw_wm_compile *c)
if (INTEL_DEBUG & DEBUG_WM) {
printf("wm-native:\n");
for (i = 0; i < p->nr_insn; i++)
- brw_disasm(stderr, &p->store[i], intel->gen);
+ brw_disasm(stdout, &p->store[i], intel->gen);
printf("\n");
}
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.c b/src/mesa/drivers/dri/i965/brw_wm_iz.c
index 5e399ac62a..8505ef1951 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_iz.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_iz.c
@@ -152,6 +152,6 @@ void brw_wm_lookup_iz( GLuint line_aa,
reg+=2;
}
- key->nr_depth_regs = (reg+1)/2;
+ key->nr_payload_regs = reg;
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass0.c b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
index 05de85a957..8fc960b445 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass0.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass0.c
@@ -379,7 +379,7 @@ static void pass0_init_payload( struct brw_wm_compile *c )
GLuint i;
for (i = 0; i < 4; i++) {
- GLuint j = i >= c->key.nr_depth_regs ? 0 : i;
+ GLuint j = i >= (c->key.nr_payload_regs + 1) / 2 ? 0 : i;
pass0_set_fpreg_value( c, PROGRAM_PAYLOAD, PAYLOAD_DEPTH, i,
&c->payload.depth[j] );
}
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass1.c b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
index b449394029..962515a99e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass1.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass1.c
@@ -158,6 +158,7 @@ void brw_wm_pass1( struct brw_wm_compile *c )
case OPCODE_FLR:
case OPCODE_FRC:
case OPCODE_MOV:
+ case OPCODE_SSG:
case OPCODE_SWZ:
case OPCODE_TRUNC:
read0 = writemask;
@@ -254,6 +255,11 @@ void brw_wm_pass1( struct brw_wm_compile *c )
read2 = WRITEMASK_W; /* pixel w */
break;
+ case OPCODE_DP2:
+ read0 = WRITEMASK_XY;
+ read1 = WRITEMASK_XY;
+ break;
+
case OPCODE_DP3:
read0 = WRITEMASK_XYZ;
read1 = WRITEMASK_XYZ;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_pass2.c b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
index 31303febf0..54acb3038b 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_pass2.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_pass2.c
@@ -76,7 +76,7 @@ static void init_registers( struct brw_wm_compile *c )
for (j = 0; j < c->grf_limit; j++)
c->pass2_grf[j].nextuse = BRW_WM_MAX_INSN;
- for (j = 0; j < c->key.nr_depth_regs; j++)
+ for (j = 0; j < (c->key.nr_payload_regs + 1) / 2; j++)
prealloc_reg(c, &c->payload.depth[j], i++);
for (j = 0; j < c->nr_creg; j++)
@@ -101,7 +101,7 @@ static void init_registers( struct brw_wm_compile *c )
assert(nr_interp_regs >= 1);
- c->prog_data.first_curbe_grf = c->key.nr_depth_regs * 2;
+ c->prog_data.first_curbe_grf = ALIGN(c->key.nr_payload_regs, 2);
c->prog_data.urb_read_length = nr_interp_regs * 2;
c->prog_data.curb_read_length = c->nr_creg * 2;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index c1cf4db1ca..6699d0a73e 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -104,8 +104,22 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
key->is_glsl = bfp->isGLSL;
- /* temporary sanity check assertion */
- ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
+ /* If using the fragment shader backend, the program is always
+ * 8-wide.
+ */
+ if (ctx->Shader.CurrentProgram) {
+ int i;
+
+ for (i = 0; i < ctx->Shader.CurrentProgram->_NumLinkedShaders; i++) {
+ struct brw_shader *shader =
+ (struct brw_shader *)ctx->Shader.CurrentProgram->_LinkedShaders[i];;
+
+ if (shader->base.Type == GL_FRAGMENT_SHADER &&
+ shader->ir != NULL) {
+ key->is_glsl = GL_TRUE;
+ }
+ }
+ }
/* _NEW_DEPTH */
key->stats_wm = intel->stats_wm;
diff --git a/src/mesa/drivers/dri/i965/gen6_cc.c b/src/mesa/drivers/dri/i965/gen6_cc.c
index f7acad6912..26f1070a16 100644
--- a/src/mesa/drivers/dri/i965/gen6_cc.c
+++ b/src/mesa/drivers/dri/i965/gen6_cc.c
@@ -267,9 +267,9 @@ static void upload_cc_state_pointers(struct brw_context *brw)
BEGIN_BATCH(4);
OUT_BATCH(CMD_3D_CC_STATE_POINTERS << 16 | (4 - 2));
- OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
OUT_RELOC(brw->cc.blend_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
OUT_RELOC(brw->cc.depth_stencil_state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+ OUT_RELOC(brw->cc.state_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
ADVANCE_BATCH();
intel_batchbuffer_emit_mi_flush(intel->batch);
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 863c85449d..2cd640de17 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -34,18 +34,59 @@
#include "intel_batchbuffer.h"
static void
+prepare_wm_constants(struct brw_context *brw)
+{
+ struct intel_context *intel = &brw->intel;
+ GLcontext *ctx = &intel->ctx;
+ const struct brw_fragment_program *fp =
+ brw_fragment_program_const(brw->fragment_program);
+
+ drm_intel_bo_unreference(brw->wm.push_const_bo);
+ brw->wm.push_const_bo = NULL;
+
+ /* Updates the ParamaterValues[i] pointers for all parameters of the
+ * basic type of PROGRAM_STATE_VAR.
+ */
+ /* XXX: Should this happen somewhere before to get our state flag set? */
+ _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
+
+ if (brw->wm.prog_data->nr_params != 0) {
+ float *constants;
+ unsigned int i;
+
+ brw->wm.push_const_bo = drm_intel_bo_alloc(intel->bufmgr,
+ "WM constant_bo",
+ brw->wm.prog_data->nr_params *
+ sizeof(float),
+ 4096);
+ drm_intel_gem_bo_map_gtt(brw->wm.push_const_bo);
+ constants = brw->wm.push_const_bo->virtual;
+ for (i = 0; i < brw->wm.prog_data->nr_params; i++) {
+ constants[i] = *brw->wm.prog_data->param[i];
+ }
+ drm_intel_gem_bo_unmap_gtt(brw->wm.push_const_bo);
+ }
+}
+
+const struct brw_tracked_state gen6_wm_constants = {
+ .dirty = {
+ .mesa = _NEW_PROGRAM_CONSTANTS,
+ .brw = 0,
+ .cache = 0,
+ },
+ .prepare = prepare_wm_constants,
+};
+
+static void
upload_wm_state(struct brw_context *brw)
{
struct intel_context *intel = &brw->intel;
GLcontext *ctx = &intel->ctx;
const struct brw_fragment_program *fp =
brw_fragment_program_const(brw->fragment_program);
- unsigned int nr_params = fp->program.Base.Parameters->NumParameters;
- drm_intel_bo *constant_bo;
- int i;
uint32_t dw2, dw4, dw5, dw6;
- if (fp->use_const_buffer || nr_params == 0) {
+ if (fp->use_const_buffer || brw->wm.prog_data->nr_params == 0) {
/* Disable the push constant buffers. */
BEGIN_BATCH(5);
OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 | (5 - 2));
@@ -55,35 +96,17 @@ upload_wm_state(struct brw_context *brw)
OUT_BATCH(0);
ADVANCE_BATCH();
} else {
- /* Updates the ParamaterValues[i] pointers for all parameters of the
- * basic type of PROGRAM_STATE_VAR.
- */
- _mesa_load_state_parameters(ctx, fp->program.Base.Parameters);
-
- constant_bo = drm_intel_bo_alloc(intel->bufmgr, "WM constant_bo",
- nr_params * 4 * sizeof(float),
- 4096);
- drm_intel_gem_bo_map_gtt(constant_bo);
- for (i = 0; i < nr_params; i++) {
- memcpy((char *)constant_bo->virtual + i * 4 * sizeof(float),
- fp->program.Base.Parameters->ParameterValues[i],
- 4 * sizeof(float));
- }
- drm_intel_gem_bo_unmap_gtt(constant_bo);
-
BEGIN_BATCH(5);
OUT_BATCH(CMD_3D_CONSTANT_PS_STATE << 16 |
GEN6_CONSTANT_BUFFER_0_ENABLE |
(5 - 2));
- OUT_RELOC(constant_bo,
+ OUT_RELOC(brw->wm.push_const_bo,
I915_GEM_DOMAIN_RENDER, 0, /* XXX: bad domain */
- ALIGN(nr_params, 2) / 2 - 1);
+ ALIGN(brw->wm.prog_data->nr_params, 8) / 8 - 1);
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
-
- drm_intel_bo_unreference(constant_bo);
}
intel_batchbuffer_emit_mi_flush(intel->batch);
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index 8ab41f8d27..117d4daf3b 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -202,6 +202,9 @@ intel_bufferobj_subdata(GLcontext * ctx,
struct intel_context *intel = intel_context(ctx);
struct intel_buffer_object *intel_obj = intel_buffer_object(obj);
+ if (size == 0)
+ return;
+
assert(intel_obj);
if (intel_obj->region)
@@ -426,6 +429,9 @@ intel_bufferobj_flush_mapped_range(GLcontext *ctx, GLenum target,
if (intel_obj->range_map_buffer == NULL)
return;
+ if (length == 0)
+ return;
+
temp_bo = drm_intel_bo_alloc(intel->bufmgr, "range map flush", length, 64);
drm_intel_bo_subdata(temp_bo, 0, length, intel_obj->range_map_buffer);
diff --git a/src/mesa/drivers/dri/intel/intel_chipset.h b/src/mesa/drivers/dri/intel/intel_chipset.h
index 72a74322ee..b5f180bbc8 100644
--- a/src/mesa/drivers/dri/intel/intel_chipset.h
+++ b/src/mesa/drivers/dri/intel/intel_chipset.h
@@ -73,6 +73,7 @@
#define PCI_CHIP_SANDYBRIDGE 0x0102
#define PCI_CHIP_SANDYBRIDGE_M 0x0106
+#define PCI_CHIP_SANDYBRIDGE_M_D0 0x0126
#define IS_MOBILE(devid) (devid == PCI_CHIP_I855_GM || \
devid == PCI_CHIP_I915_GM || \
@@ -119,7 +120,8 @@
#define IS_IRONLAKE(devid) IS_GEN5(devid)
#define IS_GEN6(devid) (devid == PCI_CHIP_SANDYBRIDGE || \
- devid == PCI_CHIP_SANDYBRIDGE_M)
+ devid == PCI_CHIP_SANDYBRIDGE_M || \
+ devid == PCI_CHIP_SANDYBRIDGE_M_D0)
#define IS_965(devid) (IS_GEN4(devid) || \
IS_G4X(devid) || \
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index e19f44035f..a9ba93d24b 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -377,7 +377,8 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
intel_region_reference(&region, depth_region);
}
else
- region = intel_region_alloc_for_handle(intel, buffers[i].cpp,
+ region = intel_region_alloc_for_handle(intel->intelScreen,
+ buffers[i].cpp,
drawable->w,
drawable->h,
buffers[i].pitch / buffers[i].cpp,
@@ -720,6 +721,8 @@ intelInitContext(struct intel_context *intel,
ctx->Const.MaxPointSizeAA = 3.0;
ctx->Const.PointSizeGranularity = 1.0;
+ ctx->Const.MaxSamples = 1.0;
+
/* reinitialize the context point state.
* It depend on constants in __GLcontextRec::Const
*/
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index c7ac2de01e..28d53284fd 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -32,14 +32,26 @@
#include "main/mtypes.h"
#include "main/mm.h"
-#include "texmem.h"
#include "dri_metaops.h"
+
+#ifdef __cplusplus
+extern "C" {
+ /* Evil hack for using libdrm in a c++ compiler. */
+ #define virtual virt
+#endif
+
#include "drm.h"
#include "intel_bufmgr.h"
#include "intel_screen.h"
#include "intel_tex_obj.h"
#include "i915_drm.h"
+
+#ifdef __cplusplus
+ #undef virtual
+}
+#endif
+
#include "tnl/t_vertex.h"
#define TAG(x) intel##x
diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c
index edba1fc2f2..bf22a423fc 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions.c
@@ -50,8 +50,9 @@
#define need_GL_EXT_cull_vertex
#define need_GL_EXT_draw_buffers2
#define need_GL_EXT_fog_coord
-#define need_GL_EXT_framebuffer_object
#define need_GL_EXT_framebuffer_blit
+#define need_GL_EXT_framebuffer_multisample
+#define need_GL_EXT_framebuffer_object
#define need_GL_EXT_gpu_program_parameters
#define need_GL_EXT_point_parameters
#define need_GL_EXT_provoking_vertex
@@ -111,6 +112,7 @@ static const struct dri_extension card_extensions[] = {
{ "GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions },
{ "GL_EXT_framebuffer_blit", GL_EXT_framebuffer_blit_functions },
{ "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
+ { "GL_EXT_framebuffer_multisample", GL_EXT_framebuffer_multisample_functions },
{ "GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
{ "GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions },
{ "GL_EXT_packed_depth_stencil", NULL },
diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c
index 4a83886fc1..2693b5fa72 100644
--- a/src/mesa/drivers/dri/intel/intel_fbo.c
+++ b/src/mesa/drivers/dri/intel/intel_fbo.c
@@ -137,27 +137,21 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
rb->Format = MESA_FORMAT_A8;
rb->DataType = GL_UNSIGNED_BYTE;
break;
+ case GL_DEPTH_COMPONENT16:
+ rb->Format = MESA_FORMAT_Z16;
+ rb->DataType = GL_UNSIGNED_SHORT;
+ break;
case GL_STENCIL_INDEX:
case GL_STENCIL_INDEX1_EXT:
case GL_STENCIL_INDEX4_EXT:
case GL_STENCIL_INDEX8_EXT:
case GL_STENCIL_INDEX16_EXT:
- /* alloc a depth+stencil buffer */
- rb->Format = MESA_FORMAT_S8_Z24;
- rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
- break;
- case GL_DEPTH_COMPONENT16:
- rb->Format = MESA_FORMAT_Z16;
- rb->DataType = GL_UNSIGNED_SHORT;
- break;
case GL_DEPTH_COMPONENT:
case GL_DEPTH_COMPONENT24:
case GL_DEPTH_COMPONENT32:
- rb->Format = MESA_FORMAT_S8_Z24;
- rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
- break;
case GL_DEPTH_STENCIL_EXT:
case GL_DEPTH24_STENCIL8_EXT:
+ /* alloc a depth+stencil buffer */
rb->Format = MESA_FORMAT_S8_Z24;
rb->DataType = GL_UNSIGNED_INT_24_8_EXT;
break;
@@ -182,7 +176,7 @@ intel_alloc_renderbuffer_storage(GLcontext * ctx, struct gl_renderbuffer *rb,
/* alloc hardware renderbuffer */
DBG("Allocating %d x %d Intel RBO\n", width, height);
- irb->region = intel_region_alloc(intel, I915_TILING_NONE, cpp,
+ irb->region = intel_region_alloc(intel->intelScreen, I915_TILING_NONE, cpp,
width, height, GL_TRUE);
if (!irb->region)
return GL_FALSE; /* out of memory? */
diff --git a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
index 39ac0205fa..d316d34d69 100644
--- a/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/intel/intel_mipmap_tree.c
@@ -30,6 +30,7 @@
#include "intel_regions.h"
#include "intel_tex_layout.h"
#include "main/enums.h"
+#include "main/formats.h"
#define FILE_DEBUG_FLAG DEBUG_MIPTREE
@@ -136,7 +137,7 @@ intel_miptree_create(struct intel_context *intel,
return NULL;
}
- mt->region = intel_region_alloc(intel,
+ mt->region = intel_region_alloc(intel->intelScreen,
tiling,
mt->cpp,
mt->total_width,
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index 680d18ba29..e87e29462c 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -142,10 +142,10 @@ intel_region_unmap(struct intel_context *intel, struct intel_region *region)
}
static struct intel_region *
-intel_region_alloc_internal(struct intel_context *intel,
+intel_region_alloc_internal(struct intel_screen *screen,
GLuint cpp,
GLuint width, GLuint height, GLuint pitch,
- drm_intel_bo *buffer)
+ uint32_t tiling, drm_intel_bo *buffer)
{
struct intel_region *region;
@@ -164,44 +164,52 @@ intel_region_alloc_internal(struct intel_context *intel,
region->pitch = pitch;
region->refcount = 1;
region->buffer = buffer;
-
- /* Default to no tiling */
- region->tiling = I915_TILING_NONE;
+ region->tiling = tiling;
+ region->screen = screen;
_DBG("%s <-- %p\n", __FUNCTION__, region);
return region;
}
struct intel_region *
-intel_region_alloc(struct intel_context *intel,
+intel_region_alloc(struct intel_screen *screen,
uint32_t tiling,
GLuint cpp, GLuint width, GLuint height,
GLboolean expect_accelerated_upload)
{
drm_intel_bo *buffer;
- struct intel_region *region;
unsigned long flags = 0;
unsigned long aligned_pitch;
if (expect_accelerated_upload)
flags |= BO_ALLOC_FOR_RENDER;
- buffer = drm_intel_bo_alloc_tiled(intel->bufmgr, "region",
+ buffer = drm_intel_bo_alloc_tiled(screen->bufmgr, "region",
width, height, cpp,
&tiling, &aligned_pitch, flags);
- region = intel_region_alloc_internal(intel, cpp, width, height,
- aligned_pitch / cpp, buffer);
- if (region == NULL)
- return region;
+ return intel_region_alloc_internal(screen, cpp, width, height,
+ aligned_pitch / cpp, tiling, buffer);
+}
- region->tiling = tiling;
+GLboolean
+intel_region_flink(struct intel_region *region, uint32_t *name)
+{
+ if (region->name == 0) {
+ if (drm_intel_bo_flink(region->buffer, &region->name))
+ return GL_FALSE;
+
+ _mesa_HashInsert(region->screen->named_regions,
+ region->name, region);
+ }
- return region;
+ *name = region->name;
+
+ return GL_TRUE;
}
struct intel_region *
-intel_region_alloc_for_handle(struct intel_context *intel,
+intel_region_alloc_for_handle(struct intel_screen *screen,
GLuint cpp,
GLuint width, GLuint height, GLuint pitch,
GLuint handle, const char *name)
@@ -209,9 +217,9 @@ intel_region_alloc_for_handle(struct intel_context *intel,
struct intel_region *region, *dummy;
drm_intel_bo *buffer;
int ret;
- uint32_t bit_6_swizzle;
+ uint32_t bit_6_swizzle, tiling;
- region = _mesa_HashLookup(intel->intelScreen->named_regions, handle);
+ region = _mesa_HashLookup(screen->named_regions, handle);
if (region != NULL) {
dummy = NULL;
if (region->width != width || region->height != height ||
@@ -225,25 +233,26 @@ intel_region_alloc_for_handle(struct intel_context *intel,
return dummy;
}
- buffer = intel_bo_gem_create_from_name(intel->bufmgr, name, handle);
-
- region = intel_region_alloc_internal(intel, cpp,
- width, height, pitch, buffer);
- if (region == NULL)
- return region;
-
- ret = drm_intel_bo_get_tiling(region->buffer, &region->tiling,
- &bit_6_swizzle);
+ buffer = intel_bo_gem_create_from_name(screen->bufmgr, name, handle);
+ if (buffer == NULL)
+ return NULL;
+ ret = drm_intel_bo_get_tiling(buffer, &tiling, &bit_6_swizzle);
if (ret != 0) {
fprintf(stderr, "Couldn't get tiling of buffer %d (%s): %s\n",
handle, name, strerror(-ret));
- intel_region_release(&region);
+ drm_intel_bo_unreference(buffer);
+ return NULL;
+ }
+
+ region = intel_region_alloc_internal(screen, cpp,
+ width, height, pitch, tiling, buffer);
+ if (region == NULL) {
+ drm_intel_bo_unreference(buffer);
return NULL;
}
region->name = handle;
- region->screen = intel->intelScreen;
- _mesa_HashInsert(intel->intelScreen->named_regions, handle, region);
+ _mesa_HashInsert(screen->named_regions, handle, region);
return region;
}
diff --git a/src/mesa/drivers/dri/intel/intel_regions.h b/src/mesa/drivers/dri/intel/intel_regions.h
index 6bbed32f2a..8464a5e937 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.h
+++ b/src/mesa/drivers/dri/intel/intel_regions.h
@@ -76,18 +76,21 @@ struct intel_region
/* Allocate a refcounted region. Pointers to regions should only be
* copied by calling intel_reference_region().
*/
-struct intel_region *intel_region_alloc(struct intel_context *intel,
+struct intel_region *intel_region_alloc(struct intel_screen *screen,
uint32_t tiling,
GLuint cpp, GLuint width,
GLuint height,
GLboolean expect_accelerated_upload);
struct intel_region *
-intel_region_alloc_for_handle(struct intel_context *intel,
+intel_region_alloc_for_handle(struct intel_screen *screen,
GLuint cpp,
GLuint width, GLuint height, GLuint pitch,
unsigned int handle, const char *name);
+GLboolean
+intel_region_flink(struct intel_region *region, uint32_t *name);
+
void intel_region_reference(struct intel_region **dst,
struct intel_region *src);
diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c
index 15a465c640..0a542a7303 100644
--- a/src/mesa/drivers/dri/intel/intel_screen.c
+++ b/src/mesa/drivers/dri/intel/intel_screen.c
@@ -70,7 +70,7 @@ PUBLIC const char __driConfigOptions[] =
DRI_CONF_DESC(en, "Enable early Z in classic mode (unstable, 945-only).")
DRI_CONF_OPT_END
- DRI_CONF_OPT_BEGIN(fragment_shader, bool, false)
+ DRI_CONF_OPT_BEGIN(fragment_shader, bool, true)
DRI_CONF_DESC(en, "Enable limited ARB_fragment_shader support on 915/945.")
DRI_CONF_OPT_END
@@ -159,7 +159,8 @@ intel_create_image_from_name(__DRIcontext *context,
image->data = loaderPrivate;
cpp = _mesa_get_format_bytes(image->format);
- image->region = intel_region_alloc_for_handle(intel, cpp, width, height,
+ image->region = intel_region_alloc_for_handle(intel->intelScreen,
+ cpp, width, height,
pitch, name, "image");
if (image->region == NULL) {
FREE(image);
@@ -206,11 +207,79 @@ intel_destroy_image(__DRIimage *image)
FREE(image);
}
+static __DRIimage *
+intel_create_image(__DRIscreen *screen,
+ int width, int height, int format,
+ unsigned int use,
+ void *loaderPrivate)
+{
+ __DRIimage *image;
+ struct intel_screen *intelScreen = screen->private;
+ int cpp;
+
+ image = CALLOC(sizeof *image);
+ if (image == NULL)
+ return NULL;
+
+ switch (format) {
+ case __DRI_IMAGE_FORMAT_RGB565:
+ image->format = MESA_FORMAT_RGB565;
+ image->internal_format = GL_RGB;
+ image->data_type = GL_UNSIGNED_BYTE;
+ break;
+ case __DRI_IMAGE_FORMAT_XRGB8888:
+ image->format = MESA_FORMAT_XRGB8888;
+ image->internal_format = GL_RGB;
+ image->data_type = GL_UNSIGNED_BYTE;
+ break;
+ case __DRI_IMAGE_FORMAT_ARGB8888:
+ image->format = MESA_FORMAT_ARGB8888;
+ image->internal_format = GL_RGBA;
+ image->data_type = GL_UNSIGNED_BYTE;
+ break;
+ default:
+ free(image);
+ return NULL;
+ }
+
+ image->data = loaderPrivate;
+ cpp = _mesa_get_format_bytes(image->format);
+
+ image->region =
+ intel_region_alloc(intelScreen, I915_TILING_NONE,
+ cpp, width, height, GL_TRUE);
+ if (image->region == NULL) {
+ FREE(image);
+ return NULL;
+ }
+
+ return image;
+}
+
+static GLboolean
+intel_query_image(__DRIimage *image, int attrib, int *value)
+{
+ switch (attrib) {
+ case __DRI_IMAGE_ATTRIB_STRIDE:
+ *value = image->region->pitch * image->region->cpp;
+ return GL_TRUE;
+ case __DRI_IMAGE_ATTRIB_HANDLE:
+ *value = image->region->buffer->handle;
+ return GL_TRUE;
+ case __DRI_IMAGE_ATTRIB_NAME:
+ return intel_region_flink(image->region, (uint32_t *) value);
+ default:
+ return GL_FALSE;
+ }
+}
+
static struct __DRIimageExtensionRec intelImageExtension = {
{ __DRI_IMAGE, __DRI_IMAGE_VERSION },
intel_create_image_from_name,
intel_create_image_from_renderbuffer,
intel_destroy_image,
+ intel_create_image,
+ intel_query_image
};
static const __DRIextension *intelScreenExtensions[] = {
diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h
index 4bb012dc65..cd77dd5b8e 100644
--- a/src/mesa/drivers/dri/intel/intel_tex.h
+++ b/src/mesa/drivers/dri/intel/intel_tex.h
@@ -31,8 +31,6 @@
#include "main/mtypes.h"
#include "main/formats.h"
#include "intel_context.h"
-#include "texmem.h"
-
void intelInitTextureFuncs(struct dd_function_table *functions);
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_driver.c b/src/mesa/drivers/dri/nouveau/nouveau_driver.c
index 4ec864c181..6452fe218e 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_driver.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_driver.c
@@ -138,5 +138,7 @@ nouveau_driver_functions_init(struct dd_function_table *functions)
functions->DrawPixels = _mesa_meta_DrawPixels;
functions->CopyPixels = _mesa_meta_CopyPixels;
functions->Bitmap = _mesa_meta_Bitmap;
+#if FEATURE_EXT_framebuffer_blit
functions->BlitFramebuffer = _mesa_meta_BlitFramebuffer;
+#endif
}
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
index bd1273beea..32d8f2d0f9 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
@@ -262,10 +262,12 @@ nouveau_finish_render_texture(GLcontext *ctx,
void
nouveau_fbo_functions_init(struct dd_function_table *functions)
{
+#if FEATURE_EXT_framebuffer_object
functions->NewFramebuffer = nouveau_framebuffer_new;
functions->NewRenderbuffer = nouveau_renderbuffer_new;
functions->BindFramebuffer = nouveau_bind_framebuffer;
functions->FramebufferRenderbuffer = nouveau_framebuffer_renderbuffer;
functions->RenderTexture = nouveau_render_texture;
functions->FinishRenderTexture = nouveau_finish_render_texture;
+#endif
}
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
index 3167d49bca..d0eb170784 100644
--- a/src/mesa/drivers/dri/r300/compiler/Makefile
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -23,6 +23,7 @@ C_SOURCES = \
radeon_dataflow_deadcode.c \
radeon_dataflow_swizzles.c \
radeon_optimize.c \
+ radeon_remove_constants.c \
radeon_rename_regs.c \
r3xx_fragprog.c \
r300_fragprog.c \
diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript
index c6f47a6f8a..847857b142 100755
--- a/src/mesa/drivers/dri/r300/compiler/SConscript
+++ b/src/mesa/drivers/dri/r300/compiler/SConscript
@@ -22,6 +22,7 @@ r300compiler = env.ConvenienceLibrary(
'radeon_pair_schedule.c',
'radeon_pair_regalloc.c',
'radeon_optimize.c',
+ 'radeon_remove_constants.c',
'radeon_rename_regs.c',
'radeon_emulate_branches.c',
'radeon_emulate_loops.c',
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index d2fa816894..8613ec5109 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -30,6 +30,7 @@
#include "radeon_program_alu.h"
#include "radeon_program_tex.h"
#include "radeon_rename_regs.h"
+#include "radeon_remove_constants.h"
#include "r300_fragprog.h"
#include "r300_fragprog_swizzle.h"
#include "r500_fragprog.h"
@@ -180,6 +181,13 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "after dataflow passes");
+ if (c->Base.remove_unused_constants) {
+ rc_remove_unused_constants(&c->Base,
+ &c->code->constants_remap_table);
+
+ debug_program_log(c, "after constants cleanup");
+ }
+
if(!c->Base.is_r500) {
/* This pass makes it easier for the scheduler to group TEX
* instructions and reduces the chances of creating too
@@ -224,4 +232,14 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
r300FragmentProgramDump(c->code);
}
}
+
+ /* Check the number of constants. */
+ if (!c->Base.Error) {
+ unsigned max = c->Base.is_r500 ? R500_PFS_NUM_CONST_REGS : R300_PFS_NUM_CONST_REGS;
+
+ if (c->Base.Program.Constants.Count > max) {
+ rc_error(&c->Base, "Too many constants. Max: %i, Got: %i\n",
+ max, c->Base.Program.Constants.Count);
+ }
+ }
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index 666c9c2a7a..b05b3aabf3 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -31,6 +31,7 @@
#include "radeon_swizzle.h"
#include "radeon_emulate_branches.h"
#include "radeon_emulate_loops.h"
+#include "radeon_remove_constants.h"
struct loop {
int BgnLoop;
@@ -465,7 +466,7 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
{
struct rc_instruction *rci;
- struct loop * loops;
+ struct loop * loops = NULL;
int current_loop_depth = 0;
int loops_reserved = 0;
@@ -484,6 +485,21 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
if (!valid_dst(compiler->code, &vpi->DstReg))
continue;
+ if (rc_get_opcode_info(vpi->Opcode)->HasDstReg) {
+ /* Relative addressing of destination operands is not supported yet. */
+ if (vpi->DstReg.RelAddr) {
+ rc_error(&compiler->Base, "Vertex program does not support relative "
+ "addressing of destination operands (yet).\n");
+ return;
+ }
+
+ /* Neither is Saturate. */
+ if (vpi->SaturateMode != RC_SATURATE_NONE) {
+ rc_error(&compiler->Base, "Vertex program does not support the Saturate "
+ "modifier (yet).\n");
+ }
+ }
+
if (compiler->code->length >= R500_VS_MAX_ALU_DWORDS ||
(compiler->code->length >= R300_VS_MAX_ALU_DWORDS && !compiler->Base.is_r500)) {
rc_error(&compiler->Base, "Vertex program has too many instructions\n");
@@ -543,10 +559,16 @@ static void translate_vertex_program(struct r300_vertex_program_compiler * compi
}
case RC_OPCODE_ENDLOOP:
{
- struct loop * l = &loops[current_loop_depth - 1];
- unsigned int act_addr = l->BgnLoop - 1;
- unsigned int last_addr = (compiler->code->length / 4) - 1;
- unsigned int ret_addr = l->BgnLoop;
+ struct loop * l;
+ unsigned int act_addr;
+ unsigned int last_addr;
+ unsigned int ret_addr;
+
+ assert(loops);
+ l = &loops[current_loop_depth - 1];
+ act_addr = l->BgnLoop - 1;
+ last_addr = (compiler->code->length / 4) - 1;
+ ret_addr = l->BgnLoop;
if (loops_reserved >= R300_VS_MAX_FC_OPS) {
rc_error(&compiler->Base,
@@ -624,10 +646,9 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
struct temporary_allocation * ta;
unsigned int i, j;
- compiler->code->num_temporaries = 0;
memset(hwtemps, 0, sizeof(hwtemps));
- /* Pass 1: Count original temporaries and allocate structures */
+ /* Pass 1: Count original temporaries. */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
@@ -645,12 +666,30 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
}
}
}
+ compiler->code->num_temporaries = num_orig_temps;
+
+ /* Pass 2: If there is relative addressing of temporaries, we cannot change register indices. Give up. */
+ for (inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->HasDstReg)
+ if (inst->U.I.DstReg.RelAddr)
+ return;
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[i].RelAddr) {
+ return;
+ }
+ }
+ }
+
+ compiler->code->num_temporaries = 0;
ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
sizeof(struct temporary_allocation) * num_orig_temps);
memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
- /* Pass 2: Determine original temporary lifetimes */
+ /* Pass 3: Determine original temporary lifetimes */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
/* Instructions inside of loops need to use the ENDLOOP
@@ -685,7 +724,7 @@ static void allocate_temporary_registers(struct r300_vertex_program_compiler * c
}
}
- /* Pass 3: Register allocation */
+ /* Pass 4: Register allocation */
for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
@@ -853,6 +892,76 @@ static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
return 1;
}
+static void transform_negative_addressing(struct r300_vertex_program_compiler *c,
+ struct rc_instruction *arl,
+ struct rc_instruction *end,
+ int min_offset)
+{
+ struct rc_instruction *inst, *add;
+ unsigned const_swizzle;
+
+ /* Transform ARL */
+ add = rc_insert_new_instruction(&c->Base, arl->Prev);
+ add->U.I.Opcode = RC_OPCODE_ADD;
+ add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ add->U.I.DstReg.Index = rc_find_free_temporary(&c->Base);
+ add->U.I.DstReg.WriteMask = RC_MASK_X;
+ add->U.I.SrcReg[0] = arl->U.I.SrcReg[0];
+ add->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ add->U.I.SrcReg[1].Index = rc_constants_add_immediate_scalar(&c->Base.Program.Constants,
+ min_offset, &const_swizzle);
+ add->U.I.SrcReg[1].Swizzle = const_swizzle;
+
+ arl->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ arl->U.I.SrcReg[0].Index = add->U.I.DstReg.Index;
+ arl->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XXXX;
+
+ /* Rewrite offsets up to and excluding inst. */
+ for (inst = arl->Next; inst != end; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++)
+ if (inst->U.I.SrcReg[i].RelAddr)
+ inst->U.I.SrcReg[i].Index -= min_offset;
+ }
+}
+
+static void rc_emulate_negative_addressing(struct r300_vertex_program_compiler *c)
+{
+ struct rc_instruction *inst, *lastARL = NULL;
+ int min_offset = 0;
+
+ for (inst = c->Base.Program.Instructions.Next; inst != &c->Base.Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (inst->U.I.Opcode == RC_OPCODE_ARL) {
+ if (lastARL != NULL && min_offset < 0)
+ transform_negative_addressing(c, lastARL, inst, min_offset);
+
+ lastARL = inst;
+ min_offset = 0;
+ continue;
+ }
+
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].RelAddr &&
+ inst->U.I.SrcReg[i].Index < 0) {
+ /* ARL must precede any indirect addressing. */
+ if (lastARL == NULL) {
+ rc_error(&c->Base, "Vertex shader: Found relative addressing without ARL.");
+ return;
+ }
+
+ if (inst->U.I.SrcReg[i].Index < min_offset)
+ min_offset = inst->U.I.SrcReg[i].Index;
+ }
+ }
+ }
+
+ if (lastARL != NULL && min_offset < 0)
+ transform_negative_addressing(c, lastARL, inst, min_offset);
+}
+
static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where)
{
if (c->Base.Debug) {
@@ -868,44 +977,56 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
};
-void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler *c)
{
struct emulate_loop_state loop_state;
- compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+ c->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
- addArtificialOutputs(compiler);
+ addArtificialOutputs(c);
- debug_program_log(compiler, "before compilation");
+ debug_program_log(c, "before compilation");
- if (compiler->Base.is_r500)
- rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU);
+ if (c->Base.is_r500)
+ rc_transform_loops(&c->Base, &loop_state, R500_VS_MAX_ALU);
else
- rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU);
+ rc_transform_loops(&c->Base, &loop_state, R300_VS_MAX_ALU);
+ if (c->Base.Error)
+ return;
- debug_program_log(compiler, "after emulate loops");
+ debug_program_log(c, "after emulate loops");
- if (!compiler->Base.is_r500) {
- rc_emulate_branches(&compiler->Base);
- debug_program_log(compiler, "after emulate branches");
+ if (!c->Base.is_r500) {
+ rc_emulate_branches(&c->Base);
+ if (c->Base.Error)
+ return;
+ debug_program_log(c, "after emulate branches");
}
- if (compiler->Base.is_r500) {
+ rc_emulate_negative_addressing(c);
+
+ debug_program_log(c, "after negative addressing emulation");
+
+ if (c->Base.is_r500) {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_vertex_alu, 0 },
{ &r300_transform_trig_scale_vertex, 0 }
};
- radeonLocalTransform(&compiler->Base, 2, transformations);
+ radeonLocalTransform(&c->Base, 2, transformations);
+ if (c->Base.Error)
+ return;
- debug_program_log(compiler, "after native rewrite");
+ debug_program_log(c, "after native rewrite");
} else {
struct radeon_program_transformation transformations[] = {
{ &r300_transform_vertex_alu, 0 },
{ &radeonTransformTrigSimple, 0 }
};
- radeonLocalTransform(&compiler->Base, 2, transformations);
+ radeonLocalTransform(&c->Base, 2, transformations);
+ if (c->Base.Error)
+ return;
- debug_program_log(compiler, "after native rewrite");
+ debug_program_log(c, "after native rewrite");
/* Note: This pass has to be done seperately from ALU rewrite,
* because it needs to check every instruction.
@@ -913,9 +1034,11 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
struct radeon_program_transformation transformations2[] = {
{ &transform_nonnative_modifiers, 0 },
};
- radeonLocalTransform(&compiler->Base, 1, transformations2);
+ radeonLocalTransform(&c->Base, 1, transformations2);
+ if (c->Base.Error)
+ return;
- debug_program_log(compiler, "after emulate modifiers");
+ debug_program_log(c, "after emulate modifiers");
}
{
@@ -926,30 +1049,58 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
struct radeon_program_transformation transformations[] = {
{ &transform_source_conflicts, 0 },
};
- radeonLocalTransform(&compiler->Base, 1, transformations);
+ radeonLocalTransform(&c->Base, 1, transformations);
+ if (c->Base.Error)
+ return;
}
- debug_program_log(compiler, "after source conflict resolve");
+ debug_program_log(c, "after source conflict resolve");
- rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler);
+ rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_used, c);
+ if (c->Base.Error)
+ return;
- debug_program_log(compiler, "after deadcode");
+ debug_program_log(c, "after deadcode");
- rc_dataflow_swizzles(&compiler->Base);
+ rc_dataflow_swizzles(&c->Base);
+ if (c->Base.Error)
+ return;
- allocate_temporary_registers(compiler);
+ debug_program_log(c, "after dataflow");
- debug_program_log(compiler, "after dataflow");
+ allocate_temporary_registers(c);
+ if (c->Base.Error)
+ return;
- translate_vertex_program(compiler);
+ debug_program_log(c, "after register allocation");
- rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
+ if (c->Base.remove_unused_constants) {
+ rc_remove_unused_constants(&c->Base,
+ &c->code->constants_remap_table);
+ if (c->Base.Error)
+ return;
- compiler->code->InputsRead = compiler->Base.Program.InputsRead;
- compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
+ debug_program_log(c, "after constants cleanup");
+ }
- if (compiler->Base.Debug) {
+ translate_vertex_program(c);
+ if (c->Base.Error)
+ return;
+
+ rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+
+ c->code->InputsRead = c->Base.Program.InputsRead;
+ c->code->OutputsWritten = c->Base.Program.OutputsWritten;
+
+ if (c->Base.Debug) {
fprintf(stderr, "Final vertex program code:\n");
- r300_vertex_program_dump(compiler);
+ r300_vertex_program_dump(c);
+ }
+
+ /* Check the number of constants. */
+ if (!c->Base.Error &&
+ c->Base.Program.Constants.Count > 256) {
+ rc_error(&c->Base, "Too many constants. Max: 256, Got: %i\n",
+ c->Base.Program.Constants.Count);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index 896246d203..f76676fae8 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -235,6 +235,7 @@ struct rX00_fragment_program_code {
unsigned writes_depth:1;
struct rc_constant_list constants;
+ unsigned *constants_remap_table;
};
@@ -266,6 +267,7 @@ struct r300_vertex_program_code {
int outputs[VSF_MAX_OUTPUTS];
struct rc_constant_list constants;
+ unsigned *constants_remap_table;
uint32_t InputsRead;
uint32_t OutputsWritten;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
index 7c42eb3ae5..5155b912e1 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -39,9 +39,12 @@ struct radeon_compiler {
char * ErrorMsg;
/* Hardware specification. */
- unsigned is_r500;
+ unsigned is_r500:1;
unsigned max_temp_regs;
+ /* Whether to remove unused constants and empty holes in constant space. */
+ unsigned remove_unused_constants:1;
+
/**
* Variables used internally, not be touched by callers
* of the compiler
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index faf531b412..acdb371de9 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -157,8 +157,12 @@ static void update_instruction(struct deadcode_state * s, struct rc_instruction
unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
if (pused) {
usedmask = *pused & inst->U.I.DstReg.WriteMask;
- *pused &= ~usedmask;
+ if (!inst->U.I.DstReg.RelAddr)
+ *pused &= ~usedmask;
}
+
+ if (inst->U.I.DstReg.RelAddr)
+ mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
}
insts->WriteMask |= usedmask;
@@ -213,6 +217,7 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
{
struct deadcode_state s;
unsigned int nr_instructions;
+ unsigned has_temp_reladdr_src = 0;
memset(&s, 0, sizeof(s));
s.C = c;
@@ -300,6 +305,30 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
}
}
+
+ if (!has_temp_reladdr_src) {
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[i].RelAddr) {
+ /* If there is a register read from a temporary file with relative addressing,
+ * mark all preceding written registers as used. */
+ for (struct rc_instruction *ptr = inst->Prev;
+ ptr != &c->Program.Instructions;
+ ptr = ptr->Prev) {
+ if (opcode->HasDstReg &&
+ ptr->U.I.DstReg.File == RC_FILE_TEMPORARY &&
+ ptr->U.I.DstReg.WriteMask) {
+ mark_used(&s,
+ ptr->U.I.DstReg.File,
+ ptr->U.I.DstReg.Index,
+ ptr->U.I.DstReg.WriteMask);
+ }
+ }
+
+ has_temp_reladdr_src = 1;
+ }
+ }
+ }
}
update_instruction(&s, inst);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
index 2ea830be7f..da495a3afa 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -95,6 +95,12 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsComponentwise = 1
},
{
+ .Opcode = RC_OPCODE_DP2,
+ .Name = "DP2",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
.Opcode = RC_OPCODE_DP3,
.Name = "DP3",
.NumSrcRegs = 2,
@@ -295,6 +301,13 @@ struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
.IsComponentwise = 1
},
{
+ .Opcode = RC_OPCODE_SSG,
+ .Name = "SSG",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
.Opcode = RC_OPCODE_SUB,
.Name = "SUB",
.NumSrcRegs = 2,
@@ -435,6 +448,10 @@ void rc_compute_sources_for_writemask(
case RC_OPCODE_ARL:
srcmasks[0] |= RC_MASK_X;
break;
+ case RC_OPCODE_DP2:
+ srcmasks[0] |= RC_MASK_XY;
+ srcmasks[1] |= RC_MASK_XY;
+ break;
case RC_OPCODE_DP3:
srcmasks[0] |= RC_MASK_XYZ;
srcmasks[1] |= RC_MASK_XYZ;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
index 6e18d6eb3f..d3f639c870 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -64,6 +64,9 @@ typedef enum {
* dst.c = d src0.c / dy */
RC_OPCODE_DDY,
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y */
+ RC_OPCODE_DP2,
+
/** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
RC_OPCODE_DP3,
@@ -154,6 +157,9 @@ typedef enum {
/** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
RC_OPCODE_SNE,
+ /** vec4 instruction: dst.c = (src0.c < 0 ?) -1 : ((src0.c > 0) : 1 : 0) */
+ RC_OPCODE_SSG,
+
/** vec4 instruction: dst.c = src0.c - src1.c */
RC_OPCODE_SUB,
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
index 407a0a55ee..8327e9aced 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -230,6 +230,34 @@ static void set_pair_instruction(struct r300_fragment_program_compiler *c,
}
+static void check_opcode_support(struct r300_fragment_program_compiler *c,
+ struct rc_sub_instruction *inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+ if (opcode->HasDstReg) {
+ if (inst->DstReg.RelAddr) {
+ rc_error(&c->Base, "Fragment program does not support relative addressing "
+ "of destination operands.\n");
+ return;
+ }
+
+ if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) {
+ rc_error(&c->Base, "Fragment program does not support signed Saturate.\n");
+ return;
+ }
+ }
+
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->SrcReg[i].RelAddr) {
+ rc_error(&c->Base, "Fragment program does not support relative addressing "
+ " of source operands.\n");
+ return;
+ }
+ }
+}
+
+
/**
* Translate all ALU instructions into corresponding pair instructions,
* performing no other changes.
@@ -249,6 +277,8 @@ void rc_pair_translate(struct r300_fragment_program_compiler *c)
struct rc_sub_instruction copy = inst->U.I;
+ check_opcode_support(c, &copy);
+
final_rewrite(&copy);
inst->Type = RC_INSTRUCTION_PAIR;
set_pair_instruction(c, &inst->U.P, &copy);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 857aae5514..704a7bb2d2 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -216,18 +216,18 @@ static void transform_CEIL(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
-static void transform_DP3(struct radeon_compiler* c,
+static void transform_DP2(struct radeon_compiler* c,
struct rc_instruction* inst)
{
struct rc_src_register src0 = inst->U.I.SrcReg[0];
struct rc_src_register src1 = inst->U.I.SrcReg[1];
- src0.Negate &= ~RC_MASK_W;
- src0.Swizzle &= ~(7 << (3 * 3));
- src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
- src1.Negate &= ~RC_MASK_W;
- src1.Swizzle &= ~(7 << (3 * 3));
- src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
- emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+ src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+ src0.Swizzle &= ~(63 << (3 * 2));
+ src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+ src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
+ src1.Swizzle &= ~(63 << (3 * 2));
+ src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
+ emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
rc_remove_instruction(inst);
}
@@ -464,6 +464,43 @@ static void transform_SNE(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+static void transform_SSG(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* result = sign(x)
+ *
+ * CMP tmp0, -x, 1, 0
+ * CMP tmp1, x, 1, 0
+ * ADD result, tmp0, -tmp1;
+ */
+ unsigned tmp0, tmp1;
+
+ /* 0 < x */
+ tmp0 = rc_find_free_temporary(c);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
+ dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask),
+ negate(inst->U.I.SrcReg[0]),
+ builtin_one,
+ builtin_zero);
+
+ /* x < 0 */
+ tmp1 = rc_find_free_temporary(c);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
+ dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
+ inst->U.I.SrcReg[0],
+ builtin_one,
+ builtin_zero);
+
+ /* Either both are zero, or one of them is one and the other is zero. */
+ /* result = tmp0 - tmp1 */
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tmp0),
+ negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
+
+ rc_remove_instruction(inst);
+}
+
static void transform_SUB(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -516,6 +553,7 @@ int radeonTransformALU(
switch(inst->U.I.Opcode) {
case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_DST: transform_DST(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
@@ -530,6 +568,7 @@ int radeonTransformALU(
case RC_OPCODE_SLE: transform_SLE(c, inst); return 1;
case RC_OPCODE_SLT: transform_SLT(c, inst); return 1;
case RC_OPCODE_SNE: transform_SNE(c, inst); return 1;
+ case RC_OPCODE_SSG: transform_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
@@ -577,6 +616,29 @@ static void transform_r300_vertex_CMP(struct radeon_compiler* c,
rc_remove_instruction(inst);
}
+static void transform_r300_vertex_DP2(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_instruction *next_inst = inst->Next;
+ transform_DP2(c, inst);
+ next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4;
+}
+
+static void transform_r300_vertex_DP3(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_src_register src0 = inst->U.I.SrcReg[0];
+ struct rc_src_register src1 = inst->U.I.SrcReg[1];
+ src0.Negate &= ~RC_MASK_W;
+ src0.Swizzle &= ~(7 << (3 * 3));
+ src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ src1.Negate &= ~RC_MASK_W;
+ src1.Swizzle &= ~(7 << (3 * 3));
+ src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+ rc_remove_instruction(inst);
+}
+
static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
struct rc_instruction* inst)
{
@@ -672,6 +734,41 @@ static void transform_r300_vertex_SLE(struct radeon_compiler* c,
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
}
+static void transform_r300_vertex_SSG(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* result = sign(x)
+ *
+ * SLT tmp0, 0, x;
+ * SLT tmp1, x, 0;
+ * ADD result, tmp0, -tmp1;
+ */
+ unsigned tmp0, tmp1;
+
+ /* 0 < x */
+ tmp0 = rc_find_free_temporary(c);
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstregtmpmask(tmp0, inst->U.I.DstReg.WriteMask),
+ builtin_zero,
+ inst->U.I.SrcReg[0]);
+
+ /* x < 0 */
+ tmp1 = rc_find_free_temporary(c);
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
+ inst->U.I.SrcReg[0],
+ builtin_zero);
+
+ /* Either both are zero, or one of them is one and the other is zero. */
+ /* result = tmp0 - tmp1 */
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tmp0),
+ negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
+
+ rc_remove_instruction(inst);
+}
+
/**
* For use with radeonLocalTransform, this transforms non-native ALU
* instructions of the r300 up to r500 vertex engine.
@@ -685,7 +782,8 @@ int r300_transform_vertex_alu(
case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
- case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
+ case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
+ case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
@@ -705,6 +803,7 @@ int r300_transform_vertex_alu(
return 1;
}
return 0;
+ case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1;
case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
index 9c4b65f4c0..ddce590ee6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -117,8 +117,8 @@ int radeonTransformTEX(
struct rc_instruction * inst_rcp = NULL;
struct rc_instruction * inst_mad;
struct rc_instruction * inst_cmp;
- unsigned tmp_texsample = rc_find_free_temporary(c);
- unsigned tmp_sum = rc_find_free_temporary(c);
+ unsigned tmp_texsample;
+ unsigned tmp_sum;
unsigned tmp_recip_w = 0;
int pass, fail, tex;
@@ -126,6 +126,7 @@ int radeonTransformTEX(
struct rc_dst_register output_reg = inst->U.I.DstReg;
/* Redirect TEX to a new temp. */
+ tmp_texsample = rc_find_free_temporary(c);
inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst->U.I.DstReg.Index = tmp_texsample;
inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
@@ -144,6 +145,7 @@ int radeonTransformTEX(
}
/* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */
+ tmp_sum = rc_find_free_temporary(c);
inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
inst_mad->U.I.DstReg.Index = tmp_sum;
@@ -199,6 +201,8 @@ int radeonTransformTEX(
inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
+
+ assert(tmp_texsample != tmp_sum && tmp_sum != tmp_recip_w);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
new file mode 100644
index 0000000000..be89e9fa5b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_remove_constants.h"
+
+void rc_remove_unused_constants(struct radeon_compiler *c,
+ unsigned **out_remap_table)
+{
+ unsigned char *const_used;
+ unsigned *remap_table;
+ unsigned *inv_remap_table;
+ unsigned has_rel_addr = 0;
+ unsigned is_identity = 1;
+ unsigned are_externals_remapped = 0;
+ struct rc_constant *constants = c->Program.Constants.Constants;
+
+ if (!c->Program.Constants.Count) {
+ *out_remap_table = NULL;
+ return;
+ }
+
+ const_used = malloc(c->Program.Constants.Count);
+ memset(const_used, 0, c->Program.Constants.Count);
+
+ /* Pass 1: Mark used constants. */
+ for (struct rc_instruction *inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) {
+ if (inst->U.I.SrcReg[i].RelAddr) {
+ has_rel_addr = 1;
+ } else {
+ const_used[inst->U.I.SrcReg[i].Index] = 1;
+ }
+ }
+ }
+ }
+
+ /* Pass 2: If there is relative addressing, mark all externals as used. */
+ if (has_rel_addr) {
+ for (unsigned i = 0; i < c->Program.Constants.Count; i++)
+ if (constants[i].Type == RC_CONSTANT_EXTERNAL)
+ const_used[i] = 1;
+ }
+
+ /* Pass 3: Make the remapping table and remap constants.
+ * This pass removes unused constants simply by overwriting them by other constants. */
+ remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
+ inv_remap_table = malloc(c->Program.Constants.Count * sizeof(unsigned));
+ unsigned new_count = 0;
+
+ for (unsigned i = 0; i < c->Program.Constants.Count; i++) {
+ if (const_used[i]) {
+ remap_table[new_count] = i;
+ inv_remap_table[i] = new_count;
+
+ if (i != new_count) {
+ if (constants[i].Type == RC_CONSTANT_EXTERNAL)
+ are_externals_remapped = 1;
+
+ constants[new_count] = constants[i];
+ is_identity = 0;
+ }
+ new_count++;
+ }
+ }
+
+ /* is_identity ==> new_count == old_count
+ * !is_identity ==> new_count < old_count */
+ assert( is_identity || new_count < c->Program.Constants.Count);
+ assert(!(has_rel_addr && are_externals_remapped));
+
+ /* Pass 4: Redirect reads of all constants to their new locations. */
+ if (!is_identity) {
+ for (struct rc_instruction *inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_CONSTANT) {
+ inst->U.I.SrcReg[i].Index = inv_remap_table[inst->U.I.SrcReg[i].Index];
+ }
+ }
+ }
+
+ }
+
+ /* Set the new constant count. Note that new_count may be less than
+ * Count even though the remapping function is identity. In that case,
+ * the constants have been removed at the end of the array. */
+ c->Program.Constants.Count = new_count;
+
+ if (are_externals_remapped) {
+ *out_remap_table = remap_table;
+ } else {
+ *out_remap_table = NULL;
+ free(remap_table);
+ }
+
+ free(const_used);
+ free(inv_remap_table);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h
new file mode 100644
index 0000000000..0d3a26ca1c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_remove_constants.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_REMOVE_CONSTANTS_H
+#define RADEON_REMOVE_CONSTANTS_H
+
+#include "radeon_compiler.h"
+
+void rc_remove_unused_constants(struct radeon_compiler *c,
+ unsigned **out_remap_table);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
index 95f4306f60..7b6521c748 100644
--- a/src/mesa/drivers/dri/r300/r300_fragprog_common.c
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -38,7 +38,6 @@
#include "r300_fragprog_common.h"
-#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "compiler/radeon_compiler.h"
diff --git a/src/mesa/drivers/dri/r600/Makefile b/src/mesa/drivers/dri/r600/Makefile
index 17915621ee..03c17540e0 100644
--- a/src/mesa/drivers/dri/r600/Makefile
+++ b/src/mesa/drivers/dri/r600/Makefile
@@ -59,6 +59,15 @@ DRIVER_SOURCES = \
r600_texstate.c \
r600_blit.c \
r700_debug.c \
+ evergreen_context.c \
+ evergreen_state.c \
+ evergreen_tex.c \
+ evergreen_ioctl.c \
+ evergreen_render.c \
+ evergreen_chip.c \
+ evergreen_vertprog.c \
+ evergreen_fragprog.c \
+ evergreen_oglprog.c \
$(RADEON_COMMON_SOURCES) \
$(EGL_SOURCES) \
$(CS_SOURCES)
diff --git a/src/mesa/drivers/dri/r600/evergreen_chip.c b/src/mesa/drivers/dri/r600/evergreen_chip.c
new file mode 100644
index 0000000000..f925f215bc
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_chip.c
@@ -0,0 +1,1289 @@
+/*
+ * Copyright (C) 2008-2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include "main/imports.h"
+#include "main/glheader.h"
+#include "main/simple_list.h"
+
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+
+#include "evergreen_chip.h"
+#include "evergreen_off.h"
+#include "evergreen_diff.h"
+#include "evergreen_fragprog.h"
+#include "evergreen_vertprog.h"
+
+#include "radeon_mipmap_tree.h"
+
+void evergreenCreateChip(context_t *context)
+{
+ EVERGREEN_CHIP_CONTEXT * evergreen =
+ (EVERGREEN_CHIP_CONTEXT*) CALLOC(sizeof(EVERGREEN_CHIP_CONTEXT));
+
+ context->pChip = (void*)evergreen;
+}
+
+#define EVERGREEN_ALLOC_STATE( ATOM, CHK, SZ, EMIT ) \
+do { \
+ context->evergreen_atoms.ATOM.cmd_size = (SZ); \
+ context->evergreen_atoms.ATOM.cmd = NULL; \
+ context->evergreen_atoms.ATOM.name = #ATOM; \
+ context->evergreen_atoms.ATOM.idx = 0; \
+ context->evergreen_atoms.ATOM.check = check_##CHK; \
+ context->evergreen_atoms.ATOM.dirty = GL_FALSE; \
+ context->evergreen_atoms.ATOM.emit = (EMIT); \
+ context->radeon.hw.max_state_size += (SZ); \
+ insert_at_tail(&context->radeon.hw.atomlist, &context->evergreen_atoms.ATOM); \
+} while (0)
+
+/*
+static void evergreen_init_query_stateobj(radeonContextPtr radeon, int SZ)
+{
+ radeon->query.queryobj.cmd_size = (SZ);
+ radeon->query.queryobj.cmd = NULL;
+ radeon->query.queryobj.name = "queryobj";
+ radeon->query.queryobj.idx = 0;
+ radeon->query.queryobj.check = check_queryobj;
+ radeon->query.queryobj.dirty = GL_FALSE;
+ radeon->query.queryobj.emit = r700SendQueryBegin;
+ radeon->hw.max_state_size += (SZ);
+ insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj);
+}
+*/
+
+static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ return atom->cmd_size;
+}
+
+static void evergreenSendTexState(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ struct evergreen_vertex_program *vp = context->selected_vp;
+
+ struct radeon_bo *bo = NULL;
+ unsigned int i;
+ unsigned int nBorderSet = 0;
+ BATCH_LOCALS(&context->radeon);
+
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ radeonTexObj *t = evergreen->textures[i];
+
+ if (t) {
+ /* Tex resource */
+ if (!t->image_override) {
+ bo = t->mt->bo;
+ } else {
+ bo = t->bo;
+ }
+ if (bo)
+ {
+ radeon_bo_unmap(bo);
+
+ r700SyncSurf(context, bo,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM,
+ 0, TC_ACTION_ENA_bit);
+
+ BEGIN_BATCH_NO_AUTOSTATE(10 + 4);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 8));
+
+ if( (1<<i) & vp->r700AsmCode.unVetTexBits )
+ { /* vs texture */
+ R600_OUT_BATCH((i + VERT_ATTRIB_MAX + EG_SQ_FETCH_RESOURCE_VS_OFFSET) * FETCH_RESOURCE_STRIDE);
+ }
+ else
+ {
+ R600_OUT_BATCH(i * EG_FETCH_RESOURCE_STRIDE);
+ }
+
+ R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE0);
+ R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE1);
+ R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE2);
+ R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE3);
+ R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE4);
+ R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE5);
+ R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE6);
+ R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_RESOURCE7);
+
+ R600_OUT_BATCH_RELOC(evergreen->textures[i]->SQ_TEX_RESOURCE2,
+ bo,
+ evergreen->textures[i]->SQ_TEX_RESOURCE2,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ R600_OUT_BATCH_RELOC(evergreen->textures[i]->SQ_TEX_RESOURCE3,
+ bo,
+ evergreen->textures[i]->SQ_TEX_RESOURCE3,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ END_BATCH();
+ COMMIT_BATCH();
+ }
+ /* Tex sampler */
+ BEGIN_BATCH_NO_AUTOSTATE(5);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, 3));
+
+ if( (1<<i) & vp->r700AsmCode.unVetTexBits )
+ { /* vs texture */
+ R600_OUT_BATCH((i+SQ_TEX_SAMPLER_VS_OFFSET) * 3);
+ }
+ else
+ {
+ R600_OUT_BATCH(i * 3);
+ }
+ R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_SAMPLER0);
+ R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_SAMPLER1);
+ R600_OUT_BATCH(evergreen->textures[i]->SQ_TEX_SAMPLER2);
+
+ END_BATCH();
+ COMMIT_BATCH();
+
+ /* Tex border color */
+ if(0 == nBorderSet)
+ {
+ BEGIN_BATCH_NO_AUTOSTATE(2 + 4);
+ R600_OUT_BATCH_REGSEQ(EG_TD_PS_BORDER_COLOR_RED, 4);
+ R600_OUT_BATCH(evergreen->textures[i]->TD_PS_SAMPLER0_BORDER_RED);
+ R600_OUT_BATCH(evergreen->textures[i]->TD_PS_SAMPLER0_BORDER_GREEN);
+ R600_OUT_BATCH(evergreen->textures[i]->TD_PS_SAMPLER0_BORDER_BLUE);
+ R600_OUT_BATCH(evergreen->textures[i]->TD_PS_SAMPLER0_BORDER_ALPHA);
+ END_BATCH();
+ COMMIT_BATCH();
+
+ nBorderSet = 1;
+ }
+ }
+ }
+ }
+}
+
+static int check_evergreen_tx(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ unsigned int i, count = 0;
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ for (i = 0; i < R700_TEXTURE_NUMBERUNITS; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ radeonTexObj *t = evergreen->textures[i];
+ if (t)
+ count++;
+ }
+ }
+ radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+ return count * 37 + 6;
+}
+
+static void evergreenSendSQConfig(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ BEGIN_BATCH_NO_AUTOSTATE(19);
+ //6
+ EVERGREEN_OUT_BATCH_REGVAL(EG_SPI_CONFIG_CNTL, evergreen->evergreen_config.SPI_CONFIG_CNTL.u32All);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_SPI_CONFIG_CNTL_1, evergreen->evergreen_config.SPI_CONFIG_CNTL_1.u32All);
+ //6
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_CONFIG, 4);
+ R600_OUT_BATCH(evergreen->evergreen_config.SQ_CONFIG.u32All);
+ R600_OUT_BATCH(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_1.u32All);
+ R600_OUT_BATCH(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_2.u32All);
+ R600_OUT_BATCH(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_3.u32All);
+ //7
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_THREAD_RESOURCE_MGMT, 5);
+ R600_OUT_BATCH(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT.u32All);
+ R600_OUT_BATCH(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT_2.u32All);
+ R600_OUT_BATCH(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_1.u32All);
+ R600_OUT_BATCH(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_2.u32All);
+ R600_OUT_BATCH(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_3.u32All);
+
+ END_BATCH();
+
+ COMMIT_BATCH();
+}
+
+extern int evergreen_getTypeSize(GLenum type);
+static void evergreenSetupVTXConstants(GLcontext * ctx,
+ void * pAos,
+ StreamDesc * pStreamDesc)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ struct radeon_aos * paos = (struct radeon_aos *)pAos;
+ unsigned int nVBsize;
+ BATCH_LOCALS(&context->radeon);
+
+ unsigned int uSQ_VTX_CONSTANT_WORD0_0;
+ unsigned int uSQ_VTX_CONSTANT_WORD1_0;
+ unsigned int uSQ_VTX_CONSTANT_WORD2_0 = 0;
+ unsigned int uSQ_VTX_CONSTANT_WORD3_0 = 0;
+ unsigned int uSQ_VTX_CONSTANT_WORD7_0 = 0;
+
+ if (!paos->bo)
+ return;
+
+ r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
+
+ if(0 == pStreamDesc->stride)
+ {
+ nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type);
+ }
+ else
+ {
+ nVBsize = (paos->count - 1) * pStreamDesc->stride
+ + pStreamDesc->size * getTypeSize(pStreamDesc->type);
+ }
+
+ //uSQ_VTX_CONSTANT_WORD0_0
+ uSQ_VTX_CONSTANT_WORD0_0 = paos->offset;
+
+ //uSQ_VTX_CONSTANT_WORD1_0
+ uSQ_VTX_CONSTANT_WORD1_0 = nVBsize;
+
+ //uSQ_VTX_CONSTANT_WORD2_0
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0,
+ pStreamDesc->stride,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
+ SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask);
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, GetSurfaceFormat(pStreamDesc->type, pStreamDesc->size, NULL),
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift,
+ SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask); // TODO : trace back api for initial data type, not only GL_FLOAT
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); // TODO
+ if(GL_TRUE == pStreamDesc->normalize)
+ {
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_NORM,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+ }
+ else
+ {
+ SETfield(uSQ_VTX_CONSTANT_WORD2_0, SQ_NUM_FORMAT_SCALED,
+ SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift, SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask);
+ }
+ if(1 == pStreamDesc->_signed)
+ {
+ SETbit(uSQ_VTX_CONSTANT_WORD2_0, SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit);
+ }
+
+ //uSQ_VTX_CONSTANT_WORD3_0
+ SETfield(uSQ_VTX_CONSTANT_WORD3_0, SQ_SEL_X,
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift,
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_mask);
+ SETfield(uSQ_VTX_CONSTANT_WORD3_0, SQ_SEL_Y,
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift,
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_mask);
+ SETfield(uSQ_VTX_CONSTANT_WORD3_0, SQ_SEL_Z,
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift,
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_mask);
+ SETfield(uSQ_VTX_CONSTANT_WORD3_0, SQ_SEL_W,
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift,
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_mask);
+
+ //uSQ_VTX_CONSTANT_WORD7_0
+ SETfield(uSQ_VTX_CONSTANT_WORD7_0, SQ_TEX_VTX_VALID_BUFFER,
+ SQ_TEX_RESOURCE_WORD6_0__TYPE_shift, SQ_TEX_RESOURCE_WORD6_0__TYPE_mask);
+
+ BEGIN_BATCH_NO_AUTOSTATE(10 + 2);
+
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, 8));
+ R600_OUT_BATCH((pStreamDesc->element + EG_SQ_FETCH_RESOURCE_VS_OFFSET) * EG_FETCH_RESOURCE_STRIDE);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD0_0);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD1_0);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD2_0);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD3_0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(uSQ_VTX_CONSTANT_WORD7_0);
+ R600_OUT_BATCH_RELOC(uSQ_VTX_CONSTANT_WORD0_0,
+ paos->bo,
+ uSQ_VTX_CONSTANT_WORD0_0,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+
+ COMMIT_BATCH();
+}
+
+static int check_evergreen_vtx(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ int count = context->radeon.tcl.aos_count * 12;
+
+ if (count)
+ count += 6;
+
+ radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+ return count;
+}
+
+static void evergreenSendVTX(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ struct evergreen_vertex_program *vp = (struct evergreen_vertex_program *)(context->selected_vp);
+ unsigned int i, j = 0;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ if (context->radeon.tcl.aos_count == 0)
+ return;
+
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+ R600_OUT_BATCH(mmSQ_VTX_BASE_VTX_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(0);
+
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, 1));
+ R600_OUT_BATCH(mmSQ_VTX_START_INST_LOC - ASIC_CTL_CONST_BASE_INDEX);
+ R600_OUT_BATCH(0);
+ END_BATCH();
+ COMMIT_BATCH();
+
+ for(i=0; i<VERT_ATTRIB_MAX; i++) {
+ if(vp->mesa_program->Base.InputsRead & (1 << i))
+ {
+ evergreenSetupVTXConstants(ctx,
+ (void*)(&context->radeon.tcl.aos[j]),
+ &(context->stream_desc[j]));
+ j++;
+ }
+ }
+}
+static void evergreenSendPA(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+ int id = 0;
+
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(22);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_SCREEN_SCISSOR_TL, 2);
+ R600_OUT_BATCH(evergreen->PA_SC_SCREEN_SCISSOR_TL.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_SCREEN_SCISSOR_BR.u32All);
+
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_WINDOW_OFFSET, 12);
+ R600_OUT_BATCH(evergreen->PA_SC_WINDOW_OFFSET.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_WINDOW_SCISSOR_TL.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_WINDOW_SCISSOR_BR.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_RULE.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_0_TL.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_0_BR.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_1_TL.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_1_BR.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_2_TL.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_2_BR.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_3_TL.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_CLIPRECT_3_BR.u32All);
+
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_GENERIC_SCISSOR_TL, 2);
+ R600_OUT_BATCH(evergreen->PA_SC_GENERIC_SCISSOR_TL.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_GENERIC_SCISSOR_BR.u32All);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_PA_SC_EDGERULE, evergreen->PA_SC_EDGERULE.u32All);
+ END_BATCH();
+
+
+ BEGIN_BATCH_NO_AUTOSTATE(18);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_VPORT_SCISSOR_0_TL, 4);
+ R600_OUT_BATCH(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All);
+ R600_OUT_BATCH(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All);
+ R600_OUT_BATCH(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All);
+ R600_OUT_BATCH(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All);
+
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_VPORT_ZMIN_0, 2);
+ R600_OUT_BATCH(evergreen->viewport[id].PA_SC_VPORT_ZMIN_0.u32All);
+ R600_OUT_BATCH(evergreen->viewport[id].PA_SC_VPORT_ZMAX_0.u32All);
+
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_CL_VPORT_XSCALE, 6);
+ R600_OUT_BATCH(evergreen->viewport[id].PA_CL_VPORT_XSCALE.u32All);
+ R600_OUT_BATCH(evergreen->viewport[id].PA_CL_VPORT_XOFFSET.u32All);
+ R600_OUT_BATCH(evergreen->viewport[id].PA_CL_VPORT_YSCALE.u32All);
+ R600_OUT_BATCH(evergreen->viewport[id].PA_CL_VPORT_YOFFSET.u32All);
+ R600_OUT_BATCH(evergreen->viewport[id].PA_CL_VPORT_ZSCALE.u32All);
+ R600_OUT_BATCH(evergreen->viewport[id].PA_CL_VPORT_ZOFFSET.u32All);
+ END_BATCH();
+
+
+ for (id = 0; id < EVERGREEN_MAX_UCP; id++) {
+ if (evergreen->ucp[id].enabled) {
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_CL_UCP_0_X + (4 * id), 4);
+ R600_OUT_BATCH(evergreen->ucp[id].PA_CL_UCP_0_X.u32All);
+ R600_OUT_BATCH(evergreen->ucp[id].PA_CL_UCP_0_Y.u32All);
+ R600_OUT_BATCH(evergreen->ucp[id].PA_CL_UCP_0_Z.u32All);
+ R600_OUT_BATCH(evergreen->ucp[id].PA_CL_UCP_0_W.u32All);
+ END_BATCH();
+ }
+ }
+
+ BEGIN_BATCH_NO_AUTOSTATE(42);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_CL_CLIP_CNTL, 5);
+ R600_OUT_BATCH(evergreen->PA_CL_CLIP_CNTL.u32All);
+ R600_OUT_BATCH(evergreen->PA_SU_SC_MODE_CNTL.u32All);
+ R600_OUT_BATCH(evergreen->PA_CL_VTE_CNTL.u32All);
+ R600_OUT_BATCH(evergreen->PA_CL_VS_OUT_CNTL.u32All);
+ R600_OUT_BATCH(evergreen->PA_CL_NANINF_CNTL.u32All);
+
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SU_POINT_SIZE, 3);
+ R600_OUT_BATCH(evergreen->PA_SU_POINT_SIZE.u32All);
+ R600_OUT_BATCH(evergreen->PA_SU_POINT_MINMAX.u32All);
+ R600_OUT_BATCH(evergreen->PA_SU_LINE_CNTL.u32All);
+
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_MODE_CNTL_0, 2);
+ R600_OUT_BATCH(evergreen->PA_SC_MODE_CNTL_0.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_MODE_CNTL_1.u32All);
+
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
+ R600_OUT_BATCH(evergreen->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All);
+ R600_OUT_BATCH(evergreen->PA_SU_POLY_OFFSET_CLAMP.u32All);
+ R600_OUT_BATCH(evergreen->PA_SU_POLY_OFFSET_FRONT_SCALE.u32All);
+ R600_OUT_BATCH(evergreen->PA_SU_POLY_OFFSET_FRONT_OFFSET.u32All);
+ R600_OUT_BATCH(evergreen->PA_SU_POLY_OFFSET_BACK_SCALE.u32All);
+ R600_OUT_BATCH(evergreen->PA_SU_POLY_OFFSET_BACK_OFFSET.u32All);
+
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_PA_SC_LINE_CNTL, 16);
+ R600_OUT_BATCH(evergreen->PA_SC_LINE_CNTL.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_AA_CONFIG.u32All);
+ R600_OUT_BATCH(evergreen->PA_SU_VTX_CNTL.u32All);
+ R600_OUT_BATCH(evergreen->PA_CL_GB_VERT_CLIP_ADJ.u32All);
+ R600_OUT_BATCH(evergreen->PA_CL_GB_VERT_DISC_ADJ.u32All);
+ R600_OUT_BATCH(evergreen->PA_CL_GB_HORZ_CLIP_ADJ.u32All);
+ R600_OUT_BATCH(evergreen->PA_CL_GB_HORZ_DISC_ADJ.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_0.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_1.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_2.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_3.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_4.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_5.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_6.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_AA_SAMPLE_LOCS_7.u32All);
+ R600_OUT_BATCH(evergreen->PA_SC_AA_MASK.u32All);
+
+ END_BATCH();
+
+ COMMIT_BATCH();
+}
+static void evergreenSendTP(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ /*
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ COMMIT_BATCH();
+ */
+}
+
+static void evergreenSendPSresource(GLcontext *ctx)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ struct radeon_bo * pbo;
+
+ struct radeon_bo * pbo_const;
+
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ pbo = (struct radeon_bo *)evergreenGetActiveFpShaderBo(GL_CONTEXT(context));
+
+ if (!pbo)
+ return;
+
+ r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_START_PS, 1);
+ R600_OUT_BATCH(evergreen->ps.SQ_PGM_START_PS.u32All);
+ R600_OUT_BATCH_RELOC(evergreen->ps.SQ_PGM_START_PS.u32All,
+ pbo,
+ evergreen->ps.SQ_PGM_START_PS.u32All,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_SQ_LOOP_CONST_0, 0x01000FFF);
+ END_BATCH();
+
+ pbo_const = (struct radeon_bo *)(context->fp_Constbo);
+
+ if(NULL != pbo_const)
+ {
+ r700SyncSurf(context, pbo_const, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+
+ if(evergreen->ps.num_consts < 4)
+ {
+ EVERGREEN_OUT_BATCH_REGVAL(EG_SQ_ALU_CONST_BUFFER_SIZE_PS_0, 1);
+ }
+ else
+ {
+ EVERGREEN_OUT_BATCH_REGVAL(EG_SQ_ALU_CONST_BUFFER_SIZE_PS_0, (evergreen->ps.num_consts * 4)/16 );
+ }
+
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_ALU_CONST_CACHE_PS_0, 1);
+ R600_OUT_BATCH(context->fp_bo_offset >> 8);
+ R600_OUT_BATCH_RELOC(0,
+ pbo_const,
+ 0,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+ }
+
+ COMMIT_BATCH();
+}
+
+static void evergreenSendVSresource(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ struct radeon_bo * pbo;
+
+ struct radeon_bo * pbo_const;
+
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ pbo = (struct radeon_bo *)evergreenGetActiveVpShaderBo(GL_CONTEXT(context));
+
+ if (!pbo)
+ return;
+
+ r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_START_VS, 1);
+ R600_OUT_BATCH(evergreen->vs.SQ_PGM_START_VS.u32All);
+ R600_OUT_BATCH_RELOC(evergreen->vs.SQ_PGM_START_VS.u32All,
+ pbo,
+ evergreen->vs.SQ_PGM_START_VS.u32All,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ EVERGREEN_OUT_BATCH_REGVAL((EG_SQ_LOOP_CONST_0 + 32*1), 0x0100000F); //consts == 1
+ //EVERGREEN_OUT_BATCH_REGVAL((EG_SQ_LOOP_CONST_0 + (SQ_LOOP_CONST_vs<2)), 0x0100000F);
+ END_BATCH();
+
+ pbo_const = (struct radeon_bo *)(context->vp_Constbo);
+
+ if(NULL != pbo_const)
+ {
+ r700SyncSurf(context, pbo_const, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
+
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+
+ if(evergreen->vs.num_consts < 4)
+ {
+ EVERGREEN_OUT_BATCH_REGVAL(EG_SQ_ALU_CONST_BUFFER_SIZE_VS_0, 1);
+ }
+ else
+ {
+ EVERGREEN_OUT_BATCH_REGVAL(EG_SQ_ALU_CONST_BUFFER_SIZE_VS_0, (evergreen->vs.num_consts * 4)/16 );
+ }
+
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_ALU_CONST_CACHE_VS_0, 1);
+ R600_OUT_BATCH(context->vp_bo_offset >> 8);
+ R600_OUT_BATCH_RELOC(0,
+ pbo_const,
+ 0,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+ }
+
+ COMMIT_BATCH();
+}
+
+static void evergreenSendSQ(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ evergreenSendPSresource(ctx); //16 entries now
+
+ BEGIN_BATCH_NO_AUTOSTATE(77);
+
+ //34
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_VTX_SEMANTIC_0, 32);
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_0.u32All); //// // = 0x28380, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_1.u32All); //// // = 0x28384, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_2.u32All); //// // = 0x28388, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_3.u32All); //// // = 0x2838C, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_4.u32All); //// // = 0x28390, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_5.u32All); //// // = 0x28394, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_6.u32All); //// // = 0x28398, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_7.u32All); //// // = 0x2839C, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_8.u32All); //// // = 0x283A0, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_9.u32All); //// // = 0x283A4, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_10.u32All); //// // = 0x283A8, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_11.u32All); //// // = 0x283AC, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_12.u32All); //// // = 0x283B0, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_13.u32All); //// // = 0x283B4, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_14.u32All); //// // = 0x283B8, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_15.u32All); //// // = 0x283BC, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_16.u32All); //// // = 0x283C0, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_17.u32All); //// // = 0x283C4, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_18.u32All); //// // = 0x283C8, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_19.u32All); //// // = 0x283CC, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_20.u32All); //// // = 0x283D0, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_21.u32All); //// // = 0x283D4, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_22.u32All); //// // = 0x283D8, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_23.u32All); //// // = 0x283DC, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_24.u32All); //// // = 0x283E0, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_25.u32All); //// // = 0x283E4, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_26.u32All); //// // = 0x283E8, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_27.u32All); //// // = 0x283EC, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_28.u32All); //// // = 0x283F0, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_29.u32All); //// // = 0x283F4, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_30.u32All); //// // = 0x283F8, // SAME
+ R600_OUT_BATCH(evergreen->SQ_VTX_SEMANTIC_31.u32All); //// // = 0x283FC, // SAME
+
+
+ //3
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_DYN_GPR_RESOURCE_LIMIT_1, 1);
+ R600_OUT_BATCH(evergreen->SQ_DYN_GPR_RESOURCE_LIMIT_1.u32All);//// // = 0x28838, //
+
+ //5
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_PS, 3);
+ R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_PS.u32All); //// // = 0x28844, // DIFF 0x28850
+ R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_2_PS.u32All); //// // = 0x28848, //
+ R600_OUT_BATCH(evergreen->SQ_PGM_EXPORTS_PS.u32All); //// // = 0x2884C, // SAME 0x28854
+
+ //4
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_VS, 2);
+ R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_VS.u32All);//// // = 0x28860, // DIFF 0x28868
+ R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_2_VS.u32All); //// // = 0x28864, //
+
+ //5
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_GS, 2);
+ /*
+ R600_OUT_BATCH(evergreen->SQ_PGM_START_GS.u32All); //// // = 0x28874, // SAME 0x2886C
+ */
+ R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_GS.u32All); //// // = 0x28878, // DIFF 0x2887C
+ R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_2_GS.u32All); //// // = 0x2887C, //
+
+ //5
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_ES, 2);
+ /*
+ R600_OUT_BATCH(evergreen->SQ_PGM_START_ES.u32All); //// // = 0x2888C, // SAME 0x28880
+ */
+ R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_ES.u32All); //// // = 0x28890, // DIFF
+ R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_2_ES.u32All); //// // = 0x28894, //
+
+ //4
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_FS, 1);
+ /*
+ R600_OUT_BATCH(evergreen->SQ_PGM_START_FS.u32All); //// // = 0x288A4, // SAME 0x28894
+ */
+ R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_FS.u32All); //// // = 0x288A8, // DIFF 0x288A4
+
+ //3
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_2_HS, 1);
+ R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_2_HS.u32All);//// // = 0x288C0, //
+
+ //3
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_PGM_RESOURCES_2_LS, 1);
+ R600_OUT_BATCH(evergreen->SQ_PGM_RESOURCES_2_LS.u32All); //// // = 0x288D8, //
+
+ //3
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_LDS_ALLOC_PS, 1);
+ R600_OUT_BATCH(evergreen->SQ_LDS_ALLOC_PS.u32All); //// // = 0x288EC, //
+
+ //8
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_ESGS_RING_ITEMSIZE, 6);
+ R600_OUT_BATCH(evergreen->SQ_ESGS_RING_ITEMSIZE.u32All); //// // = 0x28900, // SAME 0x288A8
+ R600_OUT_BATCH(evergreen->SQ_GSVS_RING_ITEMSIZE.u32All); //// // = 0x28904, // SAME 0x288AC
+ R600_OUT_BATCH(evergreen->SQ_ESTMP_RING_ITEMSIZE.u32All); //// // = 0x28908, // SAME 0x288B0
+ R600_OUT_BATCH(evergreen->SQ_GSTMP_RING_ITEMSIZE.u32All); //// // = 0x2890C, // SAME 0x288B4
+ R600_OUT_BATCH(evergreen->SQ_VSTMP_RING_ITEMSIZE.u32All); //// // = 0x28910, // SAME 0x288B8
+ R600_OUT_BATCH(evergreen->SQ_PSTMP_RING_ITEMSIZE.u32All); //// // = 0x28914, // SAME 0x288BC
+
+ //3
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SQ_GS_VERT_ITEMSIZE, 1);
+ R600_OUT_BATCH(evergreen->SQ_GS_VERT_ITEMSIZE.u32All); //// // = 0x2891C, // SAME 0x288C8
+
+ END_BATCH();
+
+ COMMIT_BATCH();
+
+}
+static void evergreenSendSPI(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ BEGIN_BATCH_NO_AUTOSTATE(59);
+
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SPI_VS_OUT_ID_0, 10);
+ R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_0.u32All);
+ R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_1.u32All);
+ R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_2.u32All);
+ R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_3.u32All);
+ R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_4.u32All);
+ R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_5.u32All);
+ R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_6.u32All);
+ R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_7.u32All);
+ R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_8.u32All);
+ R600_OUT_BATCH(evergreen->SPI_VS_OUT_ID_9.u32All);
+
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_SPI_PS_INPUT_CNTL_0, 45);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[0].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[1].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[2].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[3].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[4].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[5].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[6].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[7].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[8].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[9].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[10].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[11].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[12].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[13].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[14].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[15].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[16].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[17].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[18].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[19].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[20].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[21].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[22].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[23].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[24].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[25].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[26].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[27].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[28].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[29].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[30].u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_INPUT_CNTL[31].u32All);
+ R600_OUT_BATCH(evergreen->SPI_VS_OUT_CONFIG.u32All);
+ R600_OUT_BATCH(evergreen->SPI_THREAD_GROUPING.u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_IN_CONTROL_0.u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_IN_CONTROL_1.u32All);
+ R600_OUT_BATCH(evergreen->SPI_INTERP_CONTROL_0.u32All);
+ R600_OUT_BATCH(evergreen->SPI_INPUT_Z.u32All);
+ R600_OUT_BATCH(evergreen->SPI_FOG_CNTL.u32All);
+ R600_OUT_BATCH(evergreen->SPI_BARYC_CNTL.u32All);
+ R600_OUT_BATCH(evergreen->SPI_PS_IN_CONTROL_2.u32All);
+ R600_OUT_BATCH(evergreen->SPI_COMPUTE_INPUT_CNTL.u32All);
+ R600_OUT_BATCH(evergreen->SPI_COMPUTE_NUM_THREAD_X.u32All);
+ R600_OUT_BATCH(evergreen->SPI_COMPUTE_NUM_THREAD_Y.u32All);
+ R600_OUT_BATCH(evergreen->SPI_COMPUTE_NUM_THREAD_Z.u32All);
+
+ END_BATCH();
+
+ COMMIT_BATCH();
+}
+static void evergreenSendSX(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ BEGIN_BATCH_NO_AUTOSTATE(9);
+
+ EVERGREEN_OUT_BATCH_REGVAL(EG_SX_MISC, evergreen->SX_MISC.u32All);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_SX_ALPHA_TEST_CONTROL, evergreen->SX_ALPHA_TEST_CONTROL.u32All);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_SX_ALPHA_REF, evergreen->SX_ALPHA_REF.u32All);
+
+ END_BATCH();
+
+ COMMIT_BATCH();
+}
+
+static void evergreenSetDepthTarget(context_t *context)
+{
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ struct radeon_renderbuffer *rrb;
+ unsigned int nPitchInPixel;
+
+ rrb = radeon_get_depthbuffer(&context->radeon);
+ if (!rrb)
+ {
+ return;
+ }
+
+ EVERGREEN_STATECHANGE(context, db);
+
+ evergreen->DB_DEPTH_SIZE.u32All = 0;
+
+ SETfield(evergreen->DB_DEPTH_SIZE.u32All, (nPitchInPixel/8)-1,
+ EG_DB_DEPTH_SIZE__PITCH_TILE_MAX_shift,
+ EG_DB_DEPTH_SIZE__PITCH_TILE_MAX_mask);
+ SETfield(evergreen->DB_DEPTH_SIZE.u32All, (context->radeon.radeonScreen->driScreen->fbHeight/8)-1,
+ EG_DB_DEPTH_SIZE__HEIGHT_TILE_MAX_shift,
+ EG_DB_DEPTH_SIZE__HEIGHT_TILE_MAX_mask);
+ evergreen->DB_DEPTH_SLICE.u32All = ( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1;
+
+ if(4 == rrb->cpp)
+ {
+ SETfield(evergreen->DB_Z_INFO.u32All, DEPTH_8_24,
+ EG_DB_Z_INFO__FORMAT_shift,
+ EG_DB_Z_INFO__FORMAT_mask);
+ }
+ else
+ {
+ SETfield(evergreen->DB_Z_INFO.u32All, DEPTH_16,
+ EG_DB_Z_INFO__FORMAT_shift,
+ EG_DB_Z_INFO__FORMAT_mask);
+ }
+ SETfield(evergreen->DB_Z_INFO.u32All, ARRAY_1D_TILED_THIN1,
+ EG_DB_Z_INFO__ARRAY_MODE_shift,
+ EG_DB_Z_INFO__ARRAY_MODE_mask);
+}
+
+static void evergreenSendDB(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ struct radeon_renderbuffer *rrb;
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ evergreenSetDepthTarget(context);
+
+ //8
+ BEGIN_BATCH_NO_AUTOSTATE(7);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_DB_RENDER_CONTROL, 5);
+ R600_OUT_BATCH(evergreen->DB_RENDER_CONTROL.u32All);
+ R600_OUT_BATCH(evergreen->DB_COUNT_CONTROL.u32All);
+ R600_OUT_BATCH(evergreen->DB_DEPTH_VIEW.u32All);
+ R600_OUT_BATCH(evergreen->DB_RENDER_OVERRIDE.u32All);
+ R600_OUT_BATCH(evergreen->DB_RENDER_OVERRIDE2.u32All);
+ /*
+ R600_OUT_BATCH(evergreen->DB_HTILE_DATA_BASE.u32All);
+ */
+ END_BATCH();
+
+ //4
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_DB_STENCIL_CLEAR, 2);
+ R600_OUT_BATCH(evergreen->DB_STENCIL_CLEAR.u32All);
+ R600_OUT_BATCH(evergreen->DB_DEPTH_CLEAR.u32All);
+ END_BATCH();
+
+ //4
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_DB_DEPTH_SIZE, 2);
+ R600_OUT_BATCH(evergreen->DB_DEPTH_SIZE.u32All);
+ R600_OUT_BATCH(evergreen->DB_DEPTH_SLICE.u32All);
+ END_BATCH();
+
+ //3
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_DB_DEPTH_CONTROL, evergreen->DB_DEPTH_CONTROL.u32All);
+ END_BATCH();
+
+ //3
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_DB_SHADER_CONTROL, evergreen->DB_SHADER_CONTROL.u32All);
+ END_BATCH();
+
+ //5
+ BEGIN_BATCH_NO_AUTOSTATE(5);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_DB_SRESULTS_COMPARE_STATE0, 3);
+ R600_OUT_BATCH(evergreen->DB_SRESULTS_COMPARE_STATE0.u32All);
+ R600_OUT_BATCH(evergreen->DB_SRESULTS_COMPARE_STATE1.u32All);
+ R600_OUT_BATCH(evergreen->DB_PRELOAD_CONTROL.u32All);
+ END_BATCH();
+
+ //3
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_DB_ALPHA_TO_MASK, evergreen->DB_ALPHA_TO_MASK.u32All);
+ END_BATCH();
+
+ rrb = radeon_get_depthbuffer(&context->radeon);
+ if( (rrb != NULL) && (rrb->bo != NULL) )
+ {
+ //5
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_DB_Z_INFO, evergreen->DB_Z_INFO.u32All);
+ R600_OUT_BATCH_RELOC(evergreen->DB_Z_INFO.u32All,
+ rrb->bo,
+ evergreen->DB_Z_INFO.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+
+ //5
+ if((evergreen->DB_DEPTH_CONTROL.u32All & Z_ENABLE_bit) > 0)
+ {
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_DB_Z_READ_BASE, evergreen->DB_Z_READ_BASE.u32All);
+ R600_OUT_BATCH_RELOC(evergreen->DB_Z_READ_BASE.u32All,
+ rrb->bo,
+ evergreen->DB_Z_READ_BASE.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+ }
+ //5
+ if((evergreen->DB_DEPTH_CONTROL.u32All & Z_WRITE_ENABLE_bit) > 0)
+ {
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_DB_Z_WRITE_BASE, evergreen->DB_Z_READ_BASE.u32All);
+ R600_OUT_BATCH_RELOC(evergreen->DB_Z_WRITE_BASE.u32All,
+ rrb->bo,
+ evergreen->DB_Z_WRITE_BASE.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+ }
+ }
+/*
+ if (ctx->DrawBuffer)
+ {
+ rrb = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+
+ if((rrb != NULL) && (rrb->bo != NULL))
+ {
+ //5
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_DB_STENCIL_INFO, evergreen->DB_Z_INFO.u32All);
+ R600_OUT_BATCH_RELOC(evergreen->DB_STENCIL_INFO.u32All,
+ rrb->bo,
+ evergreen->DB_STENCIL_INFO.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+
+ //10
+ if((evergreen->DB_DEPTH_CONTROL.u32All & STENCIL_ENABLE_bit) > 0)
+ {
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_DB_STENCIL_READ_BASE, evergreen->DB_STENCIL_READ_BASE.u32All);
+ R600_OUT_BATCH_RELOC(evergreen->DB_STENCIL_READ_BASE.u32All,
+ rrb->bo,
+ evergreen->DB_STENCIL_READ_BASE.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_DB_STENCIL_WRITE_BASE, evergreen->DB_STENCIL_WRITE_BASE.u32All);
+ R600_OUT_BATCH_RELOC(evergreen->DB_STENCIL_WRITE_BASE.u32All,
+ rrb->bo,
+ evergreen->DB_STENCIL_WRITE_BASE.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+ }
+ }
+ }
+*/
+ COMMIT_BATCH();
+}
+
+static void evergreenSetRenderTarget(context_t *context, int id)
+{
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ struct radeon_renderbuffer *rrb;
+ unsigned int nPitchInPixel;
+
+ rrb = radeon_get_colorbuffer(&context->radeon);
+ if (!rrb || !rrb->bo) {
+ return;
+ }
+
+ EVERGREEN_STATECHANGE(context, cb);
+
+ /* addr */
+ evergreen->render_target[id].CB_COLOR0_BASE.u32All = context->radeon.state.color.draw_offset / 256;
+
+ /* pitch */
+ nPitchInPixel = rrb->pitch/rrb->cpp;
+
+ SETfield(evergreen->render_target[id].CB_COLOR0_PITCH.u32All, (nPitchInPixel/8)-1,
+ EG_CB_COLOR0_PITCH__TILE_MAX_shift,
+ EG_CB_COLOR0_PITCH__TILE_MAX_mask);
+
+ /* skice */
+ SETfield(evergreen->render_target[id].CB_COLOR0_SLICE.u32All,
+ //( (nPitchInPixel * context->radeon.radeonScreen->driScreen->fbHeight)/64 )-1,
+ ( (nPitchInPixel * 240)/64 )-1,
+ EG_CB_COLOR0_SLICE__TILE_MAX_shift,
+ EG_CB_COLOR0_SLICE__TILE_MAX_mask);
+
+ /* CB_COLOR0_ATTRIB */ /* TODO : for z clear, this should be set to 0 */
+ SETbit(evergreen->render_target[id].CB_COLOR0_ATTRIB.u32All,
+ EG_CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit);
+
+ /* CB_COLOR0_INFO */
+ SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All,
+ ENDIAN_NONE,
+ EG_CB_COLOR0_INFO__ENDIAN_shift,
+ EG_CB_COLOR0_INFO__ENDIAN_mask);
+ SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All,
+ ARRAY_LINEAR_GENERAL,
+ EG_CB_COLOR0_INFO__ARRAY_MODE_shift,
+ EG_CB_COLOR0_INFO__ARRAY_MODE_mask);
+ if(4 == rrb->cpp)
+ {
+ SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All,
+ COLOR_8_8_8_8,
+ EG_CB_COLOR0_INFO__FORMAT_shift,
+ EG_CB_COLOR0_INFO__FORMAT_mask);
+ SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All,
+ SWAP_ALT, //SWAP_STD
+ EG_CB_COLOR0_INFO__COMP_SWAP_shift,
+ EG_CB_COLOR0_INFO__COMP_SWAP_mask);
+ }
+ else
+ {
+ SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All,
+ COLOR_5_6_5,
+ EG_CB_COLOR0_INFO__FORMAT_shift,
+ EG_CB_COLOR0_INFO__FORMAT_mask);
+ SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All,
+ SWAP_ALT_REV,
+ EG_CB_COLOR0_INFO__COMP_SWAP_shift,
+ EG_CB_COLOR0_INFO__COMP_SWAP_mask);
+ }
+ SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All,
+ 1,
+ EG_CB_COLOR0_INFO__SOURCE_FORMAT_shift,
+ EG_CB_COLOR0_INFO__SOURCE_FORMAT_mask);
+ SETbit(evergreen->render_target[id].CB_COLOR0_INFO.u32All,
+ EG_CB_COLOR0_INFO__BLEND_CLAMP_bit);
+ SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All,
+ NUMBER_UNORM,
+ EG_CB_COLOR0_INFO__NUMBER_TYPE_shift,
+ EG_CB_COLOR0_INFO__NUMBER_TYPE_mask);
+
+ evergreen->render_target[id].CB_COLOR0_VIEW.u32All = 0;
+ evergreen->render_target[id].CB_COLOR0_CMASK.u32All = 0;
+ evergreen->render_target[id].CB_COLOR0_FMASK.u32All = 0;
+ evergreen->render_target[id].CB_COLOR0_FMASK_SLICE.u32All = 0;
+
+ evergreen->render_target[id].enabled = GL_TRUE;
+}
+
+static void evergreenSendCB(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ struct radeon_renderbuffer *rrb;
+ BATCH_LOCALS(&context->radeon);
+ int id = 0;
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ rrb = radeon_get_colorbuffer(&context->radeon);
+ if (!rrb || !rrb->bo) {
+ return;
+ }
+
+ evergreenSetRenderTarget(context, 0);
+
+ if (!evergreen->render_target[id].enabled)
+ return;
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_CB_COLOR0_BASE + (4 * id), 1);
+ R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_BASE.u32All);
+ R600_OUT_BATCH_RELOC(evergreen->render_target[id].CB_COLOR0_BASE.u32All,
+ rrb->bo,
+ evergreen->render_target[id].CB_COLOR0_BASE.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_CB_COLOR0_INFO, evergreen->render_target[id].CB_COLOR0_INFO.u32All);
+ R600_OUT_BATCH_RELOC(evergreen->render_target[id].CB_COLOR0_INFO.u32All,
+ rrb->bo,
+ evergreen->render_target[id].CB_COLOR0_INFO.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(5);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_CB_COLOR0_PITCH, 3);
+ R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_PITCH.u32All);
+ R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_SLICE.u32All);
+ R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_VIEW.u32All);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_CB_COLOR0_ATTRIB, 2);
+ R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_ATTRIB.u32All);
+ R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_DIM.u32All);
+ /*
+ R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_CMASK.u32All);
+ R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_CMASK_SLICE.u32All);
+ R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_FMASK.u32All);
+ R600_OUT_BATCH(evergreen->render_target[id].CB_COLOR0_FMASK_SLICE.u32All);
+ */
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_CB_TARGET_MASK, 2);
+ R600_OUT_BATCH(evergreen->CB_TARGET_MASK.u32All);
+ R600_OUT_BATCH(evergreen->CB_SHADER_MASK.u32All);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(5);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_CB_BLEND_RED, 3);
+ R600_OUT_BATCH(evergreen->CB_BLEND_RED.u32All);
+ R600_OUT_BATCH(evergreen->CB_BLEND_GREEN.u32All);
+ R600_OUT_BATCH(evergreen->CB_BLEND_BLUE.u32All);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(9);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_CB_BLEND_ALPHA, evergreen->CB_BLEND_ALPHA.u32All);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_CB_BLEND0_CONTROL, evergreen->CB_BLEND0_CONTROL.u32All);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_CB_COLOR_CONTROL, evergreen->CB_COLOR_CONTROL.u32All);
+ END_BATCH();
+
+ COMMIT_BATCH();
+}
+static void evergreenSendCP(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+ //first to send
+ //r700Start3D
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_CONTEXT_CONTROL, 1)); //IT_CONTEXT_CONTROL 0x28
+ R600_OUT_BATCH(0x80000000);
+ R600_OUT_BATCH(0x80000000);
+ END_BATCH();
+
+ COMMIT_BATCH();
+}
+static void evergreenSendVGT(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+/* moved to draw:
+ VGT_DRAW_INITIATOR
+ VGT_INDEX_TYPE
+ VGT_PRIMITIVE_TYPE
+*/
+ BEGIN_BATCH_NO_AUTOSTATE(5);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_VGT_MAX_VTX_INDX, 3);
+ R600_OUT_BATCH(evergreen->VGT_MAX_VTX_INDX.u32All);
+ R600_OUT_BATCH(evergreen->VGT_MIN_VTX_INDX.u32All);
+ R600_OUT_BATCH(evergreen->VGT_INDX_OFFSET.u32All);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(6);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_VGT_OUTPUT_PATH_CNTL, evergreen->VGT_OUTPUT_PATH_CNTL.u32All);
+
+ EVERGREEN_OUT_BATCH_REGVAL(EG_VGT_GS_MODE, evergreen->VGT_GS_MODE.u32All);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_VGT_PRIMITIVEID_EN, 1);
+ R600_OUT_BATCH(evergreen->VGT_PRIMITIVEID_EN.u32All);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_VGT_INSTANCE_STEP_RATE_0, 2);
+ R600_OUT_BATCH(evergreen->VGT_INSTANCE_STEP_RATE_0.u32All);
+ R600_OUT_BATCH(evergreen->VGT_INSTANCE_STEP_RATE_1.u32All);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_VGT_REUSE_OFF, 2);
+ R600_OUT_BATCH(evergreen->VGT_REUSE_OFF.u32All);
+ R600_OUT_BATCH(evergreen->VGT_VTX_CNT_EN.u32All);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ EVERGREEN_OUT_BATCH_REGVAL(EG_VGT_SHADER_STAGES_EN, evergreen->VGT_SHADER_STAGES_EN.u32All);
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ EVERGREEN_OUT_BATCH_REGSEQ(EG_VGT_STRMOUT_CONFIG, 2);
+ R600_OUT_BATCH(evergreen->VGT_STRMOUT_CONFIG.u32All);
+ R600_OUT_BATCH(evergreen->VGT_STRMOUT_BUFFER_CONFIG.u32All);
+ END_BATCH();
+
+ COMMIT_BATCH();
+}
+
+static void evergreenSendTIMESTAMP(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ BATCH_LOCALS(&context->radeon);
+ radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+}
+
+void evergreenInitAtoms(context_t *context)
+{
+ radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context);
+ context->radeon.hw.max_state_size = 10 + 5 + 14 + 3; /* start 3d, idle, cb/db flush, 3 for time stamp */
+
+ /* Setup the atom linked list */
+ make_empty_list(&context->radeon.hw.atomlist);
+ context->radeon.hw.atomlist.name = "atom-list";
+
+ EVERGREEN_ALLOC_STATE(init, always, 19, evergreenSendSQConfig);
+
+ //make sure send first
+ EVERGREEN_ALLOC_STATE(cp, always, 3, evergreenSendCP);
+
+ EVERGREEN_ALLOC_STATE(vtx, evergreen_vtx, (6 + (VERT_ATTRIB_MAX * 12)), evergreenSendVTX);
+ EVERGREEN_ALLOC_STATE(pa, always, 124, evergreenSendPA);
+ EVERGREEN_ALLOC_STATE(tp, always, 0, evergreenSendTP);
+ EVERGREEN_ALLOC_STATE(sq, always, 86, evergreenSendSQ); /* 85 */
+ EVERGREEN_ALLOC_STATE(vs, always, 16, evergreenSendVSresource);
+ EVERGREEN_ALLOC_STATE(spi, always, 59, evergreenSendSPI);
+ EVERGREEN_ALLOC_STATE(sx, always, 9, evergreenSendSX);
+ EVERGREEN_ALLOC_STATE(tx, evergreen_tx, (R700_TEXTURE_NUMBERUNITS * (21+5) + 6), evergreenSendTexState); /* 21 for resource, 5 for sampler */
+ EVERGREEN_ALLOC_STATE(db, always, 60, evergreenSendDB);
+ EVERGREEN_ALLOC_STATE(cb, always, 35, evergreenSendCB);
+ EVERGREEN_ALLOC_STATE(vgt, always, 29, evergreenSendVGT);
+ EVERGREEN_ALLOC_STATE(timestamp, always, 3, evergreenSendTIMESTAMP);
+
+ //evergreen_init_query_stateobj(&context->radeon, 6 * 2);
+
+ context->radeon.hw.is_dirty = GL_TRUE;
+ context->radeon.hw.all_dirty = GL_TRUE;
+}
diff --git a/src/mesa/drivers/dri/r600/evergreen_chip.h b/src/mesa/drivers/dri/r600/evergreen_chip.h
new file mode 100644
index 0000000000..2ea5cd213c
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_chip.h
@@ -0,0 +1,516 @@
+/*
+ * Copyright (C) 2008-2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _EVERGREEN_CHIP_H_
+#define _EVERGREEN_CHIP_H_
+
+#include "r700_chip.h"
+
+#define EVERGREEN_MAX_DX9_CONSTS 256
+#define EVERGREEN_MAX_SHADER_EXPORTS 32
+#define EVERGREEN_MAX_VIEWPORTS 16
+
+typedef struct _EVERGREEN_VIEWPORT_STATE
+{
+ union UINT_FLOAT PA_SC_VPORT_SCISSOR_0_TL; ////0,1 // = 0x28250, // DIFF
+ union UINT_FLOAT PA_SC_VPORT_SCISSOR_0_BR; ////0,1 // = 0x28254, // DIFF
+ union UINT_FLOAT PA_SC_VPORT_ZMIN_0; ////0 // = 0x282D0, // SAME
+ union UINT_FLOAT PA_SC_VPORT_ZMAX_0; ////0 // = 0x282D4, // SAME
+ union UINT_FLOAT PA_CL_VPORT_XSCALE; //// // = 0x2843C, // SAME
+ union UINT_FLOAT PA_CL_VPORT_XOFFSET; //// // = 0x28440, // SAME
+ union UINT_FLOAT PA_CL_VPORT_YSCALE; //// // = 0x28444, // SAME
+ union UINT_FLOAT PA_CL_VPORT_YOFFSET; //// // = 0x28448, // SAME
+ union UINT_FLOAT PA_CL_VPORT_ZSCALE; //// // = 0x2844C, // SAME
+ union UINT_FLOAT PA_CL_VPORT_ZOFFSET; //// // = 0x28450, // SAME
+ GLboolean enabled;
+ GLboolean dirty;
+} EVERGREEN_VIEWPORT_STATE;
+
+#define EVERGREEN_MAX_UCP 6
+
+typedef struct _EVERGREEN_UCP_STATE
+{
+ union UINT_FLOAT PA_CL_UCP_0_X; // = 0x285BC, // SAME 0x28E20
+ union UINT_FLOAT PA_CL_UCP_0_Y; // = 0x285C0, // SAME 0x28E24
+ union UINT_FLOAT PA_CL_UCP_0_Z; // = 0x285C4, // SAME 0x28E28
+ union UINT_FLOAT PA_CL_UCP_0_W; // = 0x285C8, // SAME 0x28E2C
+ GLboolean enabled;
+ GLboolean dirty;
+} EVERGREEN_UCP_STATE;
+
+#define EVERGREEN_MAX_RENDER_TARGETS 12
+
+typedef struct _EVERGREEN_RENDER_TARGET_STATE
+{
+ union UINT_FLOAT CB_COLOR0_BASE; ////0 // = 0x28C60, // SAME 0x28040
+ union UINT_FLOAT CB_COLOR0_PITCH; ////0 // = 0x28C64, //
+ union UINT_FLOAT CB_COLOR0_SLICE; ////0 // = 0x28C68, //
+ union UINT_FLOAT CB_COLOR0_VIEW; ////0 // = 0x28C6C, // SAME 0x28080
+ union UINT_FLOAT CB_COLOR0_INFO; ////0,1,2,3,4,5,6,78,9,10,11 // = 0x28C70, // DIFF 0x280A0
+ union UINT_FLOAT CB_COLOR0_ATTRIB; ////0 // = 0x28C74, //
+ union UINT_FLOAT CB_COLOR0_DIM; // = 0x28C78, //
+ union UINT_FLOAT CB_COLOR0_CMASK; ////0 // = 0x28C7C, //
+ union UINT_FLOAT CB_COLOR0_CMASK_SLICE; ////0 // = 0x28C80, //
+ union UINT_FLOAT CB_COLOR0_FMASK; ////0 // = 0x28C84, //
+ union UINT_FLOAT CB_COLOR0_FMASK_SLICE; ////0 // = 0x28C88, //
+ union UINT_FLOAT CB_COLOR0_CLEAR_WORD0; // = 0x28C8C, //
+ union UINT_FLOAT CB_COLOR0_CLEAR_WORD1; // = 0x28C90, //
+ union UINT_FLOAT CB_COLOR0_CLEAR_WORD2; // = 0x28C94, //
+ union UINT_FLOAT CB_COLOR0_CLEAR_WORD3; // = 0x28C98, //
+ GLboolean enabled;
+ GLboolean dirty;
+} EVERGREEN_RENDER_TARGET_STATE;
+
+typedef struct _EVERGREEN_CONFIG
+{
+ union UINT_FLOAT SPI_CONFIG_CNTL; // = 0x9100, // DIFF
+ union UINT_FLOAT SPI_CONFIG_CNTL_1; // = 0x913C, // DIFF
+ union UINT_FLOAT CP_PERFMON_CNTL; // = 0x87FC, // SAME
+ union UINT_FLOAT SQ_MS_FIFO_SIZES; // = 0x8CF0, // SAME
+
+ union UINT_FLOAT SQ_CONFIG; // = 0x8C00, // DIFF
+ union UINT_FLOAT SQ_GPR_RESOURCE_MGMT_1; // = 0x8C04, // SAME
+ union UINT_FLOAT SQ_GPR_RESOURCE_MGMT_2; // = 0x8C08, // SAME
+ union UINT_FLOAT SQ_GPR_RESOURCE_MGMT_3; // = 0x8C0C, //
+
+ union UINT_FLOAT SQ_THREAD_RESOURCE_MGMT; // = 0x8C18, // SAME 0x8C0C
+ union UINT_FLOAT SQ_THREAD_RESOURCE_MGMT_2; // = 0x8C1C, //
+ union UINT_FLOAT SQ_STACK_RESOURCE_MGMT_1; // = 0x8C20, // SAME 0x8C10
+ union UINT_FLOAT SQ_STACK_RESOURCE_MGMT_2; // = 0x8C24, // SAME 0x8C14
+ union UINT_FLOAT SQ_STACK_RESOURCE_MGMT_3; // = 0x8C28, //
+
+ union UINT_FLOAT SQ_DYN_GPR_CNTL_PS_FLUSH_REQ; // = 0x8D8C, // DIFF
+ union UINT_FLOAT SQ_LDS_RESOURCE_MGMT; // = 0x8E2C, //
+ union UINT_FLOAT VGT_CACHE_INVALIDATION; // = 0x88C4, // DIFF
+ union UINT_FLOAT VGT_GS_VERTEX_REUSE; // = 0x88D4, // SAME
+ union UINT_FLOAT PA_SC_FORCE_EOV_MAX_CNTS; // = 0x8B24, // SAME
+ union UINT_FLOAT PA_SC_LINE_STIPPLE_STATE; // = 0x8B10, // SAME
+ union UINT_FLOAT PA_CL_ENHANCE; // = 0x8A14, // SAME
+} EVERGREEN_CONFIG;
+
+typedef struct _EVERGREEN_PS_RES
+{
+ union UINT_FLOAT SQ_PGM_START_PS; //// // = 0x28840, // SAME
+ GLboolean dirty;
+
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_0; // = 0x28940, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_1; // = 0x28944, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_2; // = 0x28948, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_3; // = 0x2894C, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_4; // = 0x28950, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_5; // = 0x28954, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_6; // = 0x28958, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_7; // = 0x2895C, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_8; // = 0x28960, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_9; // = 0x28964, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_10; // = 0x28968, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_11; // = 0x2896C, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_12; // = 0x28970, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_13; // = 0x28974, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_14; // = 0x28978, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_PS_15; // = 0x2897C, // SAME
+
+ int num_consts;
+ union UINT_FLOAT consts[EVERGREEN_MAX_DX9_CONSTS][4];
+} EVERGREEN_PS_RES;
+
+typedef struct _EVERGREEN_VS_RES
+{
+ union UINT_FLOAT SQ_PGM_START_VS; //// // = 0x2885C, // SAME 0x28858
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_VS_0; //// // = 0x28180, //?
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_0; //// // = 0x28980, // SAME
+
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_1; // = 0x28984, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_2; // = 0x28988, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_3; // = 0x2898C, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_4; // = 0x28990, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_5; // = 0x28994, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_6; // = 0x28998, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_7; // = 0x2899C, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_8; // = 0x289A0, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_9; // = 0x289A4, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_10; // = 0x289A8, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_11; // = 0x289AC, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_12; // = 0x289B0, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_13; // = 0x289B4, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_14; // = 0x289B8, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_15; // = 0x289BC, // SAME
+
+ GLboolean dirty;
+ int num_consts;
+ union UINT_FLOAT consts[EVERGREEN_MAX_DX9_CONSTS][4];
+} EVERGREEN_VS_RES;
+
+typedef struct _EVERGREEN_CHIP_CONTEXT
+{
+/* Registers from PA block: */
+ union UINT_FLOAT PA_SC_SCREEN_SCISSOR_TL; //// // = 0x28030, // DIFF
+ union UINT_FLOAT PA_SC_SCREEN_SCISSOR_BR; //// // = 0x28034, // DIFF
+ union UINT_FLOAT PA_SC_WINDOW_OFFSET; //// // = 0x28200, // DIFF
+ union UINT_FLOAT PA_SC_WINDOW_SCISSOR_TL; //// // = 0x28204, // DIFF
+ union UINT_FLOAT PA_SC_WINDOW_SCISSOR_BR; //// // = 0x28208, // DIFF
+ union UINT_FLOAT PA_SC_CLIPRECT_RULE; //// // = 0x2820C, // SAME
+ union UINT_FLOAT PA_SC_CLIPRECT_0_TL; //// // = 0x28210, // DIFF
+ union UINT_FLOAT PA_SC_CLIPRECT_0_BR; //// // = 0x28214, // DIFF
+ union UINT_FLOAT PA_SC_CLIPRECT_1_TL; //// // = 0x28218, // DIFF
+ union UINT_FLOAT PA_SC_CLIPRECT_1_BR; //// // = 0x2821C, // DIFF
+ union UINT_FLOAT PA_SC_CLIPRECT_2_TL; //// // = 0x28220, // DIFF
+ union UINT_FLOAT PA_SC_CLIPRECT_2_BR; //// // = 0x28224, // DIFF
+ union UINT_FLOAT PA_SC_CLIPRECT_3_TL; //// // = 0x28228, // DIFF
+ union UINT_FLOAT PA_SC_CLIPRECT_3_BR; //// // = 0x2822C, // DIFF
+ union UINT_FLOAT PA_SC_EDGERULE; // = 0x28230, // SAME
+ union UINT_FLOAT PA_SU_HARDWARE_SCREEN_OFFSET; // = 0x28234, //
+ union UINT_FLOAT PA_SC_GENERIC_SCISSOR_TL; //// // = 0x28240, // DIFF
+ union UINT_FLOAT PA_SC_GENERIC_SCISSOR_BR; //// // = 0x28244, // DIFF
+
+ EVERGREEN_VIEWPORT_STATE viewport[EVERGREEN_MAX_VIEWPORTS];
+ EVERGREEN_UCP_STATE ucp[EVERGREEN_MAX_UCP];
+
+ union UINT_FLOAT PA_CL_POINT_X_RAD; // = 0x287D4, // SAME 0x28E10
+ union UINT_FLOAT PA_CL_POINT_Y_RAD; // = 0x287D8, // SAME 0x28E14
+ union UINT_FLOAT PA_CL_POINT_SIZE; // = 0x287DC, // SAME 0x28E18
+ union UINT_FLOAT PA_CL_POINT_CULL_RAD; // = 0x287E0, // SAME 0x28E1C
+ union UINT_FLOAT PA_CL_CLIP_CNTL; //// // = 0x28810, // SAME
+ union UINT_FLOAT PA_SU_SC_MODE_CNTL; //// // = 0x28814, // SAME
+ union UINT_FLOAT PA_CL_VTE_CNTL; //// // = 0x28818, // SAME
+ union UINT_FLOAT PA_CL_VS_OUT_CNTL; //// // = 0x2881C, // SAME
+ union UINT_FLOAT PA_CL_NANINF_CNTL; //// // = 0x28820, // SAME
+ union UINT_FLOAT PA_SU_LINE_STIPPLE_CNTL; // = 0x28824, //
+ union UINT_FLOAT PA_SU_LINE_STIPPLE_SCALE; // = 0x28828, //
+ union UINT_FLOAT PA_SU_PRIM_FILTER_CNTL; // = 0x2882C, //
+ union UINT_FLOAT PA_SU_POINT_SIZE; //// // = 0x28A00, // SAME
+ union UINT_FLOAT PA_SU_POINT_MINMAX; //// // = 0x28A04, // SAME
+ union UINT_FLOAT PA_SU_LINE_CNTL; //// // = 0x28A08, // SAME
+ union UINT_FLOAT PA_SC_LINE_STIPPLE; // = 0x28A0C, // SAME
+ union UINT_FLOAT PA_SC_MODE_CNTL_0; //// // = 0x28A48, //
+ union UINT_FLOAT PA_SC_MODE_CNTL_1; //// // = 0x28A4C, //
+ union UINT_FLOAT PA_SU_POLY_OFFSET_DB_FMT_CNTL; //// // = 0x28B78, // SAME 0x28DF8
+ union UINT_FLOAT PA_SU_POLY_OFFSET_CLAMP; //// // = 0x28B7C, // SAME 0x28DFC
+ union UINT_FLOAT PA_SU_POLY_OFFSET_FRONT_SCALE;//// // = 0x28B80, // SAME 0x28E00
+ union UINT_FLOAT PA_SU_POLY_OFFSET_FRONT_OFFSET; //// // = 0x28B84, // SAME 0x28E04
+ union UINT_FLOAT PA_SU_POLY_OFFSET_BACK_SCALE; //// // = 0x28B88, // SAME 0x28E08
+ union UINT_FLOAT PA_SU_POLY_OFFSET_BACK_OFFSET; //// // = 0x28B8C, // SAME 0x28E0C
+ union UINT_FLOAT PA_SC_LINE_CNTL; //// // = 0x28C00, // DIFF
+ union UINT_FLOAT PA_SC_AA_CONFIG; //// // = 0x28C04, // SAME
+ union UINT_FLOAT PA_SU_VTX_CNTL; //// // = 0x28C08, // SAME
+ union UINT_FLOAT PA_CL_GB_VERT_CLIP_ADJ; //// // = 0x28C0C, // SAME
+ union UINT_FLOAT PA_CL_GB_VERT_DISC_ADJ; //// // = 0x28C10, // SAME
+ union UINT_FLOAT PA_CL_GB_HORZ_CLIP_ADJ; //// // = 0x28C14, // SAME
+ union UINT_FLOAT PA_CL_GB_HORZ_DISC_ADJ; //// // = 0x28C18, // SAME
+ union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_0; //// // = 0x28C1C, //
+ union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_1; //// // = 0x28C20, //
+ union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_2; //// // = 0x28C24, //
+ union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_3; //// // = 0x28C28, //
+ union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_4; //// // = 0x28C2C, //
+ union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_5; //// // = 0x28C30, //
+ union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_6; //// // = 0x28C34, //
+ union UINT_FLOAT PA_SC_AA_SAMPLE_LOCS_7; //// // = 0x28C38, //
+ union UINT_FLOAT PA_SC_AA_MASK; //// // = 0x28C3C, // SAME 0x28C48
+
+/* Registers from VGT block: */
+ union UINT_FLOAT VGT_INDEX_TYPE; // = 0x895C, // SAME
+ union UINT_FLOAT VGT_PRIMITIVE_TYPE; // = 0x8958, // SAME
+ union UINT_FLOAT VGT_MAX_VTX_INDX; //// // = 0x28400, // SAME
+ union UINT_FLOAT VGT_MIN_VTX_INDX; //// // = 0x28404, // SAME
+ union UINT_FLOAT VGT_INDX_OFFSET; //// // = 0x28408, // SAME
+ union UINT_FLOAT VGT_MULTI_PRIM_IB_RESET_INDX; // = 0x2840C, // SAME
+
+ union UINT_FLOAT VGT_DRAW_INITIATOR; // = 0x287F0, // SAME
+ union UINT_FLOAT VGT_IMMED_DATA; // = 0x287F4, // SAME
+
+ union UINT_FLOAT VGT_OUTPUT_PATH_CNTL; //// // = 0x28A10, // DIFF
+ union UINT_FLOAT VGT_HOS_CNTL; // = 0x28A14, // SAME
+ union UINT_FLOAT VGT_HOS_MAX_TESS_LEVEL; // = 0x28A18, // SAME
+ union UINT_FLOAT VGT_HOS_MIN_TESS_LEVEL; // = 0x28A1C, // SAME
+ union UINT_FLOAT VGT_HOS_REUSE_DEPTH; // = 0x28A20, // SAME
+ union UINT_FLOAT VGT_GROUP_PRIM_TYPE; // = 0x28A24, // SAME
+ union UINT_FLOAT VGT_GROUP_FIRST_DECR; // = 0x28A28, // SAME
+ union UINT_FLOAT VGT_GROUP_DECR; // = 0x28A2C, // SAME
+ union UINT_FLOAT VGT_GROUP_VECT_0_CNTL; // = 0x28A30, // SAME
+ union UINT_FLOAT VGT_GROUP_VECT_1_CNTL; // = 0x28A34, // SAME
+ union UINT_FLOAT VGT_GROUP_VECT_0_FMT_CNTL; // = 0x28A38, // SAME
+ union UINT_FLOAT VGT_GROUP_VECT_1_FMT_CNTL; // = 0x28A3C, // SAME
+ union UINT_FLOAT VGT_GS_MODE; //// // = 0x28A40, // DIFF
+
+ union UINT_FLOAT VGT_PRIMITIVEID_EN; //// // = 0x28A84, // SAME
+ union UINT_FLOAT VGT_DMA_NUM_INSTANCES; //// // = 0x28A88, // SAME
+ union UINT_FLOAT VGT_EVENT_INITIATOR; // = 0x28A90, // SAME
+ union UINT_FLOAT VGT_MULTI_PRIM_IB_RESET_EN; // = 0x28A94, // SAME
+ union UINT_FLOAT VGT_INSTANCE_STEP_RATE_0; //// // = 0x28AA0, // SAME
+ union UINT_FLOAT VGT_INSTANCE_STEP_RATE_1; //// // = 0x28AA4, // SAME
+ union UINT_FLOAT VGT_REUSE_OFF; //// // = 0x28AB4, // SAME
+ union UINT_FLOAT VGT_VTX_CNT_EN; //// // = 0x28AB8, // SAME
+
+ union UINT_FLOAT VGT_SHADER_STAGES_EN; //// // = 0x28B54, //
+
+ union UINT_FLOAT VGT_STRMOUT_CONFIG; //// // = 0x28B94, //
+ union UINT_FLOAT VGT_STRMOUT_BUFFER_CONFIG; //// // = 0x28B98, //
+ union UINT_FLOAT VGT_VERTEX_REUSE_BLOCK_CNTL;//// // = 0x28C58, // SAME
+ union UINT_FLOAT VGT_OUT_DEALLOC_CNTL; //// // = 0x28C5C, // SAME
+
+/* Registers from SQ block: */
+ union UINT_FLOAT SQ_VTX_SEMANTIC_0; //// // = 0x28380, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_1; //// // = 0x28384, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_2; //// // = 0x28388, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_3; //// // = 0x2838C, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_4; //// // = 0x28390, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_5; //// // = 0x28394, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_6; //// // = 0x28398, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_7; //// // = 0x2839C, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_8; //// // = 0x283A0, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_9; //// // = 0x283A4, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_10; //// // = 0x283A8, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_11; //// // = 0x283AC, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_12; //// // = 0x283B0, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_13; //// // = 0x283B4, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_14; //// // = 0x283B8, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_15; //// // = 0x283BC, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_16; //// // = 0x283C0, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_17; //// // = 0x283C4, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_18; //// // = 0x283C8, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_19; //// // = 0x283CC, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_20; //// // = 0x283D0, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_21; //// // = 0x283D4, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_22; //// // = 0x283D8, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_23; //// // = 0x283DC, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_24; //// // = 0x283E0, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_25; //// // = 0x283E4, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_26; //// // = 0x283E8, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_27; //// // = 0x283EC, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_28; //// // = 0x283F0, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_29; //// // = 0x283F4, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_30; //// // = 0x283F8, // SAME
+ union UINT_FLOAT SQ_VTX_SEMANTIC_31; //// // = 0x283FC, // SAME
+ union UINT_FLOAT SQ_DYN_GPR_RESOURCE_LIMIT_1;//// // = 0x28838, //
+
+ union UINT_FLOAT SQ_PGM_RESOURCES_PS; //// // = 0x28844, // DIFF 0x28850
+ union UINT_FLOAT SQ_PGM_RESOURCES_2_PS; //// // = 0x28848, //
+ union UINT_FLOAT SQ_PGM_EXPORTS_PS; //// // = 0x2884C, // SAME 0x28854
+
+ union UINT_FLOAT SQ_PGM_RESOURCES_VS;//// // = 0x28860, // DIFF 0x28868
+ union UINT_FLOAT SQ_PGM_RESOURCES_2_VS; //// // = 0x28864, //
+ union UINT_FLOAT SQ_PGM_START_GS; //// // = 0x28874, // SAME 0x2886C
+ union UINT_FLOAT SQ_PGM_RESOURCES_GS; //// // = 0x28878, // DIFF 0x2887C
+ union UINT_FLOAT SQ_PGM_RESOURCES_2_GS; //// // = 0x2887C, //
+ union UINT_FLOAT SQ_PGM_START_ES; //// // = 0x2888C, // SAME 0x28880
+ union UINT_FLOAT SQ_PGM_RESOURCES_ES; //// // = 0x28890, // DIFF
+ union UINT_FLOAT SQ_PGM_RESOURCES_2_ES; //// // = 0x28894, //
+ union UINT_FLOAT SQ_PGM_START_FS; //// // = 0x288A4, // SAME 0x28894
+ union UINT_FLOAT SQ_PGM_RESOURCES_FS; //// // = 0x288A8, // DIFF 0x288A4
+ union UINT_FLOAT SQ_PGM_START_HS; // = 0x288B8, //
+ union UINT_FLOAT SQ_PGM_RESOURCES_HS; // = 0x288BC, //
+ union UINT_FLOAT SQ_PGM_RESOURCES_2_HS;//// // = 0x288C0, //
+ union UINT_FLOAT SQ_PGM_START_LS; // = 0x288D0, //
+ union UINT_FLOAT SQ_PGM_RESOURCES_LS; // = 0x288D4, //
+ union UINT_FLOAT SQ_PGM_RESOURCES_2_LS; //// // = 0x288D8, //
+ union UINT_FLOAT SQ_LDS_ALLOC_PS; //// // = 0x288EC, //
+ union UINT_FLOAT SQ_ESGS_RING_ITEMSIZE; //// // = 0x28900, // SAME 0x288A8
+ union UINT_FLOAT SQ_GSVS_RING_ITEMSIZE; //// // = 0x28904, // SAME 0x288AC
+ union UINT_FLOAT SQ_ESTMP_RING_ITEMSIZE; //// // = 0x28908, // SAME 0x288B0
+ union UINT_FLOAT SQ_GSTMP_RING_ITEMSIZE; //// // = 0x2890C, // SAME 0x288B4
+ union UINT_FLOAT SQ_VSTMP_RING_ITEMSIZE; //// // = 0x28910, // SAME 0x288B8
+ union UINT_FLOAT SQ_PSTMP_RING_ITEMSIZE; //// // = 0x28914, // SAME 0x288BC
+ union UINT_FLOAT SQ_GS_VERT_ITEMSIZE; //// // = 0x2891C, // SAME 0x288C8
+ union UINT_FLOAT SQ_GS_VERT_ITEMSIZE_1; // = 0x28920, //
+ union UINT_FLOAT SQ_GS_VERT_ITEMSIZE_2; // = 0x28924, //
+ union UINT_FLOAT SQ_GS_VERT_ITEMSIZE_3; // = 0x28928, //
+
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_0; // = 0x289C0, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_1; // = 0x289C4, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_2; // = 0x289C8, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_3; // = 0x289CC, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_4; // = 0x289D0, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_5; // = 0x289D4, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_6; // = 0x289D8, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_7; // = 0x289DC, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_8; // = 0x289E0, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_9; // = 0x289E4, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_10; // = 0x289E8, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_11; // = 0x289EC, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_12; // = 0x289F0, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_13; // = 0x289F4, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_14; // = 0x289F8, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_GS_15; // = 0x289FC, // SAME
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_0; // = 0x28F00, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_1; // = 0x28F04, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_2; // = 0x28F08, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_3; // = 0x28F0C, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_4; // = 0x28F10, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_5; // = 0x28F14, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_6; // = 0x28F18, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_7; // = 0x28F1C, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_8; // = 0x28F20, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_9; // = 0x28F24, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_10; // = 0x28F28, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_11; // = 0x28F2C, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_12; // = 0x28F30, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_13; // = 0x28F34, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_14; // = 0x28F38, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_HS_15; // = 0x28F3C, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_0; // = 0x28F40, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_1; // = 0x28F44, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_2; // = 0x28F48, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_3; // = 0x28F4C, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_4; // = 0x28F50, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_5; // = 0x28F54, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_6; // = 0x28F58, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_7; // = 0x28F5C, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_8; // = 0x28F60, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_9; // = 0x28F64, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_10; // = 0x28F68, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_11; // = 0x28F6C, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_12; // = 0x28F70, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_13; // = 0x28F74, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_14; // = 0x28F78, //
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_LS_15; // = 0x28F7C, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_0; // = 0x28F80, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_1; // = 0x28F84, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_2; // = 0x28F88, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_3; // = 0x28F8C, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_4; // = 0x28F90, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_5; // = 0x28F94, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_6; // = 0x28F98, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_7; // = 0x28F9C, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_8; // = 0x28FA0, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_9; // = 0x28FA4, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_10; // = 0x28FA8, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_11; // = 0x28FAC, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_12; // = 0x28FB0, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_13; // = 0x28FB4, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_14; // = 0x28FB8, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_HS_15; // = 0x28FBC, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_0; // = 0x28FC0, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_1; // = 0x28FC4, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_2; // = 0x28FC8, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_3; // = 0x28FCC, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_4; // = 0x28FD0, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_5; // = 0x28FD4, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_6; // = 0x28FD8, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_7; // = 0x28FDC, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_8; // = 0x28FE0, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_9; // = 0x28FE4, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_10; // = 0x28FE8, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_11; // = 0x28FEC, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_12; // = 0x28FF0, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_13; // = 0x28FF4, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_14; // = 0x28FF8, //
+ union UINT_FLOAT SQ_ALU_CONST_BUFFER_SIZE_LS_15; // = 0x28FFC, //
+
+ EVERGREEN_PS_RES ps;
+ EVERGREEN_VS_RES vs;
+
+/* Registers from SPI block: */
+ union UINT_FLOAT SPI_VS_OUT_ID_0; //// // = 0x2861C, // SAME 0x28614
+ union UINT_FLOAT SPI_VS_OUT_ID_1; //// // = 0x28620, // SAME 0x28618
+ union UINT_FLOAT SPI_VS_OUT_ID_2; //// // = 0x28624, // SAME 0x2861C
+ union UINT_FLOAT SPI_VS_OUT_ID_3; //// // = 0x28628, // SAME 0x28620
+ union UINT_FLOAT SPI_VS_OUT_ID_4; //// // = 0x2862C, // SAME 0x28624
+ union UINT_FLOAT SPI_VS_OUT_ID_5; //// // = 0x28630, // SAME 0x28628
+ union UINT_FLOAT SPI_VS_OUT_ID_6; //// // = 0x28634, // SAME 0x2862C
+ union UINT_FLOAT SPI_VS_OUT_ID_7; //// // = 0x28638, // SAME 0x28630
+ union UINT_FLOAT SPI_VS_OUT_ID_8; //// // = 0x2863C, // SAME 0x28634
+ union UINT_FLOAT SPI_VS_OUT_ID_9; //// // = 0x28640, // SAME 0x28638
+ union UINT_FLOAT SPI_PS_INPUT_CNTL[32]; //// // = 0x28644, // SAME
+
+ union UINT_FLOAT SPI_VS_OUT_CONFIG; //// // = 0x286C4, // SAME
+ union UINT_FLOAT SPI_THREAD_GROUPING; //// // = 0x286C8, // DIFF
+ union UINT_FLOAT SPI_PS_IN_CONTROL_0; //// // = 0x286CC, // SAME
+ union UINT_FLOAT SPI_PS_IN_CONTROL_1; //// // = 0x286D0, // SAME
+ union UINT_FLOAT SPI_INTERP_CONTROL_0; //// // = 0x286D4, // SAME
+ union UINT_FLOAT SPI_INPUT_Z; //// // = 0x286D8, // SAME
+ union UINT_FLOAT SPI_FOG_CNTL; //// // = 0x286DC, // SAME
+ union UINT_FLOAT SPI_BARYC_CNTL; //// // = 0x286E0, //
+ union UINT_FLOAT SPI_PS_IN_CONTROL_2; //// // = 0x286E4, //
+ union UINT_FLOAT SPI_COMPUTE_INPUT_CNTL; // = 0x286E8, //
+ union UINT_FLOAT SPI_COMPUTE_NUM_THREAD_X; // = 0x286EC, //
+ union UINT_FLOAT SPI_COMPUTE_NUM_THREAD_Y; // = 0x286F0, //
+ union UINT_FLOAT SPI_COMPUTE_NUM_THREAD_Z; // = 0x286F4, //
+
+/* Registers from SX block: */
+ union UINT_FLOAT SX_MISC; // = 0x28350, // SAME
+ union UINT_FLOAT SX_SURFACE_SYNC; // = 0x28354, // DIFF
+ union UINT_FLOAT SX_ALPHA_TEST_CONTROL; //// // = 0x28410, // SAME
+ union UINT_FLOAT SX_ALPHA_REF; // = 0x28438, // SAME
+
+/* Registers from DB block: */
+ union UINT_FLOAT DB_RENDER_CONTROL; //// // = 0x28000, // DIFF 0x28D0C
+ union UINT_FLOAT DB_COUNT_CONTROL; //// // = 0x28004, //
+ union UINT_FLOAT DB_DEPTH_VIEW; //// // = 0x28008, // DIFF 0x28004
+ union UINT_FLOAT DB_RENDER_OVERRIDE; //// // = 0x2800C, // DIFF 0x28D10
+ union UINT_FLOAT DB_RENDER_OVERRIDE2; //// // = 0x28010, //
+ union UINT_FLOAT DB_HTILE_DATA_BASE; //// // = 0x28014, // SAME
+ union UINT_FLOAT DB_STENCIL_CLEAR; //// // = 0x28028, // SAME
+ union UINT_FLOAT DB_DEPTH_CLEAR; //// // = 0x2802C, // SAME
+ union UINT_FLOAT DB_Z_INFO; //// // = 0x28040, //
+ union UINT_FLOAT DB_STENCIL_INFO; //// // = 0x28044, //
+ union UINT_FLOAT DB_Z_READ_BASE; //// // = 0x28048, //
+ union UINT_FLOAT DB_STENCIL_READ_BASE;//// // = 0x2804C, //
+ union UINT_FLOAT DB_Z_WRITE_BASE; //// // = 0x28050, //
+ union UINT_FLOAT DB_STENCIL_WRITE_BASE; //// // = 0x28054, //
+ union UINT_FLOAT DB_DEPTH_SIZE; //// // = 0x28058, // DIFF 0x28000
+ union UINT_FLOAT DB_DEPTH_SLICE; //// // = 0x2805C, //
+ union UINT_FLOAT DB_STENCILREFMASK; // = 0x28430, // SAME
+ union UINT_FLOAT DB_STENCILREFMASK_BF; // = 0x28434, // SAME
+ union UINT_FLOAT DB_DEPTH_CONTROL; //// // = 0x28800, // SAME
+ union UINT_FLOAT DB_SHADER_CONTROL;//// // = 0x2880C, // DIFF
+ union UINT_FLOAT DB_HTILE_SURFACE; //// // = 0x28ABC, // SAME 0x28D24
+ union UINT_FLOAT DB_SRESULTS_COMPARE_STATE0; //// // = 0x28AC0, // SAME 0x28D28
+ union UINT_FLOAT DB_SRESULTS_COMPARE_STATE1; //// // = 0x28AC4, // SAME 0x28D2C
+ union UINT_FLOAT DB_PRELOAD_CONTROL; //// // = 0x28AC8, // SAME 0x28D30
+ union UINT_FLOAT DB_ALPHA_TO_MASK; //// // = 0x28B70, // SAME 0x28D44
+
+/* Registers from CB block: */
+ union UINT_FLOAT CB_TARGET_MASK; //// // = 0x28238, // SAME
+ union UINT_FLOAT CB_SHADER_MASK; //// // = 0x2823C, // SAME
+ union UINT_FLOAT CB_BLEND_RED; //// // = 0x28414, // SAME
+ union UINT_FLOAT CB_BLEND_GREEN; //// // = 0x28418, // SAME
+ union UINT_FLOAT CB_BLEND_BLUE; //// // = 0x2841C, // SAME
+ union UINT_FLOAT CB_BLEND_ALPHA; //// // = 0x28420, // SAME
+ union UINT_FLOAT CB_BLEND0_CONTROL; //// // = 0x28780, // DIFF
+ union UINT_FLOAT CB_BLEND1_CONTROL; // = 0x28784, // DIFF
+ union UINT_FLOAT CB_BLEND2_CONTROL; // = 0x28788, // DIFF
+ union UINT_FLOAT CB_BLEND3_CONTROL; // = 0x2878C, // DIFF
+ union UINT_FLOAT CB_BLEND4_CONTROL; // = 0x28790, // DIFF
+ union UINT_FLOAT CB_BLEND5_CONTROL; // = 0x28794, // DIFF
+ union UINT_FLOAT CB_BLEND6_CONTROL; // = 0x28798, // DIFF
+ union UINT_FLOAT CB_BLEND7_CONTROL; // = 0x2879C, // DIFF
+ union UINT_FLOAT CB_COLOR_CONTROL; //// // = 0x28808, // DIFF
+ union UINT_FLOAT CB_CLRCMP_CONTROL; //// // = 0x28C40, // SAME 0x28C30
+ union UINT_FLOAT CB_CLRCMP_SRC; //// // = 0x28C44, // SAME 0x28C34
+ union UINT_FLOAT CB_CLRCMP_DST; //// // = 0x28C48, // SAME 0x28C38
+ union UINT_FLOAT CB_CLRCMP_MSK; //// // = 0x28C4C, // SAME 0x28C3C
+
+ EVERGREEN_RENDER_TARGET_STATE render_target[EVERGREEN_MAX_RENDER_TARGETS];
+
+ radeonTexObj* textures[R700_TEXTURE_NUMBERUNITS];
+
+ EVERGREEN_CONFIG evergreen_config;
+
+ GLboolean bEnablePerspective;
+
+} EVERGREEN_CHIP_CONTEXT;
+
+#endif /* _EVERGREEN_CHIP_H_ */ \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/evergreen_context.c b/src/mesa/drivers/dri/r600/evergreen_context.c
new file mode 100644
index 0000000000..65b5898efa
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_context.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2008-2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/api_arrayelt.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+#include "main/bufferobj.h"
+#include "main/texobj.h"
+
+#include "radeon_common_context.h"
+#include "evergreen_context.h"
+#include "evergreen_state.h"
+#include "r600_blit.h"
+
+static void evergreen_get_lock(radeonContextPtr rmesa)
+{
+ drm_radeon_sarea_t *sarea = rmesa->sarea;
+
+ if (sarea->ctx_owner != rmesa->dri.hwContext) {
+ sarea->ctx_owner = rmesa->dri.hwContext;
+ if (!rmesa->radeonScreen->kernel_mm)
+ radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
+ }
+}
+
+static void evergreen_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
+{
+ /* please flush pipe do all pending work */
+ /* to be enabled */
+}
+
+static void evergreen_vtbl_pre_emit_atoms(radeonContextPtr radeon)
+{
+ //TODO apr.01
+ //r700Start3D((context_t *)radeon);
+}
+
+static void evergreen_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ if (mode)
+ context->radeon.Fallback |= bit;
+ else
+ context->radeon.Fallback &= ~bit;
+}
+
+static void evergreen_emit_query_finish(radeonContextPtr radeon)
+{
+ //TODO apr.01
+ //context_t *context = (context_t*) radeon;
+ //BATCH_LOCALS(&context->radeon);
+
+ struct radeon_query_object *query = radeon->query.current;
+
+ //BEGIN_BATCH_NO_AUTOSTATE(4 + 2);
+ //R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+ //R600_OUT_BATCH(ZPASS_DONE);
+ //R600_OUT_BATCH(query->curr_offset + 8); /* hw writes qwords */
+ //R600_OUT_BATCH(0x00000000);
+ //R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ //END_BATCH();
+ //assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+ query->emitted_begin = GL_FALSE;
+}
+
+void evergreen_init_vtbl(radeonContextPtr radeon)
+{
+ radeon->vtbl.get_lock = evergreen_get_lock;
+ radeon->vtbl.update_viewport_offset = evergreenUpdateViewportOffset;
+ radeon->vtbl.emit_cs_header = evergreen_vtbl_emit_cs_header;
+ radeon->vtbl.swtcl_flush = NULL;
+ radeon->vtbl.pre_emit_atoms = evergreen_vtbl_pre_emit_atoms;
+ radeon->vtbl.fallback = evergreen_fallback;
+ radeon->vtbl.emit_query_finish = evergreen_emit_query_finish;
+ radeon->vtbl.check_blit = r600_check_blit;
+ radeon->vtbl.blit = r600_blit;
+ radeon->vtbl.is_format_renderable = radeonIsFormatRenderable;
+}
+
+
+
diff --git a/src/mesa/drivers/dri/r600/evergreen_context.h b/src/mesa/drivers/dri/r600/evergreen_context.h
new file mode 100644
index 0000000000..4e50999c98
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_context.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2008-2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _EVERGREEN_CONTEXT_H_
+#define _EVERGREEN_CONTEXT_H_
+
+extern void evergreen_init_vtbl(radeonContextPtr radeon);
+
+#endif //_EVERGREEN_CONTEXT_H_
+
+
+
+
+
+
diff --git a/src/mesa/drivers/dri/r600/evergreen_diff.h b/src/mesa/drivers/dri/r600/evergreen_diff.h
new file mode 100644
index 0000000000..c3a5fd0a38
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_diff.h
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2008-2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _EVERGREEN_DIFF_H_
+#define _EVERGREEN_DIFF_H_
+
+enum {
+ /* CB_BLEND_CONTROL */
+ EG_CB_BLENDX_CONTROL_ENABLE_bit = 1 << 30,
+ /* PA_SC_SCREEN_SCISSOR_TL */
+ EG_PA_SC_SCREEN_SCISSOR_TL__TL_X_mask = 0xffff << 0,
+ EG_PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask = 0xffff << 16,
+ /* PA_SC_SCREEN_SCISSOR_BR */
+ EG_PA_SC_SCREEN_SCISSOR_BR__BR_X_mask = 0xffff << 0,
+ EG_PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask = 0xffff << 16,
+ /* PA_SC_WINDOW_SCISSOR_TL */
+ EG_PA_SC_WINDOW_SCISSOR_TL__TL_X_mask = 0x7fff << 0,
+ EG_PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask = 0x7fff << 16,
+ /* PA_SC_WINDOW_SCISSOR_BR */
+ EG_PA_SC_WINDOW_SCISSOR_BR__BR_X_mask = 0x7fff << 0,
+ EG_PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask = 0x7fff << 16,
+ /* PA_SC_CLIPRECT_0_TL */
+ EG_PA_SC_CLIPRECT_0_TL__TL_X_mask = 0x7fff << 0,
+ EG_PA_SC_CLIPRECT_0_TL__TL_Y_mask = 0x7fff << 16,
+ /* PA_SC_CLIPRECT_0_BR */
+ EG_PA_SC_CLIPRECT_0_BR__BR_X_mask = 0x7fff << 0,
+ EG_PA_SC_CLIPRECT_0_BR__BR_Y_mask = 0x7fff << 16,
+ /* PA_SC_GENERIC_SCISSOR_TL */
+ EG_PA_SC_GENERIC_SCISSOR_TL__TL_X_mask = 0x7fff << 0,
+ EG_PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask = 0x7fff << 16,
+ /* PA_SC_GENERIC_SCISSOR_BR */
+ EG_PA_SC_GENERIC_SCISSOR_BR__BR_X_mask = 0x7fff << 0,
+ EG_PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask = 0x7fff << 16,
+ /* PA_SC_VPORT_SCISSOR_0_TL */
+ EG_PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask = 0x7fff << 0,
+ EG_PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask = 0x7fff << 16,
+ /* PA_SC_VPORT_SCISSOR_0_BR */
+ EG_PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask = 0x7fff << 0,
+ EG_PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask = 0x7fff << 16,
+ /* PA_SC_WINDOW_OFFSET */
+ EG_PA_SC_WINDOW_OFFSET__WINDOW_X_OFFSET_shift = 0,
+ EG_PA_SC_WINDOW_OFFSET__WINDOW_X_OFFSET_mask = 0xffff << 0,
+ EG_PA_SC_WINDOW_OFFSET__WINDOW_Y_OFFSET_shift = 16,
+ EG_PA_SC_WINDOW_OFFSET__WINDOW_Y_OFFSET_mask = 0xffff << 16,
+ /* SPI_BARYC_CNTL */
+ EG_SPI_BARYC_CNTL__PERSP_CENTROID_ENA_shift = 4,
+ EG_SPI_BARYC_CNTL__PERSP_CENTROID_ENA_mask = 0x3 << 4,
+ EG_SPI_BARYC_CNTL__LINEAR_CENTROID_ENA_shift = 20,
+ EG_SPI_BARYC_CNTL__LINEAR_CENTROID_ENA_mask = 0x3 << 20,
+ /* DB_SHADER_CONTROL */
+ EG_DB_SHADER_CONTROL__DUAL_EXPORT_ENABLE_bit = 1 << 9,
+
+ /* DB_Z_INFO */
+ EG_DB_Z_INFO__FORMAT_shift = 0, //2;
+ EG_DB_Z_INFO__FORMAT_mask = 0x3,
+ //2;
+ EG_DB_Z_INFO__ARRAY_MODE_shift = 4, //4;
+ EG_DB_Z_INFO__ARRAY_MODE_mask = 0xf << 4,
+ EG_DB_Z_INFO__TILE_SPLIT_shift = 8, //3;
+ EG_DB_Z_INFO__TILE_SPLIT_mask = 0x7 << 8,
+ //1;
+ EG_DB_Z_INFO__NUM_BANKS_shift = 12, //2;
+ EG_DB_Z_INFO__NUM_BANKS_mask = 0x3 << 12,
+ //2;
+ EG_DB_Z_INFO__BANK_WIDTH_shift = 16, //2;
+ EG_DB_Z_INFO__BANK_WIDTH_mask = 0x3 << 16,
+ //2;
+ EG_DB_Z_INFO__BANK_HEIGHT_shift = 20, //2;
+ EG_DB_Z_INFO__BANK_HEIGHT_mask = 0x3 << 20,
+
+ EG_Z_INVALID = 0x00000000,
+ EG_Z_16 = 0x00000001,
+ EG_Z_24 = 0x00000002,
+ EG_Z_32_FLOAT = 0x00000003,
+ EG_ADDR_SURF_TILE_SPLIT_256B = 0x00000002,
+ EG_ADDR_SURF_8_BANK = 0x00000002,
+ EG_ADDR_SURF_BANK_WIDTH_1 = 0x00000000,
+ EG_ADDR_SURF_BANK_HEIGHT_1 = 0x00000000,
+ /* DB_STENCIL_INFO */
+ EG_DB_STENCIL_INFO__FORMAT_bit = 1, //1;
+ //7;
+ EG_DB_STENCIL_INFO__TILE_SPLIT_shift = 8, //3;
+ EG_DB_STENCIL_INFO__TILE_SPLIT_mask = 0x7 << 8,
+
+ /* DB_DEPTH_SIZE */
+ EG_DB_DEPTH_SIZE__PITCH_TILE_MAX_shift = 0, // 11;
+ EG_DB_DEPTH_SIZE__PITCH_TILE_MAX_mask = 0x7ff,
+ EG_DB_DEPTH_SIZE__HEIGHT_TILE_MAX_shift = 11, // 11;
+ EG_DB_DEPTH_SIZE__HEIGHT_TILE_MAX_mask = 0x7ff << 11,
+
+ /* DB_COUNT_CONTROL */
+ EG_DB_COUNT_CONTROL__ZPASS_INCREMENT_DISABLE_shift = 0, //1
+ EG_DB_COUNT_CONTROL__ZPASS_INCREMENT_DISABLE_bit = 1,
+ EG_DB_COUNT_CONTROL__PERFECT_ZPASS_COUNTS_shift = 1, //1
+ EG_DB_COUNT_CONTROL__PERFECT_ZPASS_COUNTS_bit = 1 << 1,
+
+ /* CB_COLOR_CONTROL */
+ //3;
+ EG_CB_COLOR_CONTROL__DEGAMMA_ENABLE_bit = 1 << 3,//1;
+ EG_CB_COLOR_CONTROL__MODE_shift = 4, //3;
+ EG_CB_COLOR_CONTROL__MODE_mask = 0x7 << 4,
+ //9;
+ EG_CB_COLOR_CONTROL__ROP3_shift = 16, //8;
+ EG_CB_COLOR_CONTROL__ROP3_mask = 0xff << 16,
+ EG_CB_NORMAL = 0x00000001,
+
+ /* CB_COLOR0_INFO */
+ EG_CB_COLOR0_INFO__ENDIAN_shift = 0, //2;
+ EG_CB_COLOR0_INFO__ENDIAN_mask = 0x3,
+ EG_CB_COLOR0_INFO__FORMAT_shift = 2, //6;
+ EG_CB_COLOR0_INFO__FORMAT_mask = 0x3f << 2,
+ EG_CB_COLOR0_INFO__ARRAY_MODE_shift = 8, //4;
+ EG_CB_COLOR0_INFO__ARRAY_MODE_mask = 0xf << 8,
+ EG_CB_COLOR0_INFO__NUMBER_TYPE_shift = 12, //3;
+ EG_CB_COLOR0_INFO__NUMBER_TYPE_mask = 0x7 << 12,
+ EG_CB_COLOR0_INFO__COMP_SWAP_shift = 15, //2;
+ EG_CB_COLOR0_INFO__COMP_SWAP_mask = 0x3 << 15,
+ EG_CB_COLOR0_INFO__FAST_CLEAR_bit = 1 << 17,//1;
+ EG_CB_COLOR0_INFO__COMPRESSION_bit = 1 << 18,//1;
+ EG_CB_COLOR0_INFO__BLEND_CLAMP_bit = 1 << 19,//1;
+ EG_CB_COLOR0_INFO__BLEND_BYPASS_bit = 1 << 20,//1;
+ EG_CB_COLOR0_INFO__SIMPLE_FLOAT_bit = 1 << 21,//1;
+ EG_CB_COLOR0_INFO__ROUND_MODE_bit = 1 << 22,//1;
+ EG_CB_COLOR0_INFO__TILE_COMPACT_bit = 1 << 23,//1;
+ EG_CB_COLOR0_INFO__SOURCE_FORMAT_shift = 24, //2;
+ EG_CB_COLOR0_INFO__SOURCE_FORMAT_mask = 0x3 << 24,
+ EG_CB_COLOR0_INFO__RAT_bit = 1 << 26,//1;
+ EG_CB_COLOR0_INFO__RESOURCE_TYPE_shift = 27, //3;
+ EG_CB_COLOR0_INFO__RESOURCE_TYPE_mask = 0x7 << 27,
+
+ /* CB_COLOR0_ATTRIB */
+ EG_CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_shift = 4,
+ EG_CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit = 1 << 4,
+
+ /* SPI_CONFIG_CNTL_1 */
+ EG_SPI_CONFIG_CNTL_1__VTX_DONE_DELAY_shift = 0,
+ EG_SPI_CONFIG_CNTL_1__VTX_DONE_DELAY_mask = 0xf,
+ /* SQ_MS_FIFO_SIZES */
+ EG_SQ_MS_FIFO_SIZES__CACHE_FIFO_SIZE_shift = 0,
+ EG_SQ_MS_FIFO_SIZES__CACHE_FIFO_SIZE_mask = 0xff,
+ EG_SQ_MS_FIFO_SIZES__FETCH_FIFO_HIWATER_shift = 8,
+ EG_SQ_MS_FIFO_SIZES__FETCH_FIFO_HIWATER_mask = 0x1f << 8,
+ EG_SQ_MS_FIFO_SIZES__DONE_FIFO_HIWATER_shift = 16,
+ EG_SQ_MS_FIFO_SIZES__DONE_FIFO_HIWATER_mask = 0xff << 16,
+ EG_SQ_MS_FIFO_SIZES__ALU_UPDATE_FIFO_HIWATER_shift = 24,
+ EG_SQ_MS_FIFO_SIZES__ALU_UPDATE_FIFO_HIWATER_mask = 0x1f << 24,
+ /* SQ_CONFIG */
+ EG_SQ_CONFIG__VC_ENABLE_bit = 1,
+ EG_SQ_CONFIG__EXPORT_SRC_C_bit = 1 << 1,
+ EG_SQ_CONFIG__PS_PRIO_shift = 24,
+ EG_SQ_CONFIG__PS_PRIO_mask = 0x3 << 24,
+ EG_SQ_CONFIG__VS_PRIO_shift = 26,
+ EG_SQ_CONFIG__VS_PRIO_mask = 0x3 << 26,
+ EG_SQ_CONFIG__GS_PRIO_shift = 28,
+ EG_SQ_CONFIG__GS_PRIO_mask = 0x3 << 28,
+ EG_SQ_CONFIG__ES_PRIO_shift = 30,
+ EG_SQ_CONFIG__ES_PRIO_mask = 0x3 << 30,
+ /* PA_SC_FORCE_EOV_MAX_CNTS */
+ EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT_shift = 0,
+ EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT_mask = 0x3fff,
+ EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT_shift = 16,
+ EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT_mask = 0x3fff << 16,
+ /* VGT_CACHE_INVALIDATION */
+ EG_VGT_CACHE_INVALIDATION__CACHE_INVALIDATION_shift = 0,
+ EG_VGT_CACHE_INVALIDATION__CACHE_INVALIDATION_mask = 0x3,
+ /* CB_COLOR0_PITCH */
+ EG_CB_COLOR0_PITCH__TILE_MAX_shift = 0,
+ EG_CB_COLOR0_PITCH__TILE_MAX_mask = 0x7ff,
+ /* CB_COLOR0_SLICE */
+ EG_CB_COLOR0_SLICE__TILE_MAX_shift = 0,
+ EG_CB_COLOR0_SLICE__TILE_MAX_mask = 0x3fffff,
+ /* SQ_VTX_CONSTANT_WORD3_0 */
+ EG_SQ_VTX_CONSTANT_WORD3_0__UNCACHED_shift = 2,
+ EG_SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit = 1 << 2,
+
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift = 3,
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_mask = 0x7 << 3,
+
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift = 6,
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_mask = 0x7 << 6,
+
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift = 9,
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_mask = 0x7 << 9,
+
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift = 12,
+ EG_SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_mask = 0x7 << 12,
+ /* SQ_VTX_CONSTANT_WORD4_0 */
+ EG_SQ_VTX_CONSTANT_WORD4_0__NUM_ELEMENTS_shift = 0,
+ EG_SQ_VTX_CONSTANT_WORD4_0__NUM_ELEMENTS_mask = 0xFFFFFFFF,
+ /* SQ_VTX_CONSTANT_WORD7_0 */
+ EG_SQ_VTX_CONSTANT_WORD7_0__TYPE_shift = 30,
+ EG_SQ_VTX_CONSTANT_WORD7_0__TYPE_mask = 0x3 << 30,
+ /* SQ_TEX_SAMPLER_WORD0_0 */
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift = 0, // 3;
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask = 0x7,
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Y_shift = 3, // 3;
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Y_mask = 0x7 << 3,
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Z_shift = 6, // 3;
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Z_mask = 0x7 << 6,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_shift = 9, // 2;
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_mask = 0x3 << 9,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift = 11, // 2;
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask = 0x3 << 11,
+ EG_SQ_TEX_SAMPLER_WORD0_0__Z_FILTER_shift = 13, // 2;
+ EG_SQ_TEX_SAMPLER_WORD0_0__Z_FILTER_mask = 0x3 << 13,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift = 15, // 2;
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask = 0x3 << 15,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MAX_ANISO_RATIO_shift = 17, // 3;
+ EG_SQ_TEX_SAMPLER_WORD0_0__MAX_ANISO_RATIO_mask = 0x7 << 17,
+ EG_SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE_shift = 20,//2;
+ EG_SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE_mask = 0x3 << 20,
+ EG_SQ_TEX_SAMPLER_WORD0_0__DCF_shift = 22, // 3;
+ EG_SQ_TEX_SAMPLER_WORD0_0__DCF_mask = 0x7 << 22,
+ EG_SQ_TEX_SAMPLER_WORD0_0__CHROMA_KEY_shift = 25, // 2;
+ EG_SQ_TEX_SAMPLER_WORD0_0__CHROMA_KEY_mask = 0x3 << 25,
+ EG_SQ_TEX_SAMPLER_WORD0_0__ANISO_THRESHOLD_shift = 27, // 3;
+ EG_SQ_TEX_SAMPLER_WORD0_0__ANISO_THRESHOLD_mask = 0x7 << 27,
+ EG_SQ_TEX_SAMPLER_WORD0_0__Reserved_shift = 30, // 2
+ EG_SQ_TEX_SAMPLER_WORD0_0__Reserved_mask = 0x3 << 30,
+ /* SQ_TEX_SAMPLER_WORD1_0 */
+ EG_SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift = 0, // 12;
+ EG_SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_mask = 0xfff,
+ EG_SQ_TEX_SAMPLER_WORD1_0__MAX_LOD_shift = 12,// 12;
+ EG_SQ_TEX_SAMPLER_WORD1_0__MAX_LOD_mask = 0xfff << 12,
+ /* SQ_TEX_SAMPLER_WORD2_0 */
+ EG_SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift = 0, //14;
+ EG_SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_mask = 0x3fff,
+ EG_SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_SEC_shift = 14,//6;
+ EG_SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_SEC_mask = 0x3f << 14,
+ EG_SQ_TEX_SAMPLER_WORD2_0__MC_COORD_TRUNCATE_shift = 20,//1;
+ EG_SQ_TEX_SAMPLER_WORD2_0__MC_COORD_TRUNCATE_bit = 1 << 20,
+ EG_SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_shift = 21,//1;
+ EG_SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit = 1 << 21,
+ EG_SQ_TEX_SAMPLER_WORD2_0__ANISO_BIAS_shift = 22,//6;
+ EG_SQ_TEX_SAMPLER_WORD2_0__ANISO_BIAS_mask = 0x3f << 22,
+ EG_SQ_TEX_SAMPLER_WORD2_0__TRUNCATE_COORD_shift = 28,//1;
+ EG_SQ_TEX_SAMPLER_WORD2_0__TRUNCATE_COORD_bit = 1 << 28,
+ EG_SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_shift = 29,//1;
+ EG_SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit = 1 << 29,
+ EG_SQ_TEX_SAMPLER_WORD2_0__Reserved_shift = 30,//1;
+ EG_SQ_TEX_SAMPLER_WORD2_0__Reserved_bit = 1 << 30,
+ EG_SQ_TEX_SAMPLER_WORD2_0__TYPE_shift = 31,//1;
+ EG_SQ_TEX_SAMPLER_WORD2_0__TYPE_bit = 1 << 31,
+ /* SQ_TEX_RESOURCE_WORD0_0 */
+ EG_SQ_TEX_RESOURCE_WORD0_0__DIM_shift = 0, // 3;
+ EG_SQ_TEX_RESOURCE_WORD0_0__DIM_mask = 0x7,
+ EG_SQ_TEX_RESOURCE_WORD0_0__ISET_shift = 3, // 1;
+ EG_SQ_TEX_RESOURCE_WORD0_0__ISET_bit = 1 << 3,
+ EG_SQ_TEX_RESOURCE_WORD0_0__Reserve_shift = 4, // 1;
+ EG_SQ_TEX_RESOURCE_WORD0_0__Reserve_bit = 1 << 4,
+ EG_SQ_TEX_RESOURCE_WORD0_0__NDTO_shift = 5, // 1;
+ EG_SQ_TEX_RESOURCE_WORD0_0__NDTO_bit = 1 << 5,
+ EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_shift = 6, // 12;
+ EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_mask = 0xfff << 6,
+ EG_SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH_shift = 18,// 14;
+ EG_SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH_mask = 0x3fff << 18,
+ /* SQ_TEX_RESOURCE_WORD1_0 */
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT_shift = 0, // 14;
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT_mask = 0x3fff,
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_shift = 14,// 13;
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_mask = 0x1fff << 14,
+ EG_SQ_TEX_RESOURCE_WORD1_0__Reserved_shift = 27,// 1;
+ EG_SQ_TEX_RESOURCE_WORD1_0__Reserved_bit = 1 << 27,
+ EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift = 28,// 4;
+ EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask = 0xf << 28,
+ /* SQ_TEX_RESOURCE_WORD6_0 */
+ EG_SQ_TEX_RESOURCE_WORD6_0__MAX_ANISO_RATIO_shift = 0, //: 3;
+ EG_SQ_TEX_RESOURCE_WORD6_0__MAX_ANISO_RATIO_mask = 0x7,
+ EG_SQ_TEX_RESOURCE_WORD6_0__INTERLACED_shift = 6, //1;
+ EG_SQ_TEX_RESOURCE_WORD6_0__INTERLACED_bit = 1 << 6,
+ EG_SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift = 8, //12;
+ EG_SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_mask = 0xfff << 8,
+ EG_SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift = 29,// 3;
+ EG_SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_mask = 0x7 << 29,
+ /* SQ_TEX_RESOURCE_WORD7_0 */
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift = 0, // 6;
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask = 0x3f,
+ EG_SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift = 6, // 2;
+ EG_SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_mask = 0x3 << 6,
+ EG_SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift = 8, // 2;
+ EG_SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_mask = 0x3 << 8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift = 10,// 2;
+ EG_SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_mask = 0x3 << 10,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DEPTH_SAMPLE_ORDER_shift = 15,// 1;
+ EG_SQ_TEX_RESOURCE_WORD7_0__DEPTH_SAMPLE_ORDER_bit = 1 << 15,
+ EG_SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift = 16,// 2;
+ EG_SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_mask = 0x3 << 16,
+ EG_SQ_TEX_RESOURCE_WORD7_0__TYPE_shift = 30,// 2;
+ EG_SQ_TEX_RESOURCE_WORD7_0__TYPE_mask = 0x3 << 30,
+};
+
+/* */
+
+#define EG_SQ_FETCH_RESOURCE_COUNT 0x00000400
+#define EG_SQ_TEX_SAMPLER_COUNT 0x0000006c
+#define EG_SQ_LOOP_CONST_COUNT 0x000000c0
+
+#define EG_SET_RESOURCE_OFFSET 0x30000
+#define EG_SET_RESOURCE_END 0x30400 //r600 := offset + 0x4000
+
+#define EG_SET_LOOP_CONST_OFFSET 0x3A200
+#define EG_SET_LOOP_CONST_END 0x3A26C //r600 := offset + 0x180
+
+
+#define EG_SQ_FETCH_RESOURCE_VS_OFFSET 0x000000b0
+#define EG_FETCH_RESOURCE_STRIDE 8
+
+#define EG_SET_BOOL_CONST_OFFSET 0x3A500
+#define EG_SET_BOOL_CONST_END 0x3A506
+
+
+#endif //_EVERGREEN_DIFF_H_
diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.c b/src/mesa/drivers/dri/r600/evergreen_fragprog.c
new file mode 100644
index 0000000000..b53ff424a0
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.c
@@ -0,0 +1,817 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ * CooperYuan <cooper.yuan@amd.com>, <cooperyuan@gmail.com>
+ */
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/imports.h"
+
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
+#include "program/program.h"
+
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+#include "r600_emit.h"
+
+#include "evergreen_vertprog.h"
+#include "evergreen_fragprog.h"
+
+void evergreen_insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog)
+{
+ static const gl_state_index winstate[STATE_LENGTH]
+ = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0};
+ struct prog_instruction *newInst, *inst;
+ GLint win_size; /* state reference */
+ GLuint wpos_temp; /* temp register */
+ int i, j;
+
+ /* PARAM win_size = STATE_FB_SIZE */
+ win_size = _mesa_add_state_reference(fprog->Base.Parameters, winstate);
+
+ wpos_temp = fprog->Base.NumTemporaries++;
+
+ /* scan program where WPOS is used and replace with wpos_temp */
+ inst = fprog->Base.Instructions;
+ for (i = 0; i < fprog->Base.NumInstructions; i++) {
+ for (j=0; j < 3; j++) {
+ if(inst->SrcReg[j].File == PROGRAM_INPUT &&
+ inst->SrcReg[j].Index == FRAG_ATTRIB_WPOS) {
+ inst->SrcReg[j].File = PROGRAM_TEMPORARY;
+ inst->SrcReg[j].Index = wpos_temp;
+ }
+ }
+ inst++;
+ }
+
+ _mesa_insert_instructions(&(fprog->Base), 0, 1);
+
+ newInst = fprog->Base.Instructions;
+ /* invert wpos.y
+ * wpos_temp.xyzw = wpos.x-yzw + winsize.0y00 */
+ newInst[0].Opcode = OPCODE_ADD;
+ newInst[0].DstReg.File = PROGRAM_TEMPORARY;
+ newInst[0].DstReg.Index = wpos_temp;
+ newInst[0].DstReg.WriteMask = WRITEMASK_XYZW;
+
+ newInst[0].SrcReg[0].File = PROGRAM_INPUT;
+ newInst[0].SrcReg[0].Index = FRAG_ATTRIB_WPOS;
+ newInst[0].SrcReg[0].Swizzle = SWIZZLE_XYZW;
+ newInst[0].SrcReg[0].Negate = NEGATE_Y;
+
+ newInst[0].SrcReg[1].File = PROGRAM_STATE_VAR;
+ newInst[0].SrcReg[1].Index = win_size;
+ newInst[0].SrcReg[1].Swizzle = MAKE_SWIZZLE4(SWIZZLE_ZERO, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ZERO);
+
+}
+
+//TODO : Validate FP input with VP output.
+void evergreen_Map_Fragment_Program(r700_AssemblerBase *pAsm,
+ struct gl_fragment_program *mesa_fp,
+ GLcontext *ctx)
+{
+ unsigned int unBit;
+ unsigned int i;
+ GLuint ui;
+
+ /* match fp inputs with vp exports. */
+ struct evergreen_vertex_program_cont *vpc =
+ (struct evergreen_vertex_program_cont *)ctx->VertexProgram._Current;
+ GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
+
+ pAsm->number_used_registers = 0;
+
+//Input mapping : mesa_fp->Base.InputsRead set the flag, set in
+ //The flags parsed in parse_attrib_binding. FRAG_ATTRIB_COLx, FRAG_ATTRIB_TEXx, ...
+ //MUST match order in Map_Vertex_Output
+ unBit = 1 << FRAG_ATTRIB_WPOS;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS] = pAsm->number_used_registers++;
+ }
+
+ unBit = 1 << VERT_RESULT_COL0;
+ if(OutputsWritten & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0] = pAsm->number_used_registers++;
+ }
+
+ unBit = 1 << VERT_RESULT_COL1;
+ if(OutputsWritten & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1] = pAsm->number_used_registers++;
+ }
+
+ unBit = 1 << VERT_RESULT_FOGC;
+ if(OutputsWritten & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC] = pAsm->number_used_registers++;
+ }
+
+ for(i=0; i<8; i++)
+ {
+ unBit = 1 << (VERT_RESULT_TEX0 + i);
+ if(OutputsWritten & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i] = pAsm->number_used_registers++;
+ }
+ }
+
+/* order has been taken care of */
+#if 1
+ for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(OutputsWritten & unBit)
+ {
+ pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0] = pAsm->number_used_registers++;
+ }
+ }
+#else
+ if( (mesa_fp->Base.InputsRead >> FRAG_ATTRIB_VAR0) > 0 )
+ {
+ struct evergreen_vertex_program_cont *vpc =
+ (struct evergreen_vertex_program_cont *)ctx->VertexProgram._Current;
+ struct gl_program_parameter_list * VsVarying = vpc->mesa_program.Base.Varying;
+ struct gl_program_parameter_list * PsVarying = mesa_fp->Base.Varying;
+ struct gl_program_parameter * pVsParam;
+ struct gl_program_parameter * pPsParam;
+ GLuint j, k;
+ GLuint unMaxVarying = 0;
+
+ for(i=0; i<VsVarying->NumParameters; i++)
+ {
+ pAsm->uiFP_AttributeMap[i + FRAG_ATTRIB_VAR0] = 0;
+ }
+
+ for(i=FRAG_ATTRIB_VAR0; i<FRAG_ATTRIB_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ j = i - FRAG_ATTRIB_VAR0;
+ pPsParam = PsVarying->Parameters + j;
+
+ for(k=0; k<VsVarying->NumParameters; k++)
+ {
+ pVsParam = VsVarying->Parameters + k;
+
+ if( strcmp(pPsParam->Name, pVsParam->Name) == 0)
+ {
+ pAsm->uiFP_AttributeMap[i] = pAsm->number_used_registers + k;
+ if(k > unMaxVarying)
+ {
+ unMaxVarying = k;
+ }
+ break;
+ }
+ }
+ }
+ }
+
+ pAsm->number_used_registers += unMaxVarying + 1;
+ }
+#endif
+ unBit = 1 << FRAG_ATTRIB_FACE;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE] = pAsm->number_used_registers++;
+ }
+
+ unBit = 1 << FRAG_ATTRIB_PNTC;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC] = pAsm->number_used_registers++;
+ }
+
+ pAsm->uIIns = pAsm->number_used_registers;
+
+/* Map temporary registers (GPRs) */
+ pAsm->starting_temp_register_number = pAsm->number_used_registers;
+
+ if(mesa_fp->Base.NumNativeTemporaries >= mesa_fp->Base.NumTemporaries)
+ {
+ pAsm->number_used_registers += mesa_fp->Base.NumNativeTemporaries;
+ }
+ else
+ {
+ pAsm->number_used_registers += mesa_fp->Base.NumTemporaries;
+ }
+
+/* Output mapping */
+ pAsm->number_of_exports = 0;
+ pAsm->number_of_colorandz_exports = 0; /* don't include stencil and mask out. */
+ pAsm->starting_export_register_number = pAsm->number_used_registers;
+ unBit = 1 << FRAG_RESULT_COLOR;
+ if(mesa_fp->Base.OutputsWritten & unBit)
+ {
+ pAsm->uiFP_OutputMap[FRAG_RESULT_COLOR] = pAsm->number_used_registers++;
+ pAsm->number_of_exports++;
+ pAsm->number_of_colorandz_exports++;
+ }
+ unBit = 1 << FRAG_RESULT_DEPTH;
+ if(mesa_fp->Base.OutputsWritten & unBit)
+ {
+ pAsm->depth_export_register_number = pAsm->number_used_registers;
+ pAsm->uiFP_OutputMap[FRAG_RESULT_DEPTH] = pAsm->number_used_registers++;
+ pAsm->number_of_exports++;
+ pAsm->number_of_colorandz_exports++;
+ pAsm->pR700Shader->depthIsExported = 1;
+ }
+
+ pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
+ for(ui=0; ui<pAsm->number_of_exports; ui++)
+ {
+ pAsm->pucOutMask[ui] = 0x0;
+ }
+
+ pAsm->flag_reg_index = pAsm->number_used_registers++;
+
+ pAsm->uFirstHelpReg = pAsm->number_used_registers;
+}
+
+GLboolean evergreen_Find_Instruction_Dependencies_fp(struct evergreen_fragment_program *fp,
+ struct gl_fragment_program *mesa_fp)
+{
+ GLuint i, j;
+ GLint * puiTEMPwrites;
+ GLint * puiTEMPreads;
+ struct prog_instruction * pILInst;
+ InstDeps *pInstDeps;
+ struct prog_instruction * texcoord_DepInst;
+ GLint nDepInstID;
+
+ puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
+ puiTEMPreads = (GLint*) MALLOC(sizeof(GLuint)*mesa_fp->Base.NumTemporaries);
+
+ for(i=0; i<mesa_fp->Base.NumTemporaries; i++)
+ {
+ puiTEMPwrites[i] = -1;
+ puiTEMPreads[i] = -1;
+ }
+
+ pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_fp->Base.NumInstructions);
+
+ for(i=0; i<mesa_fp->Base.NumInstructions; i++)
+ {
+ pInstDeps[i].nDstDep = -1;
+ pILInst = &(mesa_fp->Base.Instructions[i]);
+
+ //Dst
+ if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
+ {
+ //Set lastwrite for the temp
+ puiTEMPwrites[pILInst->DstReg.Index] = i;
+ }
+
+ //Src
+ for(j=0; j<3; j++)
+ {
+ if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
+ {
+ //Set dep.
+ pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
+ //Set first read
+ if(puiTEMPreads[pILInst->SrcReg[j].Index] < 0 )
+ {
+ puiTEMPreads[pILInst->SrcReg[j].Index] = i;
+ }
+ }
+ else
+ {
+ pInstDeps[i].nSrcDeps[j] = -1;
+ }
+ }
+ }
+
+ fp->r700AsmCode.pInstDeps = pInstDeps;
+
+ //Find dep for tex inst
+ for(i=0; i<mesa_fp->Base.NumInstructions; i++)
+ {
+ pILInst = &(mesa_fp->Base.Instructions[i]);
+
+ if(GL_TRUE == IsTex(pILInst->Opcode))
+ { //src0 is the tex coord register, src1 is texunit, src2 is textype
+ nDepInstID = pInstDeps[i].nSrcDeps[0];
+ if(nDepInstID >= 0)
+ {
+ texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
+ if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
+ {
+ pInstDeps[nDepInstID].nDstDep = i;
+ pInstDeps[i].nDstDep = i;
+ }
+ else if(GL_TRUE == IsTex(texcoord_DepInst->Opcode) )
+ {
+ pInstDeps[i].nDstDep = i;
+ }
+ else
+ { //... other deps?
+ }
+ }
+ // make sure that we dont overwrite src used earlier
+ nDepInstID = puiTEMPreads[pILInst->DstReg.Index];
+ if(nDepInstID < i)
+ {
+ pInstDeps[i].nDstDep = puiTEMPreads[pILInst->DstReg.Index];
+ texcoord_DepInst = &(mesa_fp->Base.Instructions[nDepInstID]);
+ if(GL_TRUE == IsAlu(texcoord_DepInst->Opcode) )
+ {
+ pInstDeps[nDepInstID].nDstDep = i;
+ }
+
+ }
+
+ }
+ }
+
+ FREE(puiTEMPwrites);
+ FREE(puiTEMPreads);
+
+ return GL_TRUE;
+}
+
+GLboolean evergreenTranslateFragmentShader(struct evergreen_fragment_program *fp,
+ struct gl_fragment_program *mesa_fp,
+ GLcontext *ctx)
+{
+ GLuint number_of_colors_exported;
+ GLboolean z_enabled = GL_FALSE;
+ GLuint unBit, shadow_unit;
+ int i;
+ struct prog_instruction *inst;
+ gl_state_index shadow_ambient[STATE_LENGTH]
+ = { STATE_INTERNAL, STATE_SHADOW_AMBIENT, 0, 0, 0};
+
+ //Init_Program
+ Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
+
+ fp->constbo0 = NULL;
+ fp->r700AsmCode.bUseMemConstant = GL_TRUE;
+ fp->r700AsmCode.unAsic = 8;
+
+ if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS)
+ {
+ evergreen_insert_wpos_code(ctx, mesa_fp);
+ }
+
+ /* add/map consts for ARB_shadow_ambient */
+ if(mesa_fp->Base.ShadowSamplers)
+ {
+ inst = mesa_fp->Base.Instructions;
+ for (i = 0; i < mesa_fp->Base.NumInstructions; i++)
+ {
+ if(inst->TexShadow == 1)
+ {
+ shadow_unit = inst->TexSrcUnit;
+ shadow_ambient[2] = shadow_unit;
+ fp->r700AsmCode.shadow_regs[shadow_unit] =
+ _mesa_add_state_reference(mesa_fp->Base.Parameters, shadow_ambient);
+ }
+ inst++;
+ }
+ }
+
+ evergreen_Map_Fragment_Program(&(fp->r700AsmCode), mesa_fp, ctx);
+
+ if( GL_FALSE == evergreen_Find_Instruction_Dependencies_fp(fp, mesa_fp) )
+ {
+ return GL_FALSE;
+ }
+
+ InitShaderProgram(&(fp->r700AsmCode));
+
+ for(i=0; i < MAX_SAMPLERS; i++)
+ {
+ fp->r700AsmCode.SamplerUnits[i] = fp->mesa_program.Base.SamplerUnits[i];
+ }
+
+ fp->r700AsmCode.unCurNumILInsts = mesa_fp->Base.NumInstructions;
+
+ if( GL_FALSE == AssembleInstr(0,
+ 0,
+ mesa_fp->Base.NumInstructions,
+ &(mesa_fp->Base.Instructions[0]),
+ &(fp->r700AsmCode)) )
+ {
+ return GL_FALSE;
+ }
+
+ if(GL_FALSE == Process_Fragment_Exports(&(fp->r700AsmCode), mesa_fp->Base.OutputsWritten) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == RelocProgram(&(fp->r700AsmCode), &(mesa_fp->Base)) )
+ {
+ return GL_FALSE;
+ }
+
+ fp->r700Shader.nRegs = (fp->r700AsmCode.number_used_registers == 0) ? 0
+ : (fp->r700AsmCode.number_used_registers - 1);
+
+ fp->r700Shader.nParamExports = fp->r700AsmCode.number_of_exports;
+
+ number_of_colors_exported = fp->r700AsmCode.number_of_colorandz_exports;
+
+ unBit = 1 << FRAG_RESULT_DEPTH;
+ if(mesa_fp->Base.OutputsWritten & unBit)
+ {
+ z_enabled = GL_TRUE;
+ number_of_colors_exported--;
+ }
+
+ /* illegal to set this to 0 */
+ if(number_of_colors_exported || z_enabled)
+ {
+ fp->r700Shader.exportMode = number_of_colors_exported << 1 | z_enabled;
+ }
+ else
+ {
+ fp->r700Shader.exportMode = (1 << 1);
+ }
+
+ fp->translated = GL_TRUE;
+
+ return GL_TRUE;
+}
+
+void evergreenSelectFragmentShader(GLcontext *ctx)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *)
+ (ctx->FragmentProgram._Current);
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+ {
+ fp->r700AsmCode.bR6xx = 1;
+ }
+
+ if (GL_FALSE == fp->translated)
+ evergreenTranslateFragmentShader(fp, &(fp->mesa_program), ctx);
+}
+
+void * evergreenGetActiveFpShaderBo(GLcontext * ctx)
+{
+ struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *)
+ (ctx->FragmentProgram._Current);
+
+ return fp->shaderbo;
+}
+
+void * evergreenGetActiveFpShaderConstBo(GLcontext * ctx)
+{
+ struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *)
+ (ctx->FragmentProgram._Current);
+
+ return fp->constbo0;
+}
+
+GLboolean evergreenSetupFragmentProgram(GLcontext * ctx)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *)
+ (ctx->FragmentProgram._Current);
+ r700_AssemblerBase *pAsm = &(fp->r700AsmCode);
+ struct gl_fragment_program *mesa_fp = &(fp->mesa_program);
+ unsigned int ui, i;
+ unsigned int unNumOfReg;
+ unsigned int unBit;
+ GLuint exportCount;
+ GLboolean point_sprite = GL_FALSE;
+
+ if(GL_FALSE == fp->loaded)
+ {
+ if(fp->r700Shader.bNeedsAssembly == GL_TRUE)
+ {
+ Assemble( &(fp->r700Shader) );
+ }
+
+ r600EmitShader(ctx,
+ &(fp->shaderbo),
+ (GLvoid *)(fp->r700Shader.pProgram),
+ fp->r700Shader.uShaderBinaryDWORDSize,
+ "FS");
+
+ fp->loaded = GL_TRUE;
+ }
+
+ /* TODO : enable this after MemUse fixed *=
+ (context->chipobj.MemUse)(context, fp->shadercode.buf->id);
+ */
+
+ EVERGREEN_STATECHANGE(context, sq);
+
+ evergreen->SQ_PGM_RESOURCES_PS.u32All = 0;
+ SETbit(evergreen->SQ_PGM_RESOURCES_PS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
+
+ evergreen->ps.SQ_ALU_CONST_CACHE_PS_0.u32All = 0;
+ evergreen->ps.SQ_PGM_START_PS.u32All = 0;
+
+ EVERGREEN_STATECHANGE(context, spi);
+
+ unNumOfReg = fp->r700Shader.nRegs + 1;
+
+ ui = (evergreen->SPI_PS_IN_CONTROL_0.u32All & NUM_INTERP_mask) / (1 << NUM_INTERP_shift);
+
+ /* PS uses fragment.position */
+ if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS))
+ {
+ ui += 1;
+ SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
+ SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, CENTERS_ONLY, BARYC_SAMPLE_CNTL_shift, BARYC_SAMPLE_CNTL_mask);
+ SETbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
+ SETbit(evergreen->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
+ }
+ else
+ {
+ CLEARbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, POSITION_ENA_bit);
+ CLEARbit(evergreen->SPI_INPUT_Z.u32All, PROVIDE_Z_TO_SPI_bit);
+ }
+
+ if (mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_FACE))
+ {
+ ui += 1;
+ SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
+ SETbit(evergreen->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
+ SETbit(evergreen->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ALL_BITS_bit);
+ SETfield(evergreen->SPI_PS_IN_CONTROL_1.u32All, pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE], FRONT_FACE_ADDR_shift, FRONT_FACE_ADDR_mask);
+ }
+ else
+ {
+ CLEARbit(evergreen->SPI_PS_IN_CONTROL_1.u32All, FRONT_FACE_ENA_bit);
+ }
+
+ /* see if we need any point_sprite replacements */
+ for (i = VERT_RESULT_TEX0; i<= VERT_RESULT_TEX7; i++)
+ {
+ if(ctx->Point.CoordReplace[i - VERT_RESULT_TEX0] == GL_TRUE)
+ point_sprite = GL_TRUE;
+ }
+
+ if ((mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC)) || point_sprite)
+ {
+ /* for FRAG_ATTRIB_PNTC we need to increase num_interp */
+ if(mesa_fp->Base.InputsRead & (1 << FRAG_ATTRIB_PNTC))
+ {
+ ui++;
+ SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, ui, NUM_INTERP_shift, NUM_INTERP_mask);
+ }
+ SETbit(evergreen->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
+ SETfield(evergreen->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_S, PNT_SPRITE_OVRD_X_shift, PNT_SPRITE_OVRD_X_mask);
+ SETfield(evergreen->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_T, PNT_SPRITE_OVRD_Y_shift, PNT_SPRITE_OVRD_Y_mask);
+ SETfield(evergreen->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_0, PNT_SPRITE_OVRD_Z_shift, PNT_SPRITE_OVRD_Z_mask);
+ SETfield(evergreen->SPI_INTERP_CONTROL_0.u32All, SPI_PNT_SPRITE_SEL_1, PNT_SPRITE_OVRD_W_shift, PNT_SPRITE_OVRD_W_mask);
+ if(ctx->Point.SpriteOrigin == GL_LOWER_LEFT)
+ SETbit(evergreen->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
+ else
+ CLEARbit(evergreen->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_TOP_1_bit);
+ }
+ else
+ {
+ CLEARbit(evergreen->SPI_INTERP_CONTROL_0.u32All, PNT_SPRITE_ENA_bit);
+ }
+
+
+ ui = (unNumOfReg < ui) ? ui : unNumOfReg;
+
+ SETfield(evergreen->SQ_PGM_RESOURCES_PS.u32All, ui, NUM_GPRS_shift, NUM_GPRS_mask);
+
+ CLEARbit(evergreen->SQ_PGM_RESOURCES_PS.u32All, UNCACHED_FIRST_INST_bit);
+
+ if(fp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
+ {
+ SETfield(evergreen->SQ_PGM_RESOURCES_PS.u32All, fp->r700Shader.uStackSize,
+ STACK_SIZE_shift, STACK_SIZE_mask);
+ }
+
+ SETfield(evergreen->SQ_PGM_EXPORTS_PS.u32All, fp->r700Shader.exportMode,
+ EXPORT_MODE_shift, EXPORT_MODE_mask);
+
+ // emit ps input map
+ struct evergreen_vertex_program_cont *vpc =
+ (struct evergreen_vertex_program_cont *)ctx->VertexProgram._Current;
+ GLbitfield OutputsWritten = vpc->mesa_program.Base.OutputsWritten;
+
+ for(ui = 0; ui < EVERGREEN_MAX_SHADER_EXPORTS; ui++)
+ evergreen->SPI_PS_INPUT_CNTL[ui].u32All = 0;
+
+ unBit = 1 << FRAG_ATTRIB_WPOS;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_WPOS];
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+
+ unBit = 1 << VERT_RESULT_COL0;
+ if(OutputsWritten & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL0];
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+
+ unBit = 1 << VERT_RESULT_COL1;
+ if(OutputsWritten & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_COL1];
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+
+ unBit = 1 << VERT_RESULT_FOGC;
+ if(OutputsWritten & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FOGC];
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+
+ for(i=0; i<8; i++)
+ {
+ unBit = 1 << (VERT_RESULT_TEX0 + i);
+ if(OutputsWritten & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_TEX0 + i];
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ /* ARB_point_sprite */
+ if(ctx->Point.CoordReplace[i] == GL_TRUE)
+ {
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
+ }
+ }
+ }
+
+ unBit = 1 << FRAG_ATTRIB_FACE;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_FACE];
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+ unBit = 1 << FRAG_ATTRIB_PNTC;
+ if(mesa_fp->Base.InputsRead & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[FRAG_ATTRIB_PNTC];
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, PT_SPRITE_TEX_bit);
+ }
+
+
+
+
+ for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(OutputsWritten & unBit)
+ {
+ ui = pAsm->uiFP_AttributeMap[i-VERT_RESULT_VAR0+FRAG_ATTRIB_VAR0];
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, SEL_CENTROID_bit);
+ SETfield(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, ui,
+ SEMANTIC_shift, SEMANTIC_mask);
+ if (evergreen->SPI_INTERP_CONTROL_0.u32All & FLAT_SHADE_ENA_bit)
+ SETbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ else
+ CLEARbit(evergreen->SPI_PS_INPUT_CNTL[ui].u32All, FLAT_SHADE_bit);
+ }
+ }
+
+ exportCount = (evergreen->SQ_PGM_EXPORTS_PS.u32All & EXPORT_MODE_mask) / (1 << EXPORT_MODE_shift);
+
+ return GL_TRUE;
+}
+
+GLboolean evergreenSetupFPconstants(GLcontext * ctx)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ struct evergreen_fragment_program *fp = (struct evergreen_fragment_program *)
+ (ctx->FragmentProgram._Current);
+ r700_AssemblerBase *pAsm = &(fp->r700AsmCode);
+
+ struct gl_program_parameter_list *paramList;
+ unsigned int unNumParamData;
+ unsigned int ui;
+
+ /* sent out shader constants. */
+ paramList = fp->mesa_program.Base.Parameters;
+
+ if(NULL != paramList)
+ {
+ _mesa_load_state_parameters(ctx, paramList);
+
+ if (paramList->NumParameters > EVERGREEN_MAX_DX9_CONSTS)
+ return GL_FALSE;
+
+ EVERGREEN_STATECHANGE(context, sq);
+
+ evergreen->ps.num_consts = paramList->NumParameters;
+
+ unNumParamData = paramList->NumParameters;
+
+ for(ui=0; ui<unNumParamData; ui++) {
+ evergreen->ps.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
+ evergreen->ps.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
+ evergreen->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
+ evergreen->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ }
+
+ /* Load fp constants to gpu */
+ if(unNumParamData > 0)
+ {
+ radeonAllocDmaRegion(&context->radeon,
+ &context->fp_Constbo,
+ &context->fp_bo_offset,
+ 256,
+ 256);
+ r600EmitShaderConsts(ctx,
+ context->fp_Constbo,
+ context->fp_bo_offset,
+ (GLvoid *)&(evergreen->ps.consts[0][0]),
+ unNumParamData * 4 * 4);
+ }
+ } else
+ evergreen->ps.num_consts = 0;
+
+ COMPILED_SUB * pCompiledSub;
+ GLuint uj;
+ GLuint unConstOffset = evergreen->ps.num_consts;
+ for(ui=0; ui<pAsm->unNumPresub; ui++)
+ {
+ pCompiledSub = pAsm->presubs[ui].pCompiledSub;
+
+ evergreen->ps.num_consts += pCompiledSub->NumParameters;
+
+ for(uj=0; uj<pCompiledSub->NumParameters; uj++)
+ {
+ evergreen->ps.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
+ evergreen->ps.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
+ evergreen->ps.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
+ evergreen->ps.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
+ }
+ unConstOffset += pCompiledSub->NumParameters;
+ }
+} \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/evergreen_fragprog.h b/src/mesa/drivers/dri/r600/evergreen_fragprog.h
new file mode 100644
index 0000000000..0e200bf383
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_fragprog.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _EVERGREEN_FRAGPROG_H_
+#define _EVERGREEN_FRAGPROG_H_
+
+#include "r600_context.h"
+#include "r700_assembler.h"
+
+struct evergreen_fragment_program
+{
+ struct gl_fragment_program mesa_program;
+
+ r700_AssemblerBase r700AsmCode;
+ R700_Shader r700Shader;
+
+ GLboolean translated;
+ GLboolean loaded;
+ GLboolean error;
+
+ void * shaderbo;
+
+ GLuint k0used;
+ void * constbo0;
+
+ GLboolean WritesDepth;
+ GLuint optimization;
+};
+
+/* Internal */
+void evergreen_insert_wpos_code(GLcontext *ctx, struct gl_fragment_program *fprog);
+
+void evergreen_Map_Fragment_Program(r700_AssemblerBase *pAsm,
+ struct gl_fragment_program *mesa_fp,
+ GLcontext *ctx);
+GLboolean evergreen_Find_Instruction_Dependencies_fp(struct evergreen_fragment_program *fp,
+ struct gl_fragment_program *mesa_fp);
+
+GLboolean evergreenTranslateFragmentShader(struct evergreen_fragment_program *fp,
+ struct gl_fragment_program *mesa_vp,
+ GLcontext *ctx);
+
+/* Interface */
+extern void evergreenSelectFragmentShader(GLcontext *ctx);
+
+extern GLboolean evergreenSetupFragmentProgram(GLcontext * ctx);
+
+extern GLboolean evergreenSetupFPconstants(GLcontext * ctx);
+
+extern void * evergreenGetActiveFpShaderBo(GLcontext * ctx);
+
+extern void * evergreenGetActiveFpShaderConstBo(GLcontext * ctx);
+
+#endif /*_EVERGREEN_FRAGPROG_H_*/
diff --git a/src/mesa/drivers/dri/r600/evergreen_ioctl.c b/src/mesa/drivers/dri/r600/evergreen_ioctl.c
new file mode 100644
index 0000000000..5c1270790d
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_ioctl.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <sched.h>
+#include <errno.h>
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+
+#include "radeon_common.h"
+#include "r600_context.h"
+
+#include "evergreen_ioctl.h"
+
+#include "r700_clear.h"
+
+void evergreenClear(GLcontext * ctx, GLbitfield mask)
+{
+ r700Clear(ctx, mask);
+}
+
+void evergreenInitIoctlFuncs(struct dd_function_table *functions)
+{
+ functions->Clear = evergreenClear;
+ functions->Finish = radeonFinish;
+ functions->Flush = radeonFlush;
+}
diff --git a/src/mesa/drivers/dri/r600/evergreen_ioctl.h b/src/mesa/drivers/dri/r600/evergreen_ioctl.h
new file mode 100644
index 0000000000..3c663a7083
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_ioctl.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2008-2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _EVERGREEN_IOCTL_H_
+#define _EVERGREEN_IOCTL_H_
+
+#include "r600_context.h"
+#include "radeon_drm.h"
+
+extern void evergreenClear(GLcontext * ctx, GLbitfield mask);
+extern void evergreenInitIoctlFuncs(struct dd_function_table *functions);
+
+#endif /* _EVERGREEN_IOCTL_H_ */
diff --git a/src/mesa/drivers/dri/r600/evergreen_off.h b/src/mesa/drivers/dri/r600/evergreen_off.h
new file mode 100644
index 0000000000..8c250699ec
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_off.h
@@ -0,0 +1,881 @@
+/*
+ * Copyright (C) 2008-2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _EVERGREEN_OFF_H_
+#define _EVERGREEN_OFF_H_
+
+enum
+{
+/* Registers from PA block: */
+ EG_PA_SC_SCREEN_SCISSOR_TL = 0x28030, // DIFF
+ EG_PA_SC_SCREEN_SCISSOR_BR = 0x28034, // DIFF
+ EG_PA_SC_WINDOW_OFFSET = 0x28200, // DIFF
+ EG_PA_SC_WINDOW_SCISSOR_TL = 0x28204, // DIFF
+ EG_PA_SC_WINDOW_SCISSOR_BR = 0x28208, // DIFF
+ EG_PA_SC_CLIPRECT_RULE = 0x2820C, // SAME
+ EG_PA_SC_CLIPRECT_0_TL = 0x28210, // DIFF
+ EG_PA_SC_CLIPRECT_0_BR = 0x28214, // DIFF
+ EG_PA_SC_CLIPRECT_1_TL = 0x28218, // DIFF
+ EG_PA_SC_CLIPRECT_1_BR = 0x2821C, // DIFF
+ EG_PA_SC_CLIPRECT_2_TL = 0x28220, // DIFF
+ EG_PA_SC_CLIPRECT_2_BR = 0x28224, // DIFF
+ EG_PA_SC_CLIPRECT_3_TL = 0x28228, // DIFF
+ EG_PA_SC_CLIPRECT_3_BR = 0x2822C, // DIFF
+ EG_PA_SC_EDGERULE = 0x28230, // SAME
+ EG_PA_SU_HARDWARE_SCREEN_OFFSET = 0x28234, //
+ EG_PA_SC_GENERIC_SCISSOR_TL = 0x28240, // DIFF
+ EG_PA_SC_GENERIC_SCISSOR_BR = 0x28244, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_0_TL = 0x28250, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_0_BR = 0x28254, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_1_TL = 0x28258, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_1_BR = 0x2825C, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_2_TL = 0x28260, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_2_BR = 0x28264, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_3_TL = 0x28268, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_3_BR = 0x2826C, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_4_TL = 0x28270, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_4_BR = 0x28274, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_5_TL = 0x28278, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_5_BR = 0x2827C, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_6_TL = 0x28280, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_6_BR = 0x28284, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_7_TL = 0x28288, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_7_BR = 0x2828C, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_8_TL = 0x28290, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_8_BR = 0x28294, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_9_TL = 0x28298, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_9_BR = 0x2829C, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_10_TL = 0x282A0, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_10_BR = 0x282A4, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_11_TL = 0x282A8, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_11_BR = 0x282AC, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_12_TL = 0x282B0, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_12_BR = 0x282B4, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_13_TL = 0x282B8, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_13_BR = 0x282BC, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_14_TL = 0x282C0, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_14_BR = 0x282C4, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_15_TL = 0x282C8, // DIFF
+ EG_PA_SC_VPORT_SCISSOR_15_BR = 0x282CC, // DIFF
+ EG_PA_SC_VPORT_ZMIN_0 = 0x282D0, // SAME
+ EG_PA_SC_VPORT_ZMAX_0 = 0x282D4, // SAME
+ EG_PA_SC_VPORT_ZMIN_1 = 0x282D8, // SAME
+ EG_PA_SC_VPORT_ZMAX_1 = 0x282DC, // SAME
+ EG_PA_SC_VPORT_ZMIN_2 = 0x282E0, // SAME
+ EG_PA_SC_VPORT_ZMAX_2 = 0x282E4, // SAME
+ EG_PA_SC_VPORT_ZMIN_3 = 0x282E8, // SAME
+ EG_PA_SC_VPORT_ZMAX_3 = 0x282EC, // SAME
+ EG_PA_SC_VPORT_ZMIN_4 = 0x282F0, // SAME
+ EG_PA_SC_VPORT_ZMAX_4 = 0x282F4, // SAME
+ EG_PA_SC_VPORT_ZMIN_5 = 0x282F8, // SAME
+ EG_PA_SC_VPORT_ZMAX_5 = 0x282FC, // SAME
+ EG_PA_SC_VPORT_ZMIN_6 = 0x28300, // SAME
+ EG_PA_SC_VPORT_ZMAX_6 = 0x28304, // SAME
+ EG_PA_SC_VPORT_ZMIN_7 = 0x28308, // SAME
+ EG_PA_SC_VPORT_ZMAX_7 = 0x2830C, // SAME
+ EG_PA_SC_VPORT_ZMIN_8 = 0x28310, // SAME
+ EG_PA_SC_VPORT_ZMAX_8 = 0x28314, // SAME
+ EG_PA_SC_VPORT_ZMIN_9 = 0x28318, // SAME
+ EG_PA_SC_VPORT_ZMAX_9 = 0x2831C, // SAME
+ EG_PA_SC_VPORT_ZMIN_10 = 0x28320, // SAME
+ EG_PA_SC_VPORT_ZMAX_10 = 0x28324, // SAME
+ EG_PA_SC_VPORT_ZMIN_11 = 0x28328, // SAME
+ EG_PA_SC_VPORT_ZMAX_11 = 0x2832C, // SAME
+ EG_PA_SC_VPORT_ZMIN_12 = 0x28330, // SAME
+ EG_PA_SC_VPORT_ZMAX_12 = 0x28334, // SAME
+ EG_PA_SC_VPORT_ZMIN_13 = 0x28338, // SAME
+ EG_PA_SC_VPORT_ZMAX_13 = 0x2833C, // SAME
+ EG_PA_SC_VPORT_ZMIN_14 = 0x28340, // SAME
+ EG_PA_SC_VPORT_ZMAX_14 = 0x28344, // SAME
+ EG_PA_SC_VPORT_ZMIN_15 = 0x28348, // SAME
+ EG_PA_SC_VPORT_ZMAX_15 = 0x2834C, // SAME
+ EG_PA_CL_VPORT_XSCALE = 0x2843C, // SAME
+ EG_PA_CL_VPORT_XOFFSET = 0x28440, // SAME
+ EG_PA_CL_VPORT_YSCALE = 0x28444, // SAME
+ EG_PA_CL_VPORT_YOFFSET = 0x28448, // SAME
+ EG_PA_CL_VPORT_ZSCALE = 0x2844C, // SAME
+ EG_PA_CL_VPORT_ZOFFSET = 0x28450, // SAME
+ EG_PA_CL_VPORT_XSCALE_1 = 0x28454, // SAME
+ EG_PA_CL_VPORT_XOFFSET_1 = 0x28458, // SAME
+ EG_PA_CL_VPORT_YSCALE_1 = 0x2845C, // SAME
+ EG_PA_CL_VPORT_YOFFSET_1 = 0x28460, // SAME
+ EG_PA_CL_VPORT_ZSCALE_1 = 0x28464, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_1 = 0x28468, // SAME
+ EG_PA_CL_VPORT_XSCALE_2 = 0x2846C, // SAME
+ EG_PA_CL_VPORT_XOFFSET_2 = 0x28470, // SAME
+ EG_PA_CL_VPORT_YSCALE_2 = 0x28474, // SAME
+ EG_PA_CL_VPORT_YOFFSET_2 = 0x28478, // SAME
+ EG_PA_CL_VPORT_ZSCALE_2 = 0x2847C, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_2 = 0x28480, // SAME
+ EG_PA_CL_VPORT_XSCALE_3 = 0x28484, // SAME
+ EG_PA_CL_VPORT_XOFFSET_3 = 0x28488, // SAME
+ EG_PA_CL_VPORT_YSCALE_3 = 0x2848C, // SAME
+ EG_PA_CL_VPORT_YOFFSET_3 = 0x28490, // SAME
+ EG_PA_CL_VPORT_ZSCALE_3 = 0x28494, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_3 = 0x28498, // SAME
+ EG_PA_CL_VPORT_XSCALE_4 = 0x2849C, // SAME
+ EG_PA_CL_VPORT_XOFFSET_4 = 0x284A0, // SAME
+ EG_PA_CL_VPORT_YSCALE_4 = 0x284A4, // SAME
+ EG_PA_CL_VPORT_YOFFSET_4 = 0x284A8, // SAME
+ EG_PA_CL_VPORT_ZSCALE_4 = 0x284AC, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_4 = 0x284B0, // SAME
+ EG_PA_CL_VPORT_XSCALE_5 = 0x284B4, // SAME
+ EG_PA_CL_VPORT_XOFFSET_5 = 0x284B8, // SAME
+ EG_PA_CL_VPORT_YSCALE_5 = 0x284BC, // SAME
+ EG_PA_CL_VPORT_YOFFSET_5 = 0x284C0, // SAME
+ EG_PA_CL_VPORT_ZSCALE_5 = 0x284C4, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_5 = 0x284C8, // SAME
+ EG_PA_CL_VPORT_XSCALE_6 = 0x284CC, // SAME
+ EG_PA_CL_VPORT_XOFFSET_6 = 0x284D0, // SAME
+ EG_PA_CL_VPORT_YSCALE_6 = 0x284D4, // SAME
+ EG_PA_CL_VPORT_YOFFSET_6 = 0x284D8, // SAME
+ EG_PA_CL_VPORT_ZSCALE_6 = 0x284DC, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_6 = 0x284E0, // SAME
+ EG_PA_CL_VPORT_XSCALE_7 = 0x284E4, // SAME
+ EG_PA_CL_VPORT_XOFFSET_7 = 0x284E8, // SAME
+ EG_PA_CL_VPORT_YSCALE_7 = 0x284EC, // SAME
+ EG_PA_CL_VPORT_YOFFSET_7 = 0x284F0, // SAME
+ EG_PA_CL_VPORT_ZSCALE_7 = 0x284F4, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_7 = 0x284F8, // SAME
+ EG_PA_CL_VPORT_XSCALE_8 = 0x284FC, // SAME
+ EG_PA_CL_VPORT_XOFFSET_8 = 0x28500, // SAME
+ EG_PA_CL_VPORT_YSCALE_8 = 0x28504, // SAME
+ EG_PA_CL_VPORT_YOFFSET_8 = 0x28508, // SAME
+ EG_PA_CL_VPORT_ZSCALE_8 = 0x2850C, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_8 = 0x28510, // SAME
+ EG_PA_CL_VPORT_XSCALE_9 = 0x28514, // SAME
+ EG_PA_CL_VPORT_XOFFSET_9 = 0x28518, // SAME
+ EG_PA_CL_VPORT_YSCALE_9 = 0x2851C, // SAME
+ EG_PA_CL_VPORT_YOFFSET_9 = 0x28520, // SAME
+ EG_PA_CL_VPORT_ZSCALE_9 = 0x28524, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_9 = 0x28528, // SAME
+ EG_PA_CL_VPORT_XSCALE_10 = 0x2852C, // SAME
+ EG_PA_CL_VPORT_XOFFSET_10 = 0x28530, // SAME
+ EG_PA_CL_VPORT_YSCALE_10 = 0x28534, // SAME
+ EG_PA_CL_VPORT_YOFFSET_10 = 0x28538, // SAME
+ EG_PA_CL_VPORT_ZSCALE_10 = 0x2853C, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_10 = 0x28540, // SAME
+ EG_PA_CL_VPORT_XSCALE_11 = 0x28544, // SAME
+ EG_PA_CL_VPORT_XOFFSET_11 = 0x28548, // SAME
+ EG_PA_CL_VPORT_YSCALE_11 = 0x2854C, // SAME
+ EG_PA_CL_VPORT_YOFFSET_11 = 0x28550, // SAME
+ EG_PA_CL_VPORT_ZSCALE_11 = 0x28554, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_11 = 0x28558, // SAME
+ EG_PA_CL_VPORT_XSCALE_12 = 0x2855C, // SAME
+ EG_PA_CL_VPORT_XOFFSET_12 = 0x28560, // SAME
+ EG_PA_CL_VPORT_YSCALE_12 = 0x28564, // SAME
+ EG_PA_CL_VPORT_YOFFSET_12 = 0x28568, // SAME
+ EG_PA_CL_VPORT_ZSCALE_12 = 0x2856C, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_12 = 0x28570, // SAME
+ EG_PA_CL_VPORT_XSCALE_13 = 0x28574, // SAME
+ EG_PA_CL_VPORT_XOFFSET_13 = 0x28578, // SAME
+ EG_PA_CL_VPORT_YSCALE_13 = 0x2857C, // SAME
+ EG_PA_CL_VPORT_YOFFSET_13 = 0x28580, // SAME
+ EG_PA_CL_VPORT_ZSCALE_13 = 0x28584, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_13 = 0x28588, // SAME
+ EG_PA_CL_VPORT_XSCALE_14 = 0x2858C, // SAME
+ EG_PA_CL_VPORT_XOFFSET_14 = 0x28590, // SAME
+ EG_PA_CL_VPORT_YSCALE_14 = 0x28594, // SAME
+ EG_PA_CL_VPORT_YOFFSET_14 = 0x28598, // SAME
+ EG_PA_CL_VPORT_ZSCALE_14 = 0x2859C, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_14 = 0x285A0, // SAME
+ EG_PA_CL_VPORT_XSCALE_15 = 0x285A4, // SAME
+ EG_PA_CL_VPORT_XOFFSET_15 = 0x285A8, // SAME
+ EG_PA_CL_VPORT_YSCALE_15 = 0x285AC, // SAME
+ EG_PA_CL_VPORT_YOFFSET_15 = 0x285B0, // SAME
+ EG_PA_CL_VPORT_ZSCALE_15 = 0x285B4, // SAME
+ EG_PA_CL_VPORT_ZOFFSET_15 = 0x285B8, // SAME
+ EG_PA_CL_UCP_0_X = 0x285BC, // SAME 0x28E20
+ EG_PA_CL_UCP_0_Y = 0x285C0, // SAME 0x28E24
+ EG_PA_CL_UCP_0_Z = 0x285C4, // SAME 0x28E28
+ EG_PA_CL_UCP_0_W = 0x285C8, // SAME 0x28E2C
+ EG_PA_CL_UCP_1_X = 0x285CC, // SAME 0x28E30
+ EG_PA_CL_UCP_1_Y = 0x285D0, // SAME 0x28E34
+ EG_PA_CL_UCP_1_Z = 0x285D4, // SAME 0x28E38
+ EG_PA_CL_UCP_1_W = 0x285D8, // SAME 0x28E3C
+ EG_PA_CL_UCP_2_X = 0x285DC, // SAME 0x28E40
+ EG_PA_CL_UCP_2_Y = 0x285E0, // SAME 0x28E44
+ EG_PA_CL_UCP_2_Z = 0x285E4, // SAME 0x28E48
+ EG_PA_CL_UCP_2_W = 0x285E8, // SAME 0x28E4C
+ EG_PA_CL_UCP_3_X = 0x285EC, // SAME 0x28E50
+ EG_PA_CL_UCP_3_Y = 0x285F0, // SAME 0x28E54
+ EG_PA_CL_UCP_3_Z = 0x285F4, // SAME 0x28E58
+ EG_PA_CL_UCP_3_W = 0x285F8, // SAME 0x28E5C
+ EG_PA_CL_UCP_4_X = 0x285FC, // SAME 0x28E60
+ EG_PA_CL_UCP_4_Y = 0x28600, // SAME 0x28E64
+ EG_PA_CL_UCP_4_Z = 0x28604, // SAME 0x28E68
+ EG_PA_CL_UCP_4_W = 0x28608, // SAME 0x28E6C
+ EG_PA_CL_UCP_5_X = 0x2860C, // SAME 0x28E70
+ EG_PA_CL_UCP_5_Y = 0x28610, // SAME 0x28E74
+ EG_PA_CL_UCP_5_Z = 0x28614, // SAME 0x28E78
+ EG_PA_CL_UCP_5_W = 0x28618, // SAME 0x28E7C
+ EG_PA_CL_POINT_X_RAD = 0x287D4, // SAME 0x28E10
+ EG_PA_CL_POINT_Y_RAD = 0x287D8, // SAME 0x28E14
+ EG_PA_CL_POINT_SIZE = 0x287DC, // SAME 0x28E18
+ EG_PA_CL_POINT_CULL_RAD = 0x287E0, // SAME 0x28E1C
+ EG_PA_CL_CLIP_CNTL = 0x28810, // SAME
+ EG_PA_SU_SC_MODE_CNTL = 0x28814, // SAME
+ EG_PA_CL_VTE_CNTL = 0x28818, // SAME
+ EG_PA_CL_VS_OUT_CNTL = 0x2881C, // SAME
+ EG_PA_CL_NANINF_CNTL = 0x28820, // SAME
+ EG_PA_SU_LINE_STIPPLE_CNTL = 0x28824, //
+ EG_PA_SU_LINE_STIPPLE_SCALE = 0x28828, //
+ EG_PA_SU_PRIM_FILTER_CNTL = 0x2882C, //
+ EG_PA_SU_POINT_SIZE = 0x28A00, // SAME
+ EG_PA_SU_POINT_MINMAX = 0x28A04, // SAME
+ EG_PA_SU_LINE_CNTL = 0x28A08, // SAME
+ EG_PA_SC_LINE_STIPPLE = 0x28A0C, // SAME
+ EG_PA_SC_MODE_CNTL_0 = 0x28A48, //
+ EG_PA_SC_MODE_CNTL_1 = 0x28A4C, //
+ EG_PA_SU_POLY_OFFSET_DB_FMT_CNTL = 0x28B78, // SAME 0x28DF8
+ EG_PA_SU_POLY_OFFSET_CLAMP = 0x28B7C, // SAME 0x28DFC
+ EG_PA_SU_POLY_OFFSET_FRONT_SCALE = 0x28B80, // SAME 0x28E00
+ EG_PA_SU_POLY_OFFSET_FRONT_OFFSET = 0x28B84, // SAME 0x28E04
+ EG_PA_SU_POLY_OFFSET_BACK_SCALE = 0x28B88, // SAME 0x28E08
+ EG_PA_SU_POLY_OFFSET_BACK_OFFSET = 0x28B8C, // SAME 0x28E0C
+ EG_PA_SC_LINE_CNTL = 0x28C00, // DIFF
+ EG_PA_SC_AA_CONFIG = 0x28C04, // SAME
+ EG_PA_SU_VTX_CNTL = 0x28C08, // SAME
+ EG_PA_CL_GB_VERT_CLIP_ADJ = 0x28C0C, // SAME
+ EG_PA_CL_GB_VERT_DISC_ADJ = 0x28C10, // SAME
+ EG_PA_CL_GB_HORZ_CLIP_ADJ = 0x28C14, // SAME
+ EG_PA_CL_GB_HORZ_DISC_ADJ = 0x28C18, // SAME
+ EG_PA_SC_AA_SAMPLE_LOCS_0 = 0x28C1C, //
+ EG_PA_SC_AA_SAMPLE_LOCS_1 = 0x28C20, //
+ EG_PA_SC_AA_SAMPLE_LOCS_2 = 0x28C24, //
+ EG_PA_SC_AA_SAMPLE_LOCS_3 = 0x28C28, //
+ EG_PA_SC_AA_SAMPLE_LOCS_4 = 0x28C2C, //
+ EG_PA_SC_AA_SAMPLE_LOCS_5 = 0x28C30, //
+ EG_PA_SC_AA_SAMPLE_LOCS_6 = 0x28C34, //
+ EG_PA_SC_AA_SAMPLE_LOCS_7 = 0x28C38, //
+ EG_PA_SC_AA_MASK = 0x28C3C, // SAME 0x28C48
+
+/* Registers from VGT block: */
+ EG_VGT_INDEX_TYPE = 0x895C, //? config space
+ EG_VGT_PRIMITIVE_TYPE = 0x8958, //? config space
+
+ EG_VGT_MAX_VTX_INDX = 0x28400, // SAME
+ EG_VGT_MIN_VTX_INDX = 0x28404, // SAME
+ EG_VGT_INDX_OFFSET = 0x28408, // SAME
+ EG_VGT_MULTI_PRIM_IB_RESET_INDX = 0x2840C, // SAME
+ EG_CS_COPY_STATE = 0x287CC, //
+ EG_GFX_COPY_STATE = 0x287D0, // SAME
+ EG_VGT_DMA_BASE_HI = 0x287E4, // SAME
+ EG_VGT_DMA_BASE = 0x287E8, // SAME
+ EG_VGT_DRAW_INITIATOR = 0x287F0, // SAME
+ EG_VGT_IMMED_DATA = 0x287F4, // SAME
+ EG_VGT_EVENT_ADDRESS_REG = 0x287F8, // SAME
+ EG_VGT_OUTPUT_PATH_CNTL = 0x28A10, // DIFF
+ EG_VGT_HOS_CNTL = 0x28A14, // SAME
+ EG_VGT_HOS_MAX_TESS_LEVEL = 0x28A18, // SAME
+ EG_VGT_HOS_MIN_TESS_LEVEL = 0x28A1C, // SAME
+ EG_VGT_HOS_REUSE_DEPTH = 0x28A20, // SAME
+ EG_VGT_GROUP_PRIM_TYPE = 0x28A24, // SAME
+ EG_VGT_GROUP_FIRST_DECR = 0x28A28, // SAME
+ EG_VGT_GROUP_DECR = 0x28A2C, // SAME
+ EG_VGT_GROUP_VECT_0_CNTL = 0x28A30, // SAME
+ EG_VGT_GROUP_VECT_1_CNTL = 0x28A34, // SAME
+ EG_VGT_GROUP_VECT_0_FMT_CNTL = 0x28A38, // SAME
+ EG_VGT_GROUP_VECT_1_FMT_CNTL = 0x28A3C, // SAME
+ EG_VGT_GS_MODE = 0x28A40, // DIFF
+ EG_VGT_ENHANCE = 0x28A50, // DIFF
+ EG_VGT_GS_PER_ES = 0x28A54, // DIFF 0x88C8
+ EG_VGT_ES_PER_GS = 0x28A58, // DIFF 0x88CC
+ EG_VGT_GS_PER_VS = 0x28A5C, // SAME 0x88E8
+ EG_VGT_GS_OUT_PRIM_TYPE = 0x28A6C, // SAME
+ EG_VGT_DMA_SIZE = 0x28A74, // SAME
+ EG_VGT_DMA_MAX_SIZE = 0x28A78, // SAME
+ EG_VGT_DMA_INDEX_TYPE = 0x28A7C, // SAME
+ EG_VGT_PRIMITIVEID_EN = 0x28A84, // SAME
+ EG_VGT_DMA_NUM_INSTANCES = 0x28A88, // SAME
+ EG_VGT_EVENT_INITIATOR = 0x28A90, // SAME
+ EG_VGT_MULTI_PRIM_IB_RESET_EN = 0x28A94, // SAME
+ EG_VGT_INSTANCE_STEP_RATE_0 = 0x28AA0, // SAME
+ EG_VGT_INSTANCE_STEP_RATE_1 = 0x28AA4, // SAME
+ EG_VGT_REUSE_OFF = 0x28AB4, // SAME
+ EG_VGT_VTX_CNT_EN = 0x28AB8, // SAME
+ EG_VGT_STRMOUT_BUFFER_SIZE_0 = 0x28AD0, // SAME
+ EG_VGT_STRMOUT_VTX_STRIDE_0 = 0x28AD4, // SAME
+ EG_VGT_STRMOUT_BUFFER_BASE_0 = 0x28AD8, // SAME
+ EG_VGT_STRMOUT_BUFFER_OFFSET_0 = 0x28ADC, // SAME
+ EG_VGT_STRMOUT_BUFFER_SIZE_1 = 0x28AE0, // SAME
+ EG_VGT_STRMOUT_VTX_STRIDE_1 = 0x28AE4, // SAME
+ EG_VGT_STRMOUT_BUFFER_BASE_1 = 0x28AE8, // SAME
+ EG_VGT_STRMOUT_BUFFER_OFFSET_1 = 0x28AEC, // SAME
+ EG_VGT_STRMOUT_BUFFER_SIZE_2 = 0x28AF0, // SAME
+ EG_VGT_STRMOUT_VTX_STRIDE_2 = 0x28AF4, // SAME
+ EG_VGT_STRMOUT_BUFFER_BASE_2 = 0x28AF8, // SAME
+ EG_VGT_STRMOUT_BUFFER_OFFSET_2 = 0x28AFC, // SAME
+ EG_VGT_STRMOUT_BUFFER_SIZE_3 = 0x28B00, // SAME
+ EG_VGT_STRMOUT_VTX_STRIDE_3 = 0x28B04, // SAME
+ EG_VGT_STRMOUT_BUFFER_BASE_3 = 0x28B08, // SAME
+ EG_VGT_STRMOUT_BUFFER_OFFSET_3 = 0x28B0C, // SAME
+ EG_VGT_STRMOUT_BASE_OFFSET_0 = 0x28B10, // SAME
+ EG_VGT_STRMOUT_BASE_OFFSET_1 = 0x28B14, // SAME
+ EG_VGT_STRMOUT_BASE_OFFSET_2 = 0x28B18, // SAME
+ EG_VGT_STRMOUT_BASE_OFFSET_3 = 0x28B1C, // SAME
+ EG_VGT_STRMOUT_DRAW_OPAQUE_OFFSET = 0x28B28, // SAME
+ EG_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE = 0x28B2C, // SAME
+ EG_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE = 0x28B30, // DIFF
+ EG_VGT_GS_MAX_VERT_OUT = 0x28B38, // SAME
+ EG_VGT_STRMOUT_BASE_OFFSET_HI_0 = 0x28B44, // SAME
+ EG_VGT_STRMOUT_BASE_OFFSET_HI_1 = 0x28B48, // SAME
+ EG_VGT_STRMOUT_BASE_OFFSET_HI_2 = 0x28B4C, // SAME
+ EG_VGT_STRMOUT_BASE_OFFSET_HI_3 = 0x28B50, // SAME
+ EG_VGT_SHADER_STAGES_EN = 0x28B54, //
+ EG_VGT_LS_HS_CONFIG = 0x28B58, //
+ EG_VGT_LS_SIZE = 0x28B5C, //
+ EG_VGT_HS_SIZE = 0x28B60, //
+ EG_VGT_LS_HS_ALLOC = 0x28B64, //
+ EG_VGT_HS_PATCH_CONST = 0x28B68, //
+ EG_VGT_TF_PARAM = 0x28B6C, //
+ EG_VGT_DISPATCH_INITIATOR = 0x28B74, //
+ EG_VGT_GS_INSTANCE_CNT = 0x28B90, //
+ EG_VGT_STRMOUT_CONFIG = 0x28B94, //
+ EG_VGT_STRMOUT_BUFFER_CONFIG = 0x28B98, //
+ EG_VGT_VERTEX_REUSE_BLOCK_CNTL = 0x28C58, // SAME
+ EG_VGT_OUT_DEALLOC_CNTL = 0x28C5C, // SAME
+
+/* Registers from TP block: */
+ EG_GDS_ADDR_BASE = 0x28720, //
+ EG_GDS_ADDR_SIZE = 0x28724, //
+ EG_GDS_ORDERED_WAVE_PER_SE = 0x28728, //
+ EG_GDS_APPEND_CONSUME_UAV0 = 0x2872C, //
+ EG_GDS_APPEND_CONSUME_UAV1 = 0x28730, //
+ EG_GDS_APPEND_CONSUME_UAV2 = 0x28734, //
+ EG_GDS_APPEND_CONSUME_UAV3 = 0x28738, //
+ EG_GDS_APPEND_CONSUME_UAV4 = 0x2873C, //
+ EG_GDS_APPEND_CONSUME_UAV5 = 0x28740, //
+ EG_GDS_APPEND_CONSUME_UAV6 = 0x28744, //
+ EG_GDS_APPEND_CONSUME_UAV7 = 0x28748, //
+ EG_GDS_APPEND_CONSUME_UAV8 = 0x2874C, //
+ EG_GDS_APPEND_CONSUME_UAV9 = 0x28750, //
+ EG_GDS_APPEND_CONSUME_UAV10 = 0x28754, //
+ EG_GDS_APPEND_CONSUME_UAV11 = 0x28758, //
+
+/* Registers from SQ block: */
+ EG_SQ_LOOP_CONST_0 = 0x3A200, // 0x3E200
+ EG_SQ_ALU_CONST_BUFFER_SIZE_VS_0 = 0x28180, // ?
+ EG_SQ_VTX_SEMANTIC_0 = 0x28380, // SAME
+ EG_SQ_VTX_SEMANTIC_1 = 0x28384, // SAME
+ EG_SQ_VTX_SEMANTIC_2 = 0x28388, // SAME
+ EG_SQ_VTX_SEMANTIC_3 = 0x2838C, // SAME
+ EG_SQ_VTX_SEMANTIC_4 = 0x28390, // SAME
+ EG_SQ_VTX_SEMANTIC_5 = 0x28394, // SAME
+ EG_SQ_VTX_SEMANTIC_6 = 0x28398, // SAME
+ EG_SQ_VTX_SEMANTIC_7 = 0x2839C, // SAME
+ EG_SQ_VTX_SEMANTIC_8 = 0x283A0, // SAME
+ EG_SQ_VTX_SEMANTIC_9 = 0x283A4, // SAME
+ EG_SQ_VTX_SEMANTIC_10 = 0x283A8, // SAME
+ EG_SQ_VTX_SEMANTIC_11 = 0x283AC, // SAME
+ EG_SQ_VTX_SEMANTIC_12 = 0x283B0, // SAME
+ EG_SQ_VTX_SEMANTIC_13 = 0x283B4, // SAME
+ EG_SQ_VTX_SEMANTIC_14 = 0x283B8, // SAME
+ EG_SQ_VTX_SEMANTIC_15 = 0x283BC, // SAME
+ EG_SQ_VTX_SEMANTIC_16 = 0x283C0, // SAME
+ EG_SQ_VTX_SEMANTIC_17 = 0x283C4, // SAME
+ EG_SQ_VTX_SEMANTIC_18 = 0x283C8, // SAME
+ EG_SQ_VTX_SEMANTIC_19 = 0x283CC, // SAME
+ EG_SQ_VTX_SEMANTIC_20 = 0x283D0, // SAME
+ EG_SQ_VTX_SEMANTIC_21 = 0x283D4, // SAME
+ EG_SQ_VTX_SEMANTIC_22 = 0x283D8, // SAME
+ EG_SQ_VTX_SEMANTIC_23 = 0x283DC, // SAME
+ EG_SQ_VTX_SEMANTIC_24 = 0x283E0, // SAME
+ EG_SQ_VTX_SEMANTIC_25 = 0x283E4, // SAME
+ EG_SQ_VTX_SEMANTIC_26 = 0x283E8, // SAME
+ EG_SQ_VTX_SEMANTIC_27 = 0x283EC, // SAME
+ EG_SQ_VTX_SEMANTIC_28 = 0x283F0, // SAME
+ EG_SQ_VTX_SEMANTIC_29 = 0x283F4, // SAME
+ EG_SQ_VTX_SEMANTIC_30 = 0x283F8, // SAME
+ EG_SQ_VTX_SEMANTIC_31 = 0x283FC, // SAME
+ EG_SQ_LSTMP_RING_ITEMSIZE = 0x28830, //
+ EG_SQ_HSTMP_RING_ITEMSIZE = 0x28834, //
+ EG_SQ_DYN_GPR_RESOURCE_LIMIT_1 = 0x28838, //
+ EG_SQ_PGM_START_PS = 0x28840, // SAME
+ EG_SQ_PGM_RESOURCES_PS = 0x28844, // DIFF 0x28850
+ EG_SQ_PGM_RESOURCES_2_PS = 0x28848, //
+ EG_SQ_PGM_EXPORTS_PS = 0x2884C, // SAME 0x28854
+ EG_SQ_PGM_START_VS = 0x2885C, // SAME 0x28858
+ EG_SQ_PGM_RESOURCES_VS = 0x28860, // DIFF 0x28868
+ EG_SQ_PGM_RESOURCES_2_VS = 0x28864, //
+ EG_SQ_PGM_START_GS = 0x28874, // SAME 0x2886C
+ EG_SQ_PGM_RESOURCES_GS = 0x28878, // DIFF 0x2887C
+ EG_SQ_PGM_RESOURCES_2_GS = 0x2887C, //
+ EG_SQ_PGM_START_ES = 0x2888C, // SAME 0x28880
+ EG_SQ_PGM_RESOURCES_ES = 0x28890, // DIFF
+ EG_SQ_PGM_RESOURCES_2_ES = 0x28894, //
+ EG_SQ_PGM_START_FS = 0x288A4, // SAME 0x28894
+ EG_SQ_PGM_RESOURCES_FS = 0x288A8, // DIFF 0x288A4
+ EG_SQ_PGM_START_HS = 0x288B8, //
+ EG_SQ_PGM_RESOURCES_HS = 0x288BC, //
+ EG_SQ_PGM_RESOURCES_2_HS = 0x288C0, //
+ EG_SQ_PGM_START_LS = 0x288D0, //
+ EG_SQ_PGM_RESOURCES_LS = 0x288D4, //
+ EG_SQ_PGM_RESOURCES_2_LS = 0x288D8, //
+ EG_SQ_THREAD_TRACE_USERDATA = 0x288DC, //
+ EG_SQ_LDS_ALLOC = 0x288E8, //
+ EG_SQ_LDS_ALLOC_PS = 0x288EC, //
+ EG_SQ_VTX_SEMANTIC_CLEAR = 0x288F0, // SAME 0x288E0
+ EG_SQ_THREAD_TRACE_CTRL = 0x288F8, //
+ EG_SQ_ESGS_RING_ITEMSIZE = 0x28900, // SAME 0x288A8
+ EG_SQ_GSVS_RING_ITEMSIZE = 0x28904, // SAME 0x288AC
+ EG_SQ_ESTMP_RING_ITEMSIZE = 0x28908, // SAME 0x288B0
+ EG_SQ_GSTMP_RING_ITEMSIZE = 0x2890C, // SAME 0x288B4
+ EG_SQ_VSTMP_RING_ITEMSIZE = 0x28910, // SAME 0x288B8
+ EG_SQ_PSTMP_RING_ITEMSIZE = 0x28914, // SAME 0x288BC
+ EG_SQ_GS_VERT_ITEMSIZE = 0x2891C, // SAME 0x288C8
+ EG_SQ_GS_VERT_ITEMSIZE_1 = 0x28920, //
+ EG_SQ_GS_VERT_ITEMSIZE_2 = 0x28924, //
+ EG_SQ_GS_VERT_ITEMSIZE_3 = 0x28928, //
+ EG_SQ_GSVS_RING_OFFSET_1 = 0x2892C, //
+ EG_SQ_GSVS_RING_OFFSET_2 = 0x28930, //
+ EG_SQ_GSVS_RING_OFFSET_3 = 0x28934, //
+ EG_SQ_ALU_CONST_CACHE_PS_0 = 0x28940, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_1 = 0x28944, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_2 = 0x28948, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_3 = 0x2894C, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_4 = 0x28950, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_5 = 0x28954, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_6 = 0x28958, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_7 = 0x2895C, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_8 = 0x28960, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_9 = 0x28964, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_10 = 0x28968, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_11 = 0x2896C, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_12 = 0x28970, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_13 = 0x28974, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_14 = 0x28978, // SAME
+ EG_SQ_ALU_CONST_CACHE_PS_15 = 0x2897C, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_0 = 0x28980, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_1 = 0x28984, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_2 = 0x28988, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_3 = 0x2898C, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_4 = 0x28990, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_5 = 0x28994, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_6 = 0x28998, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_7 = 0x2899C, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_8 = 0x289A0, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_9 = 0x289A4, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_10 = 0x289A8, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_11 = 0x289AC, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_12 = 0x289B0, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_13 = 0x289B4, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_14 = 0x289B8, // SAME
+ EG_SQ_ALU_CONST_CACHE_VS_15 = 0x289BC, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_0 = 0x289C0, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_1 = 0x289C4, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_2 = 0x289C8, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_3 = 0x289CC, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_4 = 0x289D0, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_5 = 0x289D4, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_6 = 0x289D8, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_7 = 0x289DC, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_8 = 0x289E0, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_9 = 0x289E4, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_10 = 0x289E8, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_11 = 0x289EC, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_12 = 0x289F0, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_13 = 0x289F4, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_14 = 0x289F8, // SAME
+ EG_SQ_ALU_CONST_CACHE_GS_15 = 0x289FC, // SAME
+ EG_SQ_ALU_CONST_CACHE_HS_0 = 0x28F00, //
+ EG_SQ_ALU_CONST_CACHE_HS_1 = 0x28F04, //
+ EG_SQ_ALU_CONST_CACHE_HS_2 = 0x28F08, //
+ EG_SQ_ALU_CONST_CACHE_HS_3 = 0x28F0C, //
+ EG_SQ_ALU_CONST_CACHE_HS_4 = 0x28F10, //
+ EG_SQ_ALU_CONST_CACHE_HS_5 = 0x28F14, //
+ EG_SQ_ALU_CONST_CACHE_HS_6 = 0x28F18, //
+ EG_SQ_ALU_CONST_CACHE_HS_7 = 0x28F1C, //
+ EG_SQ_ALU_CONST_CACHE_HS_8 = 0x28F20, //
+ EG_SQ_ALU_CONST_CACHE_HS_9 = 0x28F24, //
+ EG_SQ_ALU_CONST_CACHE_HS_10 = 0x28F28, //
+ EG_SQ_ALU_CONST_CACHE_HS_11 = 0x28F2C, //
+ EG_SQ_ALU_CONST_CACHE_HS_12 = 0x28F30, //
+ EG_SQ_ALU_CONST_CACHE_HS_13 = 0x28F34, //
+ EG_SQ_ALU_CONST_CACHE_HS_14 = 0x28F38, //
+ EG_SQ_ALU_CONST_CACHE_HS_15 = 0x28F3C, //
+ EG_SQ_ALU_CONST_CACHE_LS_0 = 0x28F40, //
+ EG_SQ_ALU_CONST_CACHE_LS_1 = 0x28F44, //
+ EG_SQ_ALU_CONST_CACHE_LS_2 = 0x28F48, //
+ EG_SQ_ALU_CONST_CACHE_LS_3 = 0x28F4C, //
+ EG_SQ_ALU_CONST_CACHE_LS_4 = 0x28F50, //
+ EG_SQ_ALU_CONST_CACHE_LS_5 = 0x28F54, //
+ EG_SQ_ALU_CONST_CACHE_LS_6 = 0x28F58, //
+ EG_SQ_ALU_CONST_CACHE_LS_7 = 0x28F5C, //
+ EG_SQ_ALU_CONST_CACHE_LS_8 = 0x28F60, //
+ EG_SQ_ALU_CONST_CACHE_LS_9 = 0x28F64, //
+ EG_SQ_ALU_CONST_CACHE_LS_10 = 0x28F68, //
+ EG_SQ_ALU_CONST_CACHE_LS_11 = 0x28F6C, //
+ EG_SQ_ALU_CONST_CACHE_LS_12 = 0x28F70, //
+ EG_SQ_ALU_CONST_CACHE_LS_13 = 0x28F74, //
+ EG_SQ_ALU_CONST_CACHE_LS_14 = 0x28F78, //
+ EG_SQ_ALU_CONST_CACHE_LS_15 = 0x28F7C, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_PS_0 = 0x28140,
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_0 = 0x28F80, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_1 = 0x28F84, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_2 = 0x28F88, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_3 = 0x28F8C, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_4 = 0x28F90, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_5 = 0x28F94, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_6 = 0x28F98, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_7 = 0x28F9C, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_8 = 0x28FA0, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_9 = 0x28FA4, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_10 = 0x28FA8, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_11 = 0x28FAC, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_12 = 0x28FB0, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_13 = 0x28FB4, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_14 = 0x28FB8, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_HS_15 = 0x28FBC, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_0 = 0x28FC0, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_1 = 0x28FC4, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_2 = 0x28FC8, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_3 = 0x28FCC, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_4 = 0x28FD0, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_5 = 0x28FD4, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_6 = 0x28FD8, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_7 = 0x28FDC, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_8 = 0x28FE0, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_9 = 0x28FE4, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_10 = 0x28FE8, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_11 = 0x28FEC, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_12 = 0x28FF0, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_13 = 0x28FF4, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_14 = 0x28FF8, //
+ EG_SQ_ALU_CONST_BUFFER_SIZE_LS_15 = 0x28FFC, //
+
+/* Registers from SPI block: */
+ EG_SPI_VS_OUT_ID_0 = 0x2861C, // SAME 0x28614
+ EG_SPI_VS_OUT_ID_1 = 0x28620, // SAME 0x28618
+ EG_SPI_VS_OUT_ID_2 = 0x28624, // SAME 0x2861C
+ EG_SPI_VS_OUT_ID_3 = 0x28628, // SAME 0x28620
+ EG_SPI_VS_OUT_ID_4 = 0x2862C, // SAME 0x28624
+ EG_SPI_VS_OUT_ID_5 = 0x28630, // SAME 0x28628
+ EG_SPI_VS_OUT_ID_6 = 0x28634, // SAME 0x2862C
+ EG_SPI_VS_OUT_ID_7 = 0x28638, // SAME 0x28630
+ EG_SPI_VS_OUT_ID_8 = 0x2863C, // SAME 0x28634
+ EG_SPI_VS_OUT_ID_9 = 0x28640, // SAME 0x28638
+ EG_SPI_PS_INPUT_CNTL_0 = 0x28644, // SAME
+ EG_SPI_PS_INPUT_CNTL_1 = 0x28648, // SAME
+ EG_SPI_PS_INPUT_CNTL_2 = 0x2864C, // SAME
+ EG_SPI_PS_INPUT_CNTL_3 = 0x28650, // SAME
+ EG_SPI_PS_INPUT_CNTL_4 = 0x28654, // SAME
+ EG_SPI_PS_INPUT_CNTL_5 = 0x28658, // SAME
+ EG_SPI_PS_INPUT_CNTL_6 = 0x2865C, // SAME
+ EG_SPI_PS_INPUT_CNTL_7 = 0x28660, // SAME
+ EG_SPI_PS_INPUT_CNTL_8 = 0x28664, // SAME
+ EG_SPI_PS_INPUT_CNTL_9 = 0x28668, // SAME
+ EG_SPI_PS_INPUT_CNTL_10 = 0x2866C, // SAME
+ EG_SPI_PS_INPUT_CNTL_11 = 0x28670, // SAME
+ EG_SPI_PS_INPUT_CNTL_12 = 0x28674, // SAME
+ EG_SPI_PS_INPUT_CNTL_13 = 0x28678, // SAME
+ EG_SPI_PS_INPUT_CNTL_14 = 0x2867C, // SAME
+ EG_SPI_PS_INPUT_CNTL_15 = 0x28680, // SAME
+ EG_SPI_PS_INPUT_CNTL_16 = 0x28684, // SAME
+ EG_SPI_PS_INPUT_CNTL_17 = 0x28688, // SAME
+ EG_SPI_PS_INPUT_CNTL_18 = 0x2868C, // SAME
+ EG_SPI_PS_INPUT_CNTL_19 = 0x28690, // SAME
+ EG_SPI_PS_INPUT_CNTL_20 = 0x28694, // SAME
+ EG_SPI_PS_INPUT_CNTL_21 = 0x28698, // SAME
+ EG_SPI_PS_INPUT_CNTL_22 = 0x2869C, // SAME
+ EG_SPI_PS_INPUT_CNTL_23 = 0x286A0, // SAME
+ EG_SPI_PS_INPUT_CNTL_24 = 0x286A4, // SAME
+ EG_SPI_PS_INPUT_CNTL_25 = 0x286A8, // SAME
+ EG_SPI_PS_INPUT_CNTL_26 = 0x286AC, // SAME
+ EG_SPI_PS_INPUT_CNTL_27 = 0x286B0, // SAME
+ EG_SPI_PS_INPUT_CNTL_28 = 0x286B4, // SAME
+ EG_SPI_PS_INPUT_CNTL_29 = 0x286B8, // SAME
+ EG_SPI_PS_INPUT_CNTL_30 = 0x286BC, // SAME
+ EG_SPI_PS_INPUT_CNTL_31 = 0x286C0, // SAME
+ EG_SPI_VS_OUT_CONFIG = 0x286C4, // SAME
+ EG_SPI_THREAD_GROUPING = 0x286C8, // DIFF
+ EG_SPI_PS_IN_CONTROL_0 = 0x286CC, // SAME
+ EG_SPI_PS_IN_CONTROL_1 = 0x286D0, // SAME
+ EG_SPI_INTERP_CONTROL_0 = 0x286D4, // SAME
+ EG_SPI_INPUT_Z = 0x286D8, // SAME
+ EG_SPI_FOG_CNTL = 0x286DC, // SAME
+ EG_SPI_BARYC_CNTL = 0x286E0, //
+ EG_SPI_PS_IN_CONTROL_2 = 0x286E4, //
+ EG_SPI_COMPUTE_INPUT_CNTL = 0x286E8, //
+ EG_SPI_COMPUTE_NUM_THREAD_X = 0x286EC, //
+ EG_SPI_COMPUTE_NUM_THREAD_Y = 0x286F0, //
+ EG_SPI_COMPUTE_NUM_THREAD_Z = 0x286F4, //
+
+/* Registers from SX block: */
+ EG_SX_MISC = 0x28350, // SAME
+ EG_SX_SURFACE_SYNC = 0x28354, // DIFF
+ EG_SX_ALPHA_TEST_CONTROL = 0x28410, // SAME
+ EG_SX_ALPHA_REF = 0x28438, // SAME
+
+/* Registers from DB block: */
+ EG_DB_RENDER_CONTROL = 0x28000, // DIFF 0x28D0C
+ EG_DB_COUNT_CONTROL = 0x28004, //
+ EG_DB_DEPTH_VIEW = 0x28008, // DIFF 0x28004
+ EG_DB_RENDER_OVERRIDE = 0x2800C, // DIFF 0x28D10
+ EG_DB_RENDER_OVERRIDE2 = 0x28010, //
+ EG_DB_HTILE_DATA_BASE = 0x28014, // SAME
+
+ EG_DB_STENCIL_CLEAR = 0x28028, // SAME
+ EG_DB_DEPTH_CLEAR = 0x2802C, // SAME
+
+ EG_DB_Z_INFO = 0x28040, //
+ EG_DB_STENCIL_INFO = 0x28044, //
+ EG_DB_Z_READ_BASE = 0x28048, //
+ EG_DB_STENCIL_READ_BASE = 0x2804C, //
+ EG_DB_Z_WRITE_BASE = 0x28050, //
+ EG_DB_STENCIL_WRITE_BASE = 0x28054, //
+ EG_DB_DEPTH_SIZE = 0x28058, // DIFF 0x28000
+ EG_DB_DEPTH_SLICE = 0x2805C, //
+
+ EG_DB_STENCILREFMASK = 0x28430, // SAME
+ EG_DB_STENCILREFMASK_BF = 0x28434, // SAME
+ EG_DB_DEPTH_CONTROL = 0x28800, // SAME
+ EG_DB_SHADER_CONTROL = 0x2880C, // DIFF
+ EG_DB_HTILE_SURFACE = 0x28ABC, // SAME 0x28D24
+ EG_DB_SRESULTS_COMPARE_STATE0 = 0x28AC0, // SAME 0x28D28
+ EG_DB_SRESULTS_COMPARE_STATE1 = 0x28AC4, // SAME 0x28D2C
+ EG_DB_PRELOAD_CONTROL = 0x28AC8, // SAME 0x28D30
+ EG_DB_ALPHA_TO_MASK = 0x28B70, // SAME 0x28D44
+
+/* Registers from CB block: */
+ EG_CB_TARGET_MASK = 0x28238, // SAME
+ EG_CB_SHADER_MASK = 0x2823C, // SAME
+ EG_CB_BLEND_RED = 0x28414, // SAME
+ EG_CB_BLEND_GREEN = 0x28418, // SAME
+ EG_CB_BLEND_BLUE = 0x2841C, // SAME
+ EG_CB_BLEND_ALPHA = 0x28420, // SAME
+ EG_CB_BLEND0_CONTROL = 0x28780, // DIFF
+ EG_CB_BLEND1_CONTROL = 0x28784, // DIFF
+ EG_CB_BLEND2_CONTROL = 0x28788, // DIFF
+ EG_CB_BLEND3_CONTROL = 0x2878C, // DIFF
+ EG_CB_BLEND4_CONTROL = 0x28790, // DIFF
+ EG_CB_BLEND5_CONTROL = 0x28794, // DIFF
+ EG_CB_BLEND6_CONTROL = 0x28798, // DIFF
+ EG_CB_BLEND7_CONTROL = 0x2879C, // DIFF
+ EG_CB_COLOR_CONTROL = 0x28808, // DIFF
+ EG_CB_IMMED0_BASE = 0x28B9C, //
+ EG_CB_IMMED1_BASE = 0x28BA0, //
+ EG_CB_IMMED2_BASE = 0x28BA4, //
+ EG_CB_IMMED3_BASE = 0x28BA8, //
+ EG_CB_IMMED4_BASE = 0x28BAC, //
+ EG_CB_IMMED5_BASE = 0x28BB0, //
+ EG_CB_IMMED6_BASE = 0x28BB4, //
+ EG_CB_IMMED7_BASE = 0x28BB8, //
+ EG_CB_IMMED8_BASE = 0x28BBC, //
+ EG_CB_IMMED9_BASE = 0x28BC0, //
+ EG_CB_IMMED10_BASE = 0x28BC4, //
+ EG_CB_IMMED11_BASE = 0x28BC8, //
+ EG_CB_CLRCMP_CONTROL = 0x28C40, // SAME 0x28C30
+ EG_CB_CLRCMP_SRC = 0x28C44, // SAME 0x28C34
+ EG_CB_CLRCMP_DST = 0x28C48, // SAME 0x28C38
+ EG_CB_CLRCMP_MSK = 0x28C4C, // SAME 0x28C3C
+ EG_CB_COLOR0_BASE = 0x28C60, // SAME 0x28040
+ EG_CB_COLOR0_PITCH = 0x28C64, //
+ EG_CB_COLOR0_SLICE = 0x28C68, //
+ EG_CB_COLOR0_VIEW = 0x28C6C, // SAME 0x28080
+ EG_CB_COLOR0_INFO = 0x28C70, // DIFF 0x280A0
+ EG_CB_COLOR0_ATTRIB = 0x28C74, //
+ EG_CB_COLOR0_DIM = 0x28C78, //
+ EG_CB_COLOR0_CMASK = 0x28C7C, //
+ EG_CB_COLOR0_CMASK_SLICE = 0x28C80, //
+ EG_CB_COLOR0_FMASK = 0x28C84, //
+ EG_CB_COLOR0_FMASK_SLICE = 0x28C88, //
+ EG_CB_COLOR0_CLEAR_WORD0 = 0x28C8C, //
+ EG_CB_COLOR0_CLEAR_WORD1 = 0x28C90, //
+ EG_CB_COLOR0_CLEAR_WORD2 = 0x28C94, //
+ EG_CB_COLOR0_CLEAR_WORD3 = 0x28C98, //
+ EG_CB_COLOR1_BASE = 0x28C9C, // SAME 0x28044
+ EG_CB_COLOR1_PITCH = 0x28CA0, //
+ EG_CB_COLOR1_SLICE = 0x28CA4, //
+ EG_CB_COLOR1_VIEW = 0x28CA8, // SAME 0x28084
+ EG_CB_COLOR1_INFO = 0x28CAC, // DIFF 0x280A4
+ EG_CB_COLOR1_ATTRIB = 0x28CB0, //
+ EG_CB_COLOR1_DIM = 0x28CB4, //
+ EG_CB_COLOR1_CMASK = 0x28CB8, //
+ EG_CB_COLOR1_CMASK_SLICE = 0x28CBC, //
+ EG_CB_COLOR1_FMASK = 0x28CC0, //
+ EG_CB_COLOR1_FMASK_SLICE = 0x28CC4, //
+ EG_CB_COLOR1_CLEAR_WORD0 = 0x28CC8, //
+ EG_CB_COLOR1_CLEAR_WORD1 = 0x28CCC, //
+ EG_CB_COLOR1_CLEAR_WORD2 = 0x28CD0, //
+ EG_CB_COLOR1_CLEAR_WORD3 = 0x28CD4, //
+ EG_CB_COLOR2_BASE = 0x28CD8, // SAME 0x28048
+ EG_CB_COLOR2_PITCH = 0x28CDC, //
+ EG_CB_COLOR2_SLICE = 0x28CE0, //
+ EG_CB_COLOR2_VIEW = 0x28CE4, // SAME 0x28088
+ EG_CB_COLOR2_INFO = 0x28CE8, // DIFF 0x280A8
+ EG_CB_COLOR2_ATTRIB = 0x28CEC, //
+ EG_CB_COLOR2_DIM = 0x28CF0, //
+ EG_CB_COLOR2_CMASK = 0x28CF4, //
+ EG_CB_COLOR2_CMASK_SLICE = 0x28CF8, //
+ EG_CB_COLOR2_FMASK = 0x28CFC, //
+ EG_CB_COLOR2_FMASK_SLICE = 0x28D00, //
+ EG_CB_COLOR2_CLEAR_WORD0 = 0x28D04, //
+ EG_CB_COLOR2_CLEAR_WORD1 = 0x28D08, //
+ EG_CB_COLOR2_CLEAR_WORD2 = 0x28D0C, //
+ EG_CB_COLOR2_CLEAR_WORD3 = 0x28D10, //
+ EG_CB_COLOR3_BASE = 0x28D14, // SAME 0x2804C
+ EG_CB_COLOR3_PITCH = 0x28D18, //
+ EG_CB_COLOR3_SLICE = 0x28D1C, //
+ EG_CB_COLOR3_VIEW = 0x28D20, // SAME 0x2808C
+ EG_CB_COLOR3_INFO = 0x28D24, // DIFF 0x280AC
+ EG_CB_COLOR3_ATTRIB = 0x28D28, //
+ EG_CB_COLOR3_DIM = 0x28D2C, //
+ EG_CB_COLOR3_CMASK = 0x28D30, //
+ EG_CB_COLOR3_CMASK_SLICE = 0x28D34, //
+ EG_CB_COLOR3_FMASK = 0x28D38, //
+ EG_CB_COLOR3_FMASK_SLICE = 0x28D3C, //
+ EG_CB_COLOR3_CLEAR_WORD0 = 0x28D40, //
+ EG_CB_COLOR3_CLEAR_WORD1 = 0x28D44, //
+ EG_CB_COLOR3_CLEAR_WORD2 = 0x28D48, //
+ EG_CB_COLOR3_CLEAR_WORD3 = 0x28D4C, //
+ EG_CB_COLOR4_BASE = 0x28D50, // SAME 0x28050
+ EG_CB_COLOR4_PITCH = 0x28D54, //
+ EG_CB_COLOR4_SLICE = 0x28D58, //
+ EG_CB_COLOR4_VIEW = 0x28D5C, // SAME 0x28090
+ EG_CB_COLOR4_INFO = 0x28D60, // DIFF 0x280B0
+ EG_CB_COLOR4_ATTRIB = 0x28D64, //
+ EG_CB_COLOR4_DIM = 0x28D68, //
+ EG_CB_COLOR4_CMASK = 0x28D6C, //
+ EG_CB_COLOR4_CMASK_SLICE = 0x28D70, //
+ EG_CB_COLOR4_FMASK = 0x28D74, //
+ EG_CB_COLOR4_FMASK_SLICE = 0x28D78, //
+ EG_CB_COLOR4_CLEAR_WORD0 = 0x28D7C, //
+ EG_CB_COLOR4_CLEAR_WORD1 = 0x28D80, //
+ EG_CB_COLOR4_CLEAR_WORD2 = 0x28D84, //
+ EG_CB_COLOR4_CLEAR_WORD3 = 0x28D88, //
+ EG_CB_COLOR5_BASE = 0x28D8C, // SAME 0x28054
+ EG_CB_COLOR5_PITCH = 0x28D90, //
+ EG_CB_COLOR5_SLICE = 0x28D94, //
+ EG_CB_COLOR5_VIEW = 0x28D98, // SAME 0x28094
+ EG_CB_COLOR5_INFO = 0x28D9C, // DIFF 0x280B4
+ EG_CB_COLOR5_ATTRIB = 0x28DA0, //
+ EG_CB_COLOR5_DIM = 0x28DA4, //
+ EG_CB_COLOR5_CMASK = 0x28DA8, //
+ EG_CB_COLOR5_CMASK_SLICE = 0x28DAC, //
+ EG_CB_COLOR5_FMASK = 0x28DB0, //
+ EG_CB_COLOR5_FMASK_SLICE = 0x28DB4, //
+ EG_CB_COLOR5_CLEAR_WORD0 = 0x28DB8, //
+ EG_CB_COLOR5_CLEAR_WORD1 = 0x28DBC, //
+ EG_CB_COLOR5_CLEAR_WORD2 = 0x28DC0, //
+ EG_CB_COLOR5_CLEAR_WORD3 = 0x28DC4, //
+ EG_CB_COLOR6_BASE = 0x28DC8, // SAME 0x28058
+ EG_CB_COLOR6_PITCH = 0x28DCC, //
+ EG_CB_COLOR6_SLICE = 0x28DD0, //
+ EG_CB_COLOR6_VIEW = 0x28DD4, // SAME 0x28098
+ EG_CB_COLOR6_INFO = 0x28DD8, // DIFF 0x280B8
+ EG_CB_COLOR6_ATTRIB = 0x28DDC, //
+ EG_CB_COLOR6_DIM = 0x28DE0, //
+ EG_CB_COLOR6_CMASK = 0x28DE4, //
+ EG_CB_COLOR6_CMASK_SLICE = 0x28DE8, //
+ EG_CB_COLOR6_FMASK = 0x28DEC, //
+ EG_CB_COLOR6_FMASK_SLICE = 0x28DF0, //
+ EG_CB_COLOR6_CLEAR_WORD0 = 0x28DF4, //
+ EG_CB_COLOR6_CLEAR_WORD1 = 0x28DF8, //
+ EG_CB_COLOR6_CLEAR_WORD2 = 0x28DFC, //
+ EG_CB_COLOR6_CLEAR_WORD3 = 0x28E00, //
+ EG_CB_COLOR7_BASE = 0x28E04, // SAME 0x2805C
+ EG_CB_COLOR7_PITCH = 0x28E08, //
+ EG_CB_COLOR7_SLICE = 0x28E0C, //
+ EG_CB_COLOR7_VIEW = 0x28E10, // SAME 0x2809C
+ EG_CB_COLOR7_INFO = 0x28E14, // DIFF 0x280BC
+ EG_CB_COLOR7_ATTRIB = 0x28E18, //
+ EG_CB_COLOR7_DIM = 0x28E1C, //
+ EG_CB_COLOR7_CMASK = 0x28E20, //
+ EG_CB_COLOR7_CMASK_SLICE = 0x28E24, //
+ EG_CB_COLOR7_FMASK = 0x28E28, //
+ EG_CB_COLOR7_FMASK_SLICE = 0x28E2C, //
+ EG_CB_COLOR7_CLEAR_WORD0 = 0x28E30, //
+ EG_CB_COLOR7_CLEAR_WORD1 = 0x28E34, //
+ EG_CB_COLOR7_CLEAR_WORD2 = 0x28E38, //
+ EG_CB_COLOR7_CLEAR_WORD3 = 0x28E3C, //
+ EG_CB_COLOR8_BASE = 0x28E40, //
+ EG_CB_COLOR8_PITCH = 0x28E44, //
+ EG_CB_COLOR8_SLICE = 0x28E48, //
+ EG_CB_COLOR8_VIEW = 0x28E4C, //
+ EG_CB_COLOR8_INFO = 0x28E50, //
+ EG_CB_COLOR8_ATTRIB = 0x28E54, //
+ EG_CB_COLOR8_DIM = 0x28E58, //
+ EG_CB_COLOR9_BASE = 0x28E5C, //
+ EG_CB_COLOR9_PITCH = 0x28E60, //
+ EG_CB_COLOR9_SLICE = 0x28E64, //
+ EG_CB_COLOR9_VIEW = 0x28E68, //
+ EG_CB_COLOR9_INFO = 0x28E6C, //
+ EG_CB_COLOR9_ATTRIB = 0x28E70, //
+ EG_CB_COLOR9_DIM = 0x28E74, //
+ EG_CB_COLOR10_BASE = 0x28E78, //
+ EG_CB_COLOR10_PITCH = 0x28E7C, //
+ EG_CB_COLOR10_SLICE = 0x28E80, //
+ EG_CB_COLOR10_VIEW = 0x28E84, //
+ EG_CB_COLOR10_INFO = 0x28E88, //
+ EG_CB_COLOR10_ATTRIB = 0x28E8C, //
+ EG_CB_COLOR10_DIM = 0x28E90, //
+ EG_CB_COLOR11_BASE = 0x28E94, //
+ EG_CB_COLOR11_PITCH = 0x28E98, //
+ EG_CB_COLOR11_SLICE = 0x28E9C, //
+ EG_CB_COLOR11_VIEW = 0x28EA0, //
+ EG_CB_COLOR11_INFO = 0x28EA4, //
+ EG_CB_COLOR11_ATTRIB = 0x28EA8, //
+ EG_CB_COLOR11_DIM = 0x28EAC, //
+
+/* Registers from CP block: */
+ EG_COHER_DEST_BASE_0 = 0x28248, // SAME
+ EG_COHER_DEST_BASE_1 = 0x2824C, // SAME
+ EG_CP_PERFMON_CNTX_CNTL = 0x28358, //
+
+/* Config: */
+ EG_SPI_CONFIG_CNTL = 0x9100, // DIFF
+ EG_SPI_CONFIG_CNTL_1 = 0x913C, // DIFF
+ EG_CP_PERFMON_CNTL = 0x87FC, // SAME
+ EG_SQ_MS_FIFO_SIZES = 0x8CF0, // SAME
+ EG_SQ_CONFIG = 0x8C00, // DIFF
+ EG_SQ_GPR_RESOURCE_MGMT_1 = 0x8C04, // SAME
+ EG_SQ_GPR_RESOURCE_MGMT_2 = 0x8C08, // SAME
+ EG_SQ_THREAD_RESOURCE_MGMT = 0x8C18, // SAME 0x8C0C,
+ EG_SQ_STACK_RESOURCE_MGMT_1 = 0x8C20, // SAME 0x8C10,
+ EG_SQ_STACK_RESOURCE_MGMT_2 = 0x8C24, // SAME 0x8C14,
+ EG_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ = 0x8D8C, // DIFF
+ EG_SQ_LDS_RESOURCE_MGMT = 0x8E2C, //
+ EG_SQ_GPR_RESOURCE_MGMT_3 = 0x8C0C, //
+ EG_SQ_STACK_RESOURCE_MGMT_3 = 0x8C28, //
+ EG_SQ_THREAD_RESOURCE_MGMT_2 = 0x8C1C, //
+ EG_VGT_CACHE_INVALIDATION = 0x88C4, // DIFF
+ EG_VGT_GS_VERTEX_REUSE = 0x88D4, // SAME
+ EG_PA_SC_FORCE_EOV_MAX_CNTS = 0x8B24, // SAME
+ EG_PA_SC_LINE_STIPPLE_STATE = 0x8B10, // SAME
+ EG_PA_CL_ENHANCE = 0x8A14, // SAME
+
+/* Tex border color */
+ EG_TD_PS_BORDER_COLOR_RED = 0xA404,
+ EG_TD_PS_BORDER_COLOR_GREEN = 0xA408,
+ EG_TD_PS_BORDER_COLOR_BLUE = 0xA40C,
+ EG_TD_PS_BORDER_COLOR_ALPHA = 0xA410,
+
+/* const */
+ EG_SQ_VTX_CONSTANT_WORD0_0 = 0x30000, // 0x38000
+};
+
+#endif /* _EVERGREEN_OFF_H_ */ \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/evergreen_oglprog.c b/src/mesa/drivers/dri/r600/evergreen_oglprog.c
new file mode 100644
index 0000000000..9fe523234c
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_oglprog.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include <string.h>
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "program/program.h"
+
+#include "tnl/tnl.h"
+
+#include "r600_context.h"
+#include "r600_emit.h"
+
+#include "evergreen_oglprog.h"
+#include "evergreen_fragprog.h"
+#include "evergreen_vertprog.h"
+
+
+static void evergreen_freeVertProgCache(GLcontext *ctx, struct r700_vertex_program_cont *cache)
+{
+ struct evergreen_vertex_program *tmp, *vp = cache->progs;
+
+ while (vp) {
+ tmp = vp->next;
+ /* Release DMA region */
+ r600DeleteShader(ctx, vp->shaderbo);
+
+ if(NULL != vp->constbo0)
+ {
+ r600DeleteShader(ctx, vp->constbo0);
+ }
+
+ /* Clean up */
+ Clean_Up_Assembler(&(vp->r700AsmCode));
+ Clean_Up_Shader(&(vp->r700Shader));
+
+ _mesa_reference_vertprog(ctx, &vp->mesa_program, NULL);
+ free(vp);
+ vp = tmp;
+ }
+}
+
+static struct gl_program *evergreenNewProgram(GLcontext * ctx,
+ GLenum target,
+ GLuint id)
+{
+ struct gl_program *pProgram = NULL;
+
+ struct evergreen_vertex_program_cont *vpc;
+ struct evergreen_fragment_program *fp;
+
+ radeon_print(RADEON_SHADER, RADEON_VERBOSE,
+ "%s %u, %u\n", __func__, target, id);
+
+ switch (target)
+ {
+ case GL_VERTEX_STATE_PROGRAM_NV:
+ case GL_VERTEX_PROGRAM_ARB:
+ vpc = CALLOC_STRUCT(evergreen_vertex_program_cont);
+ pProgram = _mesa_init_vertex_program(ctx,
+ &vpc->mesa_program,
+ target,
+ id);
+
+ break;
+ case GL_FRAGMENT_PROGRAM_NV:
+ case GL_FRAGMENT_PROGRAM_ARB:
+ fp = CALLOC_STRUCT(evergreen_fragment_program);
+ pProgram = _mesa_init_fragment_program(ctx,
+ &fp->mesa_program,
+ target,
+ id);
+ fp->translated = GL_FALSE;
+ fp->loaded = GL_FALSE;
+
+ fp->shaderbo = NULL;
+
+ fp->constbo0 = NULL;
+
+ break;
+ default:
+ _mesa_problem(ctx, "Bad target in evergreenNewProgram");
+ }
+
+ return pProgram;
+}
+
+static void evergreenDeleteProgram(GLcontext * ctx, struct gl_program *prog)
+{
+ struct evergreen_vertex_program_cont *vpc = (struct evergreen_vertex_program_cont *)prog;
+ struct evergreen_fragment_program * fp;
+
+ radeon_print(RADEON_SHADER, RADEON_VERBOSE,
+ "%s %p\n", __func__, prog);
+
+ switch (prog->Target)
+ {
+ case GL_VERTEX_STATE_PROGRAM_NV:
+ case GL_VERTEX_PROGRAM_ARB:
+ evergreen_freeVertProgCache(ctx, vpc);
+ break;
+ case GL_FRAGMENT_PROGRAM_NV:
+ case GL_FRAGMENT_PROGRAM_ARB:
+ fp = (struct evergreen_fragment_program*)prog;
+ /* Release DMA region */
+
+ r600DeleteShader(ctx, fp->shaderbo);
+
+ if(NULL != fp->constbo0)
+ {
+ r600DeleteShader(ctx, fp->constbo0);
+ }
+
+ /* Clean up */
+ Clean_Up_Assembler(&(fp->r700AsmCode));
+ Clean_Up_Shader(&(fp->r700Shader));
+ break;
+ default:
+ _mesa_problem(ctx, "Bad target in evergreenNewProgram");
+ }
+
+ _mesa_delete_program(ctx, prog);
+}
+
+static GLboolean
+evergreenProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+ struct evergreen_vertex_program_cont *vpc = (struct evergreen_vertex_program_cont *)prog;
+ struct evergreen_fragment_program * fp = (struct evergreen_fragment_program*)prog;
+
+ switch (target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ evergreen_freeVertProgCache(ctx, vpc);
+ vpc->progs = NULL;
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
+ r600DeleteShader(ctx, fp->shaderbo);
+
+ if(NULL != fp->constbo0)
+ {
+ r600DeleteShader(ctx, fp->constbo0);
+ fp->constbo0 = NULL;
+ }
+
+ Clean_Up_Assembler(&(fp->r700AsmCode));
+ Clean_Up_Shader(&(fp->r700Shader));
+ fp->translated = GL_FALSE;
+ fp->loaded = GL_FALSE;
+ fp->shaderbo = NULL;
+ break;
+ }
+
+ /* XXX check if program is legal, within limits */
+ return GL_TRUE;
+}
+
+static GLboolean evergreenIsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+
+ return GL_TRUE;
+}
+
+void evergreenInitShaderFuncs(struct dd_function_table *functions)
+{
+ functions->NewProgram = evergreenNewProgram;
+ functions->DeleteProgram = evergreenDeleteProgram;
+ functions->ProgramStringNotify = evergreenProgramStringNotify;
+ functions->IsProgramNative = evergreenIsProgramNative;
+}
diff --git a/src/mesa/drivers/dri/r600/evergreen_oglprog.h b/src/mesa/drivers/dri/r600/evergreen_oglprog.h
new file mode 100644
index 0000000000..1cf3e79d05
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_oglprog.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _EVERGREEN_OGLPROG_H_
+#define _EVERGREEN_OGLPROG_H_
+#include "r600_context.h"
+
+extern void evergreenInitShaderFuncs(struct dd_function_table *functions);
+
+#endif /*_EVERGREEN_OGLPROG_H_*/
diff --git a/src/mesa/drivers/dri/r600/evergreen_render.c b/src/mesa/drivers/dri/r600/evergreen_render.c
new file mode 100644
index 0000000000..85b2f9d6ab
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_render.c
@@ -0,0 +1,937 @@
+/*
+ * Copyright (C) 2008-2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/state.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/dd.h"
+#include "main/simple_list.h"
+#include "main/api_arrayelt.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_vp_build.h"
+#include "tnl/t_context.h"
+#include "tnl/t_vertex.h"
+#include "vbo/vbo_context.h"
+
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+
+#include "evergreen_fragprog.h"
+#include "evergreen_vertprog.h"
+
+#include "evergreen_state.h"
+#include "evergreen_tex.h"
+
+#include "radeon_buffer_objects.h"
+#include "radeon_common_context.h"
+
+static unsigned int evergreenPrimitiveType(int prim) //same
+{
+ switch (prim & PRIM_MODE_MASK)
+ {
+ case GL_POINTS:
+ return DI_PT_POINTLIST;
+ break;
+ case GL_LINES:
+ return DI_PT_LINELIST;
+ break;
+ case GL_LINE_STRIP:
+ return DI_PT_LINESTRIP;
+ break;
+ case GL_LINE_LOOP:
+ return DI_PT_LINELOOP;
+ break;
+ case GL_TRIANGLES:
+ return DI_PT_TRILIST;
+ break;
+ case GL_TRIANGLE_STRIP:
+ return DI_PT_TRISTRIP;
+ break;
+ case GL_TRIANGLE_FAN:
+ return DI_PT_TRIFAN;
+ break;
+ case GL_QUADS:
+ return DI_PT_QUADLIST;
+ break;
+ case GL_QUAD_STRIP:
+ return DI_PT_QUADSTRIP;
+ break;
+ case GL_POLYGON:
+ return DI_PT_POLYGON;
+ break;
+ default:
+ assert(0);
+ return -1;
+ break;
+ }
+}
+
+static int evergreenNumVerts(int num_verts, int prim) //same
+{
+ int verts_off = 0;
+
+ switch (prim & PRIM_MODE_MASK) {
+ case GL_POINTS:
+ verts_off = 0;
+ break;
+ case GL_LINES:
+ verts_off = num_verts % 2;
+ break;
+ case GL_LINE_STRIP:
+ if (num_verts < 2)
+ verts_off = num_verts;
+ break;
+ case GL_LINE_LOOP:
+ if (num_verts < 2)
+ verts_off = num_verts;
+ break;
+ case GL_TRIANGLES:
+ verts_off = num_verts % 3;
+ break;
+ case GL_TRIANGLE_STRIP:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ case GL_TRIANGLE_FAN:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ case GL_QUADS:
+ verts_off = num_verts % 4;
+ break;
+ case GL_QUAD_STRIP:
+ if (num_verts < 4)
+ verts_off = num_verts;
+ else
+ verts_off = num_verts % 2;
+ break;
+ case GL_POLYGON:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ default:
+ assert(0);
+ return -1;
+ break;
+ }
+
+ return num_verts - verts_off;
+}
+
+static void evergreenRunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ BATCH_LOCALS(&context->radeon);
+ int type, total_emit;
+ int num_indices;
+ uint32_t vgt_draw_initiator = 0;
+ uint32_t vgt_index_type = 0;
+ uint32_t vgt_primitive_type = 0;
+ uint32_t vgt_num_indices = 0;
+
+ type = evergreenPrimitiveType(prim);
+ num_indices = evergreenNumVerts(end - start, prim);
+
+ radeon_print(RADEON_RENDER, RADEON_TRACE,
+ "%s type %x num_indices %d\n",
+ __func__, type, num_indices);
+
+ if (type < 0 || num_indices <= 0)
+ return;
+
+ SETfield(vgt_primitive_type, type,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+
+ SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+
+ if(GL_TRUE != context->ind_buf.is_32bit)
+ {
+ SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+ }
+
+ vgt_num_indices = num_indices;
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_DMA, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+ total_emit = 3 /* VGT_PRIMITIVE_TYPE */
+ + 2 /* VGT_INDEX_TYPE */
+ + 2 /* NUM_INSTANCES */
+ + 5 + 2; /* DRAW_INDEX */
+
+ BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+ // prim
+ R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+ R600_OUT_BATCH(vgt_primitive_type);
+ // index type
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+ R600_OUT_BATCH(vgt_index_type);
+ // num instances
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+ R600_OUT_BATCH(1);
+ // draw packet
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX, 3));
+ R600_OUT_BATCH(context->ind_buf.bo_offset);
+ R600_OUT_BATCH(0);
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+ R600_OUT_BATCH_RELOC(context->ind_buf.bo_offset,
+ context->ind_buf.bo,
+ context->ind_buf.bo_offset,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+static void evergreenRunRenderPrimitiveImmediate(GLcontext * ctx, int start, int end, int prim) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ BATCH_LOCALS(&context->radeon);
+ int type, i;
+ uint32_t num_indices, total_emit = 0;
+ uint32_t vgt_draw_initiator = 0;
+ uint32_t vgt_index_type = 0;
+ uint32_t vgt_primitive_type = 0;
+ uint32_t vgt_num_indices = 0;
+
+ type = evergreenPrimitiveType(prim);
+ num_indices = evergreenNumVerts(end - start, prim);
+
+ radeon_print(RADEON_RENDER, RADEON_TRACE,
+ "%s type %x num_indices %d\n",
+ __func__, type, num_indices);
+
+ if (type < 0 || num_indices <= 0)
+ return;
+
+ SETfield(vgt_primitive_type, type,
+ VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift, VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask);
+
+ if (num_indices > 0xffff)
+ {
+ SETfield(vgt_index_type, DI_INDEX_SIZE_32_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+ }
+ else
+ {
+ SETfield(vgt_index_type, DI_INDEX_SIZE_16_BIT, INDEX_TYPE_shift, INDEX_TYPE_mask);
+ }
+
+ vgt_num_indices = num_indices;
+ SETfield(vgt_draw_initiator, DI_MAJOR_MODE_0, MAJOR_MODE_shift, MAJOR_MODE_mask);
+
+ if (start == 0)
+ {
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_AUTO_INDEX, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ }
+ else
+ {
+ if (num_indices > 0xffff)
+ {
+ total_emit += num_indices;
+ }
+ else
+ {
+ total_emit += (num_indices + 1) / 2;
+ }
+ SETfield(vgt_draw_initiator, DI_SRC_SEL_IMMEDIATE, SOURCE_SELECT_shift, SOURCE_SELECT_mask);
+ }
+
+ total_emit += 3 /* VGT_PRIMITIVE_TYPE */
+ + 2 /* VGT_INDEX_TYPE */
+ + 2 /* NUM_INSTANCES */
+ + 3; /* DRAW */
+
+ BEGIN_BATCH_NO_AUTOSTATE(total_emit);
+ // prim
+ R600_OUT_BATCH_REGSEQ(VGT_PRIMITIVE_TYPE, 1);
+ R600_OUT_BATCH(vgt_primitive_type);
+ // index type
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_INDEX_TYPE, 0));
+ R600_OUT_BATCH(vgt_index_type);
+ // num instances
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_NUM_INSTANCES, 0));
+ R600_OUT_BATCH(1);
+ // draw packet
+ if(start == 0)
+ {
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_AUTO, 1));
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+ }
+ else
+ {
+ if (num_indices > 0xffff)
+ {
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (num_indices + 1)));
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+ for (i = start; i < (start + num_indices); i++)
+ {
+ R600_OUT_BATCH(i);
+ }
+ }
+ else
+ {
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_DRAW_INDEX_IMMD, (((num_indices + 1) / 2) + 1)));
+ R600_OUT_BATCH(vgt_num_indices);
+ R600_OUT_BATCH(vgt_draw_initiator);
+ for (i = start; i < (start + num_indices); i += 2)
+ {
+ if ((i + 1) == (start + num_indices))
+ {
+ R600_OUT_BATCH(i);
+ }
+ else
+ {
+ R600_OUT_BATCH(((i + 1) << 16) | (i));
+ }
+ }
+ }
+ }
+
+ END_BATCH();
+ COMMIT_BATCH();
+}
+
+#define CONVERT( TYPE, MACRO ) do { \
+ GLuint i, j, sz; \
+ sz = input->Size; \
+ if (input->Normalized) { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)src_ptr; \
+ for (j = 0; j < sz; j++) { \
+ *dst_ptr++ = MACRO(*in); \
+ in++; \
+ } \
+ src_ptr += stride; \
+ } \
+ } else { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)src_ptr; \
+ for (j = 0; j < sz; j++) { \
+ *dst_ptr++ = (GLfloat)(*in); \
+ in++; \
+ } \
+ src_ptr += stride; \
+ } \
+ } \
+} while (0)
+
+/**
+ * Convert attribute data type to float
+ * If the attribute uses named buffer object replace the bo with newly allocated bo
+ */
+static void evergreenConvertAttrib(GLcontext *ctx, int count,
+ const struct gl_client_array *input,
+ struct StreamDesc *attr)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ const GLvoid *src_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+ GLfloat *dst_ptr;
+ GLuint stride;
+
+ stride = (input->StrideB == 0) ? evergreen_getTypeSize(input->Type) * input->Size : input->StrideB;
+
+ /* Convert value for first element only */
+ if (input->StrideB == 0)
+ {
+ count = 1;
+ }
+
+ if (input->BufferObj->Name)
+ {
+ if (!input->BufferObj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+ }
+ else
+ {
+ src_ptr = input->Ptr;
+ }
+
+ radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset,
+ sizeof(GLfloat) * input->Size * count, 32);
+
+ radeon_bo_map(attr->bo, 1);
+
+ dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+
+ assert(src_ptr != NULL);
+
+ switch (input->Type)
+ {
+ case GL_DOUBLE:
+ CONVERT(GLdouble, (GLfloat));
+ break;
+ case GL_UNSIGNED_INT:
+ CONVERT(GLuint, UINT_TO_FLOAT);
+ break;
+ case GL_INT:
+ CONVERT(GLint, INT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_SHORT:
+ CONVERT(GLushort, USHORT_TO_FLOAT);
+ break;
+ case GL_SHORT:
+ CONVERT(GLshort, SHORT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_BYTE:
+ assert(input->Format != GL_BGRA);
+ CONVERT(GLubyte, UBYTE_TO_FLOAT);
+ break;
+ case GL_BYTE:
+ CONVERT(GLbyte, BYTE_TO_FLOAT);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ radeon_bo_unmap(attr->bo);
+
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ }
+}
+
+static void evergreenFixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ GLvoid *src_ptr;
+ GLuint *out;
+ int i;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ mapped_named_bo = GL_TRUE;
+ assert(mesa_ind_buf->obj->Pointer != NULL);
+ }
+ src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+ if (mesa_ind_buf->type == GL_UNSIGNED_BYTE)
+ {
+ GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+ GLubyte *in = (GLubyte *)src_ptr;
+
+ radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+ &context->ind_buf.bo_offset, size, 4);
+
+ radeon_bo_map(context->ind_buf.bo, 1);
+ assert(context->ind_buf.bo->ptr != NULL);
+ out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+ for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
+ {
+ *out++ = in[i] | in[i + 1] << 16;
+ }
+
+ if (i < mesa_ind_buf->count)
+ {
+ *out++ = in[i];
+ }
+
+ radeon_bo_unmap(context->ind_buf.bo);
+#if MESA_BIG_ENDIAN
+ }
+ else
+ { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
+ GLushort *in = (GLushort *)src_ptr;
+ GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+
+ radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+ &context->ind_buf.bo_offset, size, 4);
+
+ radeon_bo_map(context->ind_buf.bo, 1);
+ assert(context->ind_buf.bo->ptr != NULL);
+ out = (GLuint *)ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+ for (i = 0; i + 1 < mesa_ind_buf->count; i += 2)
+ {
+ *out++ = in[i] | in[i + 1] << 16;
+ }
+
+ if (i < mesa_ind_buf->count)
+ {
+ *out++ = in[i];
+ }
+ radeon_bo_unmap(context->ind_buf.bo);
+#endif
+ }
+
+ context->ind_buf.is_32bit = GL_FALSE;
+ context->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ }
+}
+
+static GLboolean evergreen_check_fallbacks(GLcontext *ctx) //same
+{
+ if (ctx->RenderMode != GL_RENDER)
+ return GL_TRUE;
+
+ return GL_FALSE;
+}
+
+/* start 3d, idle, cb/db flush */
+#define PRE_EMIT_STATE_BUFSZ 5 + 5 + 14
+
+static GLuint evergreenPredictRenderSize(GLcontext* ctx,
+ const struct _mesa_prim *prim,
+ const struct _mesa_index_buffer *ib,
+ GLuint nr_prims)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ GLboolean flushed;
+ GLuint dwords, i;
+ GLuint state_size;
+
+ dwords = PRE_EMIT_STATE_BUFSZ;
+ if (ib)
+ dwords += nr_prims * 14;
+ else {
+ for (i = 0; i < nr_prims; ++i)
+ {
+ if (prim[i].start == 0)
+ dwords += 10;
+ else if (prim[i].count > 0xffff)
+ dwords += prim[i].count + 10;
+ else
+ dwords += ((prim[i].count + 1) / 2) + 10;
+ }
+ }
+
+ state_size = radeonCountStateEmitSize(&context->radeon);
+ flushed = rcommonEnsureCmdBufSpace(&context->radeon,
+ dwords + state_size,
+ __FUNCTION__);
+ if (flushed)
+ dwords += radeonCountStateEmitSize(&context->radeon);
+ else
+ dwords += state_size;
+
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
+ return dwords;
+
+}
+
+static void evergreenSetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+
+ if (!mesa_ind_buf) {
+ context->ind_buf.bo = NULL;
+ return;
+ }
+
+#if MESA_BIG_ENDIAN
+ if (mesa_ind_buf->type == GL_UNSIGNED_INT)
+#else
+ if (mesa_ind_buf->type != GL_UNSIGNED_BYTE)
+#endif
+ {
+ const GLvoid *src_ptr;
+ GLvoid *dst_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ assert(mesa_ind_buf->obj->Pointer != NULL);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+ const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);
+
+ radeonAllocDmaRegion(&context->radeon, &context->ind_buf.bo,
+ &context->ind_buf.bo_offset, size, 4);
+ radeon_bo_map(context->ind_buf.bo, 1);
+ assert(context->ind_buf.bo->ptr != NULL);
+ dst_ptr = ADD_POINTERS(context->ind_buf.bo->ptr, context->ind_buf.bo_offset);
+
+ memcpy(dst_ptr, src_ptr, size);
+
+ radeon_bo_unmap(context->ind_buf.bo);
+ context->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
+ context->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ }
+ }
+ else
+ {
+ evergreenFixupIndexBuffer(ctx, mesa_ind_buf);
+ }
+}
+
+static void evergreenAlignDataToDword(GLcontext *ctx,
+ const struct gl_client_array *input,
+ int count,
+ struct StreamDesc *attr)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ const int dst_stride = (input->StrideB + 3) & ~3;
+ const int size = getTypeSize(input->Type) * input->Size * count;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ radeonAllocDmaRegion(&context->radeon, &attr->bo, &attr->bo_offset, size, 32);
+
+ radeon_bo_map(attr->bo, 1);
+
+ if (!input->BufferObj->Pointer)
+ {
+ ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ {
+ GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+ GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+ int i;
+
+ for (i = 0; i < count; ++i)
+ {
+ memcpy(dst_ptr, src_ptr, input->StrideB);
+ src_ptr += input->StrideB;
+ dst_ptr += dst_stride;
+ }
+ }
+
+ radeon_bo_unmap(attr->bo);
+ if (mapped_named_bo)
+ {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ }
+
+ attr->stride = dst_stride;
+}
+
+static void evergreenSetupStreams(GLcontext *ctx, const struct gl_client_array *input[], int count)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ GLuint stride;
+ int ret;
+ int i, index;
+
+ EVERGREEN_STATECHANGE(context, vtx);
+
+ for(index = 0; index < context->nNumActiveAos; index++)
+ {
+ struct radeon_aos *aos = &context->radeon.tcl.aos[index];
+ i = context->stream_desc[index].element;
+
+ stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
+
+ if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+ getTypeSize(input[i]->Type) != 4 ||
+#endif
+ stride < 4)
+ {
+ evergreenConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
+ }
+ else
+ {
+ if (input[i]->BufferObj->Name)
+ {
+ if (stride % 4 != 0)
+ {
+ assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
+ evergreenAlignDataToDword(ctx, input[i], count, &context->stream_desc[index]);
+ context->stream_desc[index].is_named_bo = GL_FALSE;
+ }
+ else
+ {
+ context->stream_desc[index].stride = input[i]->StrideB;
+ context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
+ context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
+ context->stream_desc[index].is_named_bo = GL_TRUE;
+ }
+ }
+ else
+ {
+ int size;
+ int local_count = count;
+ uint32_t *dst;
+
+ if (input[i]->StrideB == 0)
+ {
+ size = getTypeSize(input[i]->Type) * input[i]->Size;
+ local_count = 1;
+ }
+ else
+ {
+ size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
+ }
+
+ radeonAllocDmaRegion(&context->radeon, &context->stream_desc[index].bo,
+ &context->stream_desc[index].bo_offset, size, 32);
+
+ radeon_bo_map(context->stream_desc[index].bo, 1);
+ assert(context->stream_desc[index].bo->ptr != NULL);
+
+
+ dst = (uint32_t *)ADD_POINTERS(context->stream_desc[index].bo->ptr,
+ context->stream_desc[index].bo_offset);
+
+ switch (context->stream_desc[index].dwords)
+ {
+ case 1:
+ radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ break;
+ case 2:
+ radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ break;
+ case 3:
+ radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ break;
+ case 4:
+ radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ radeon_bo_unmap(context->stream_desc[index].bo);
+ }
+ }
+
+ aos->count = context->stream_desc[index].stride == 0 ? 1 : count;
+ aos->stride = context->stream_desc[index].stride / sizeof(float);
+ aos->components = context->stream_desc[index].dwords;
+ aos->bo = context->stream_desc[index].bo;
+ aos->offset = context->stream_desc[index].bo_offset;
+
+ if(context->stream_desc[index].is_named_bo)
+ {
+ radeon_cs_space_add_persistent_bo(context->radeon.cmdbuf.cs,
+ context->stream_desc[index].bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ }
+ }
+
+ ret = radeon_cs_space_check_with_bo(context->radeon.cmdbuf.cs,
+ first_elem(&context->radeon.dma.reserved)->bo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+}
+
+static void evergreenFreeData(GLcontext *ctx)
+{
+ /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo
+ * to prevent double unref in radeonReleaseArrays
+ * called during context destroy
+ */
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+
+ int i;
+
+ for (i = 0; i < context->nNumActiveAos; i++)
+ {
+ if (!context->stream_desc[i].is_named_bo)
+ {
+ radeon_bo_unref(context->stream_desc[i].bo);
+ }
+ context->radeon.tcl.aos[i].bo = NULL;
+ }
+
+ if(context->vp_Constbo != NULL)
+ {
+ radeon_bo_unref(context->vp_Constbo);
+ context->vp_Constbo = NULL;
+ }
+ if(context->fp_Constbo != NULL)
+ {
+ radeon_bo_unref(context->fp_Constbo);
+ context->fp_Constbo = NULL;
+ }
+
+ if (context->ind_buf.bo != NULL)
+ {
+ radeon_bo_unref(context->ind_buf.bo);
+ }
+}
+
+static GLboolean evergreenTryDrawPrims(GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLuint min_index,
+ GLuint max_index )
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ radeonContextPtr radeon = &context->radeon;
+ GLuint i, id = 0;
+ struct radeon_renderbuffer *rrb;
+
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
+ if (evergreen_check_fallbacks(ctx))
+ return GL_FALSE;
+
+ _tnl_UpdateFixedFunctionProgram(ctx);
+ evergreenSetVertexFormat(ctx, arrays, max_index + 1);
+
+
+ /* shaders need to be updated before buffers are validated */
+ evergreenUpdateShaders(ctx);
+ if (!evergreenValidateBuffers(ctx))
+ return GL_FALSE;
+
+ /* always emit CB base to prevent
+ * lock ups on some chips.
+ */
+ EVERGREEN_STATECHANGE(context, cb);
+ /* mark vtx as dirty since it changes per-draw */
+ EVERGREEN_STATECHANGE(context, vtx);
+
+ evergreenSetScissor(context);
+
+ evergreenSetupVertexProgram(ctx);
+ evergreenSetupFragmentProgram(ctx);
+ evergreenUpdateShaderStates(ctx);
+
+ GLuint emit_end = evergreenPredictRenderSize(ctx, prim, ib, nr_prims)
+ + context->radeon.cmdbuf.cs->cdw;
+
+ /* evergreenPredictRenderSize will call radeonReleaseDmaRegions, so update VP/FP const buf after it. */
+ evergreenSetupVPconstants(ctx);
+ evergreenSetupFPconstants(ctx);
+
+ evergreenSetupIndexBuffer(ctx, ib);
+
+ evergreenSetupStreams(ctx, arrays, max_index + 1);
+
+ radeonEmitState(radeon);
+
+ radeon_debug_add_indent();
+
+ for (i = 0; i < nr_prims; ++i)
+ {
+ if (context->ind_buf.bo)
+ evergreenRunRenderPrimitive(ctx,
+ prim[i].start,
+ prim[i].start + prim[i].count,
+ prim[i].mode);
+ else
+ evergreenRunRenderPrimitiveImmediate(ctx,
+ prim[i].start,
+ prim[i].start + prim[i].count,
+ prim[i].mode);
+ }
+
+ radeon_debug_remove_indent();
+
+ /* Flush render op cached for last several quads. */
+ /* XXX drm should handle this in fence submit */
+
+ //evergreeWaitForIdleClean(context);
+
+ rrb = radeon_get_colorbuffer(&context->radeon);
+ if (rrb && rrb->bo)
+ r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
+ CB_ACTION_ENA_bit | (1 << (id + 6)));
+
+ rrb = radeon_get_depthbuffer(&context->radeon);
+ if (rrb && rrb->bo)
+ r700SyncSurf(context, rrb->bo, 0, RADEON_GEM_DOMAIN_VRAM,
+ DB_ACTION_ENA_bit | DB_DEST_BASE_ENA_bit);
+
+ evergreenFreeData(ctx);
+
+ if (emit_end < context->radeon.cmdbuf.cs->cdw)
+ {
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", context->radeon.cmdbuf.cs->cdw - emit_end);
+ }
+
+ return GL_TRUE;
+}
+
+static void evergreenDrawPrims(GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index)
+{
+ GLboolean retval = GL_FALSE;
+
+ /* This check should get folded into just the places that
+ * min/max index are really needed.
+ */
+ if (!index_bounds_valid) {
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+ }
+
+ if (min_index) {
+ vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, evergreenDrawPrims );
+ return;
+ }
+
+ /* Make an attempt at drawing */
+ retval = evergreenTryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+
+ /* If failed run tnl pipeline - it should take care of fallbacks */
+ if (!retval) {
+ _swsetup_Wakeup(ctx);
+ _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+ }
+}
+
+void evergreenInitDraw(GLcontext *ctx)
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+
+ /* to be enabled */
+ vbo->draw_prims = evergreenDrawPrims;
+}
+
+
diff --git a/src/mesa/drivers/dri/r600/evergreen_sq.h b/src/mesa/drivers/dri/r600/evergreen_sq.h
new file mode 100644
index 0000000000..b1a536e76f
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_sq.h
@@ -0,0 +1,735 @@
+/*
+ * Copyright (C) 2008-2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _EVERGREEN_SQ_H_
+#define _EVERGREEN_SQ_H_
+
+enum{
+//CF
+ EG_CF_WORD0__ADDR_shift = 0,
+ EG_CF_WORD0__ADDR_mask = 0xFFFFFF,
+ EG_CF_WORD0__JUMPTABLE_SEL_shift = 24,
+ EG_CF_WORD0__JUMPTABLE_SEL_mask = 0x7 << 24,
+
+ EG_CF_WORD1__POP_COUNT_shift = 0, //3 bits
+ EG_CF_WORD1__POP_COUNT_mask = 0x7,
+ EG_CF_WORD1__CF_CONST_shift = 3, //5 bits
+ EG_CF_WORD1__CF_CONST_mask = 0x1F << 3,
+ EG_CF_WORD1__COND_shift = 8, //2 bits
+ EG_CF_WORD1__COND_mask = 0x3 << 8,
+ EG_CF_WORD1__COUNT_shift = 10,//6 bits
+ EG_CF_WORD1__COUNT_mask = 0x3F << 10,
+ EG_CF_WORD1__reserved_shift = 16,//4 bits
+ EG_CF_WORD1__VPM_shift = 20,//1 bit
+ EG_CF_WORD1__VPM_bit = 1 << 20,
+ EG_CF_WORD1__EOP_shift = 21,//1 bit
+ EG_CF_WORD1__EOP_bit = 1 << 21,
+ EG_CF_WORD1__CF_INST_shift = 22,//8 bits
+ EG_CF_WORD1__CF_INST_mask = 0xFF << 22,
+ EG_CF_WORD1__WQM_shift = 30,//1 bit
+ EG_CF_WORD1__WQM_bit = 1 << 30,
+ EG_CF_WORD1__BARRIER_shift = 31,//1 bit
+ EG_CF_WORD1__BARRIER_bit = 1 << 31,
+
+ EG_CF_INST_NOP = 0,
+ EG_CF_INST_TC = 1,
+ EG_CF_INST_VC = 2,
+ EG_CF_INST_GDS = 3,
+ EG_CF_INST_LOOP_START = 4,
+ EG_CF_INST_LOOP_END = 5,
+ EG_CF_INST_LOOP_START_DX10 = 6,
+ EG_CF_INST_LOOP_START_NO_AL = 7,
+ EG_CF_INST_LOOP_CONTINUE = 8,
+ EG_CF_INST_LOOP_BREAK = 9,
+ EG_CF_INST_JUMP = 10,
+ EG_CF_INST_PUSH = 11,
+ EG_CF_INST_Reserved_12 = 12,
+ EG_CF_INST_ELSE = 13,
+ EG_CF_INST_POP = 14,
+ EG_CF_INST_Reserved_15 = 15,
+ EG_CF_INST_Reserved_16 = 16,
+ EG_CF_INST_Reserved_17 = 17,
+ EG_CF_INST_CALL = 18,
+ EG_CF_INST_CALL_FS = 19,
+ EG_CF_INST_RETURN = 20,
+ EG_CF_INST_EMIT_VERTEX = 21,
+ EG_CF_INST_EMIT_CUT_VERTEX = 22,
+ EG_CF_INST_CUT_VERTEX = 23,
+ EG_CF_INST_KILL = 24,
+ EG_CF_INST_Reserved_25 = 25,
+ EG_CF_INST_WAIT_ACK = 26,
+ EG_CF_INST_TC_ACK = 27,
+ EG_CF_INST_VC_ACK = 28,
+ EG_CF_INST_JUMPTABLE = 29,
+ EG_CF_INST_GLOBAL_WAVE_SYNC = 30,
+ EG_CF_INST_HALT = 31,
+
+//TEX
+ EG_TEX_WORD0__TEX_INST_shift = 0, //5 bits
+ EG_TEX_WORD0__TEX_INST_mask = 0x1F,
+ EG_TEX_WORD0__INST_MOD_shift = 5, //2 bits
+ EG_TEX_WORD0__INST_MOD_mask = 0x3 << 5,
+ EG_TEX_WORD0__FWQ_shift = 7, //1 bit
+ EG_TEX_WORD0__FWQ_bit = 1 << 7,
+ EG_TEX_WORD0__RESOURCE_ID_shift = 8, //8 bits
+ EG_TEX_WORD0__RESOURCE_ID_mask = 0xFF << 8,
+ EG_TEX_WORD0__SRC_GPR_shift = 16,//7 bits
+ EG_TEX_WORD0__SRC_GPR_mask = 0x7F << 16,
+ EG_TEX_WORD0__SRC_REL_shift = 23,//1 bit
+ EG_TEX_WORD0__SRC_REL_bit = 1 << 23,
+ EG_TEX_WORD0__ALT_CONST_shift = 24,//1 bit
+ EG_TEX_WORD0__ALT_CONST_bit = 1 << 24,
+ EG_TEX_WORD0__RIM_shift = 25,//2 bits
+ EG_TEX_WORD0__RIM_mask = 0x3 << 25,
+ EG_TEX_WORD0__SIM_shift = 27,//2 bits
+ EG_TEX_WORD0__SIM_mask = 0x3 << 27,
+ EG_TEX_WORD0__Reserved_shift = 29,//3 bits
+ EG_TEX_WORD0__Reserved_mask = 0x7 << 29,
+
+ EG_TEX_INST_Reserved_0 = 0,
+ EG_TEX_INST_Reserved_1 = 1,
+ EG_TEX_INST_Reserved_2 = 2,
+ EG_TEX_INST_LD = 3,
+ EG_TEX_INST_GET_TEXTURE_RESINFO = 4,
+ EG_TEX_INST_GET_NUMBER_OF_SAMPLES= 5,
+ EG_TEX_INST_GET_COMP_TEX_LOD = 6,
+ EG_TEX_INST_GET_GRADIENTS_H = 7,
+ EG_TEX_INST_GET_GRADIENTS_V = 8,
+ EG_TEX_INST_SET_TEXTURE_OFFSETS = 9,
+ EG_TEX_INST_KEEP_GRADIENTS = 10,
+ EG_TEX_INST_SET_GRADIENTS_H = 11,
+ EG_TEX_INST_SET_GRADIENTS_V = 12,
+ EG_TEX_INST_Reserved_13 = 13,
+ EG_TEX_INST_Reserved_14 = 14,
+ EG_TEX_INST_Reserved_15 = 15,
+ EG_TEX_INST_SAMPLE = 16,
+ EG_TEX_INST_SAMPLE_L = 17,
+ EG_TEX_INST_SAMPLE_LB = 18,
+ EG_TEX_INST_SAMPLE_LZ = 19,
+ EG_TEX_INST_SAMPLE_G = 20,
+ EG_TEX_INST_GATHER4 = 21,
+ EG_TEX_INST_SAMPLE_G_LB = 22,
+ EG_TEX_INST_GATHER4_O = 23,
+ EG_TEX_INST_SAMPLE_C = 24,
+ EG_TEX_INST_SAMPLE_C_L = 25,
+ EG_TEX_INST_SAMPLE_C_LB = 26,
+ EG_TEX_INST_SAMPLE_C_LZ = 27,
+ EG_TEX_INST_SAMPLE_C_G = 28,
+ EG_TEX_INST_GATHER4_C = 29,
+ EG_TEX_INST_SAMPLE_C_G_LB = 30,
+ EG_TEX_INST_GATHER4_C_O = 31,
+
+ EG_TEX_WORD1__DST_GPR_shift = 0, //7 bits
+ EG_TEX_WORD1__DST_GPR_mask = 0x7F,
+ EG_TEX_WORD1__DST_REL_shift = 7, //1 bit
+ EG_TEX_WORD1__DST_REL_bit = 1 << 7,
+ EG_TEX_WORD1__Reserved_shift = 8, //1 bit
+ EG_TEX_WORD1__Reserved_bit = 1 << 8,
+ EG_TEX_WORD1__DST_SEL_X_shift = 9, //3 bits
+ EG_TEX_WORD1__DST_SEL_X_mask = 0x7 << 9,
+ EG_TEX_WORD1__DST_SEL_Y_shift = 12,//3 bits
+ EG_TEX_WORD1__DST_SEL_Y_mask = 0x7 << 12,
+ EG_TEX_WORD1__DST_SEL_Z_shift = 15,//3 bits
+ EG_TEX_WORD1__DST_SEL_Z_mask = 0x7 << 15,
+ EG_TEX_WORD1__DST_SEL_W_shift = 18,//3 bits
+ EG_TEX_WORD1__DST_SEL_W_mask = 0x7 << 18,
+ EG_TEX_WORD1__LOD_BIAS_shift = 21,//7 bits
+ EG_TEX_WORD1__LOD_BIAS_mask = 0x7F << 21,
+ EG_TEX_WORD1__COORD_TYPE_X_shift = 28,//1 bit
+ EG_TEX_WORD1__COORD_TYPE_X_bit = 1 << 28,
+ EG_TEX_WORD1__COORD_TYPE_Y_shift = 29,//1 bit
+ EG_TEX_WORD1__COORD_TYPE_Y_bit = 1 << 29,
+ EG_TEX_WORD1__COORD_TYPE_Z_shift = 30,//1 bit
+ EG_TEX_WORD1__COORD_TYPE_Z_bit = 1 << 30,
+ EG_TEX_WORD1__COORD_TYPE_W_shift = 31,//1 bit
+ EG_TEX_WORD1__COORD_TYPE_W_bit = 1 << 31,
+
+ EG_TEX_WORD2__OFFSET_X_shift = 0, //5 bits
+ EG_TEX_WORD2__OFFSET_X_mask = 0x1F,
+ EG_TEX_WORD2__OFFSET_Y_shift = 5, //5 bits
+ EG_TEX_WORD2__OFFSET_Y_mask = 0x1F << 5,
+ EG_TEX_WORD2__OFFSET_Z_shift = 10,//5 bits
+ EG_TEX_WORD2__OFFSET_Z_mask = 0x1F << 10,
+ EG_TEX_WORD2__SAMPLER_ID_shift = 15,//5 bits
+ EG_TEX_WORD2__SAMPLER_ID_mask = 0x1F << 15,
+ EG_TEX_WORD2__SRC_SEL_X_shift = 20,//3 bits
+ EG_TEX_WORD2__SRC_SEL_X_mask = 0x7 << 20,
+ EG_TEX_WORD2__SRC_SEL_Y_shift = 23,//3 bits
+ EG_TEX_WORD2__SRC_SEL_Y_mask = 0x7 << 23,
+ EG_TEX_WORD2__SRC_SEL_Z_shift = 26,//3 bits
+ EG_TEX_WORD2__SRC_SEL_Z_mask = 0x7 << 26,
+ EG_TEX_WORD2__SRC_SEL_W_shift = 29,//3 bits
+ EG_TEX_WORD2__SRC_SEL_W_mask = 0x7 << 29,
+
+//VTX
+ EG_VTX_WORD0__VC_INST_shift = 0, //5 bits
+ EG_VTX_WORD0__VC_INST_mask = 0x1F,
+ EG_VTX_WORD0__FETCH_TYPE_shift = 5, //2 bits
+ EG_VTX_WORD0__FETCH_TYPE_mask = 0x3 << 5,
+ EG_VTX_WORD0__FWQ_shift = 7, //1 bit
+ EG_VTX_WORD0__FWQ_bit = 1 << 7,
+ EG_VTX_WORD0__BUFFER_ID_shift = 8, //8 bits
+ EG_VTX_WORD0__BUFFER_ID_mask = 0xFF << 8,
+ EG_VTX_WORD0__SRC_GPR_shift = 16,//7 bits
+ EG_VTX_WORD0__SRC_GPR_mask = 0x7F << 16,
+ EG_VTX_WORD0__SRC_REL_shift = 23,//1 bit
+ EG_VTX_WORD0__SRC_REL_bit = 1 << 23,
+ EG_VTX_WORD0__SRC_SEL_X_shift = 24,//2 bits
+ EG_VTX_WORD0__SRC_SEL_X_mask = 0x3 << 24,
+ EG_VTX_WORD0__MFC_shift = 26,//6 bits
+ EG_VTX_WORD0__MFC_mask = 0x3F << 26,
+
+ EG_VC_INST_FETCH = 0,
+ EG_VC_INST_SEMANTIC = 1,
+ EG_VC_INST_Reserved_2 = 2,
+ EG_VC_INST_Reserved_3 = 3,
+ EG_VC_INST_Reserved_4 = 4,
+ EG_VC_INST_Reserved_5 = 5,
+ EG_VC_INST_Reserved_6 = 6,
+ EG_VC_INST_Reserved_7 = 7,
+ EG_VC_INST_Reserved_8 = 8,
+ EG_VC_INST_Reserved_9 = 9,
+ EG_VC_INST_Reserved_10 = 10,
+ EG_VC_INST_Reserved_11 = 11,
+ EG_VC_INST_Reserved_12 = 12,
+ EG_VC_INST_Reserved_13 = 13,
+ EG_VC_INST_GET_BUFFER_RESINFO = 14,
+
+ EG_VTX_FETCH_VERTEX_DATA = 0,
+ EG_VTX_FETCH_INSTANCE_DATA = 1,
+ EG_VTX_FETCH_NO_INDEX_OFFSET = 2,
+
+ EG_VTX_WORD1_SEM__SEMANTIC_ID_shift = 0, //8 bits
+ EG_VTX_WORD1_SEM__SEMANTIC_ID_mask = 0xFF,
+ EG_VTX_WORD1_GPR__DST_GPR_shift = 0, //7 bits
+ EG_VTX_WORD1_GPR__DST_GPR_mask = 0x7F,
+ EG_VTX_WORD1_GPR__DST_REL_shift = 7, //1 bit
+ EG_VTX_WORD1_GPR__DST_REL_bit = 1 << 7,
+ EG_VTX_WORD1__Reserved_shift = 8, //1 bit
+ EG_VTX_WORD1__Reserved_bit = 1 << 8,
+ EG_VTX_WORD1__DST_SEL_X_shift = 9, //3 bits
+ EG_VTX_WORD1__DST_SEL_X_mask = 0x7 << 9,
+ EG_VTX_WORD1__DST_SEL_Y_shift = 12,//3 bits
+ EG_VTX_WORD1__DST_SEL_Y_mask = 0x7 << 12,
+ EG_VTX_WORD1__DST_SEL_Z_shift = 15,//3 bits
+ EG_VTX_WORD1__DST_SEL_Z_mask = 0x7 << 15,
+ EG_VTX_WORD1__DST_SEL_W_shift = 18,//3 bits
+ EG_VTX_WORD1__DST_SEL_W_mask = 0x7 << 18,
+ EG_VTX_WORD1__UCF_shift = 21,//1 bit
+ EG_VTX_WORD1__UCF_bit = 1 << 21,
+ EG_VTX_WORD1__DATA_FORMAT_shift = 22,//6 bits
+ EG_VTX_WORD1__DATA_FORMAT_mask = 0x3F << 22,
+ EG_VTX_WORD1__NFA_shift = 28,//2 bits
+ EG_VTX_WORD1__NFA_mask = 0x3 << 28,
+ EG_VTX_WORD1__FCA_shift = 30,//1 bit
+ EG_VTX_WORD1__FCA_bit = 1 << 30,
+ EG_VTX_WORD1__SMA_shift = 31,//1 bit
+ EG_VTX_WORD1__SMA_bit = 1 << 31,
+
+ EG_VTX_WORD2__OFFSET_shift = 0, //16 bits
+ EG_VTX_WORD2__OFFSET_mask = 0xFFFF,
+ EG_VTX_WORD2__ENDIAN_SWAP_shift = 16,//2 bits
+ EG_VTX_WORD2__ENDIAN_SWAP_mask = 0x3 << 16,
+ EG_VTX_WORD2__CBNS_shift = 18,//1 bit
+ EG_VTX_WORD2__CBNS_bit = 1 << 18,
+ EG_VTX_WORD2__MEGA_FETCH_shift = 19,//1 bit
+ EG_VTX_WORD2__MEGA_FETCH_mask = 1 << 19,
+ EG_VTX_WORD2__ALT_CONST_shift = 20,//1 bit
+ EG_VTX_WORD2__ALT_CONST_mask = 1 << 20,
+ EG_VTX_WORD2__BIM_shift = 21,//2 bits
+ EG_VTX_WORD2__BIM_mask = 0x3 << 21,
+ EG_VTX_WORD2__Reserved_shift = 23,//9 bits
+ EG_VTX_WORD2__Reserved_mask = 0x1FF << 23,
+
+//CF_ALU
+ EG_CF_ALU_WORD0__ADDR_shift = 0, //22 bits
+ EG_CF_ALU_WORD0__ADDR_mask = 0x3FFFFF,
+ EG_CF_ALU_WORD0__KCACHE_BANK0_shift = 22,//4 bits
+ EG_CF_ALU_WORD0__KCACHE_BANK0_mask = 0xF << 22,
+ EG_CF_ALU_WORD0__KCACHE_BANK1_shift = 26,//4 bits
+ EG_CF_ALU_WORD0__KCACHE_BANK1_mask = 0xF << 26,
+ EG_CF_ALU_WORD0__KCACHE_MODE0_shift = 30,//2 bits
+ EG_CF_ALU_WORD0__KCACHE_MODE0_mask = 0x3 << 30,
+
+ EG_CF_ALU_WORD1__KCACHE_MODE1_shift = 0, //2 bits
+ EG_CF_ALU_WORD1__KCACHE_MODE1_mask = 0x3,
+ EG_CF_ALU_WORD1__KCACHE_ADDR0_shift = 2, //8 bits
+ EG_CF_ALU_WORD1__KCACHE_ADDR0_mask = 0xFF << 2,
+ EG_CF_ALU_WORD1__KCACHE_ADDR1_shift = 10, //8 bits
+ EG_CF_ALU_WORD1__KCACHE_ADDR1_mask = 0xFF << 10,
+ EG_CF_ALU_WORD1__COUNT_shift = 18, //7 bits
+ EG_CF_ALU_WORD1__COUNT_mask = 0x7F << 18,
+ EG_CF_ALU_WORD1__ALT_CONST_shift = 25, //1 bit
+ EG_CF_ALU_WORD1__ALT_CONST_bit = 1 << 25,
+ EG_CF_ALU_WORD1__CF_INST_shift = 26, //4 bits
+ EG_CF_ALU_WORD1__CF_INST_mask = 0xF << 26,
+ EG_CF_ALU_WORD1__WQM_shift = 30, //1 bit
+ EG_CF_ALU_WORD1__WQM_bit = 1 << 30,
+ EG_CF_ALU_WORD1__BARRIER_shift = 31, //1 bit
+ EG_CF_ALU_WORD1__BARRIER_bit = 1 << 31,
+
+ EG_CF_INST_ALU = 8,
+ EG_CF_INST_ALU_PUSH_BEFORE = 9,
+ EG_CF_INST_ALU_POP_AFTER = 10,
+ EG_CF_INST_ALU_POP2_AFTER = 11,
+ EG_CF_INST_ALU_EXTENDED = 12,
+ EG_CF_INST_ALU_CONTINUE = 13,
+ EG_CF_INST_ALU_BREAK = 14,
+ EG_CF_INST_ALU_ELSE_AFTER = 15,
+
+ EG_CF_ALU_WORD0_EXT__Reserved0_shift = 0, //4 bits
+ EG_CF_ALU_WORD0_EXT__Reserved0_mask = 0xF,
+ EG_CF_ALU_WORD0_EXT__KBIM0_shift = 4, //2 bits
+ EG_CF_ALU_WORD0_EXT__KBIM0_mask = 0x3 << 4,
+ EG_CF_ALU_WORD0_EXT__KBIM1_shift = 6, //2 bits
+ EG_CF_ALU_WORD0_EXT__KBIM1_mask = 0x3 << 6,
+ EG_CF_ALU_WORD0_EXT__KBIM2_shift = 8, //2 bits
+ EG_CF_ALU_WORD0_EXT__KBIM2_mask = 0x3 << 8,
+ EG_CF_ALU_WORD0_EXT__KBIM3_shift = 10,//2 bits
+ EG_CF_ALU_WORD0_EXT__KBIM3_mask = 0x3 << 10,
+ EG_CF_ALU_WORD0_EXT__Reserved12_shift = 12,//10 bits
+ EG_CF_ALU_WORD0_EXT__Reserved12_mask = 0x3FF << 12,
+ EG_CF_ALU_WORD0_EXT__KCACHE_BANK2_shift = 22,//4 bits
+ EG_CF_ALU_WORD0_EXT__KCACHE_BANK2_mask = 0xF << 22,
+ EG_CF_ALU_WORD0_EXT__KCACHE_BANK3_shift = 26,//4 bits
+ EG_CF_ALU_WORD0_EXT__KCACHE_BANK3_mask = 0xF << 26,
+ EG_CF_ALU_WORD0_EXT__KCACHE_MODE2_shift = 30,//2 btis
+ EG_CF_ALU_WORD0_EXT__KCACHE_MODE2_mask = 0x3 << 30,
+
+ EG_CF_ALU_WORD1_EXT__KCACHE_MODE3_shift = 0, //2 bits
+ EG_CF_ALU_WORD1_EXT__KCACHE_MODE3_mask = 0x3,
+ EG_CF_ALU_WORD1_EXT__KCACHE_ADDR2_shift = 2, //8 bits
+ EG_CF_ALU_WORD1_EXT__KCACHE_ADDR2_mask = 0xFF << 2,
+ EG_CF_ALU_WORD1_EXT__KCACHE_ADDR3_shift = 10, //8 bits
+ EG_CF_ALU_WORD1_EXT__KCACHE_ADDR3_mask = 0xFF << 10,
+ EG_CF_ALU_WORD1_EXT__Reserved18_shift = 18, //8 bits
+ EG_CF_ALU_WORD1_EXT__Reserved18_mask = 0xFF << 18,
+ EG_CF_ALU_WORD1_EXT__CF_INST_shift = 26, //4 bits
+ EG_CF_ALU_WORD1_EXT__CF_INST_mask = 0xF << 26,
+ EG_CF_ALU_WORD1_EXT__Reserved30_shift = 30, //1 bit
+ EG_CF_ALU_WORD1_EXT__Reserved30_bit = 1 << 30,
+ EG_CF_ALU_WORD1_EXT__BARRIER_shift = 31, //1 bit
+ EG_CF_ALU_WORD1_EXT__BARRIER_bit = 1 << 31,
+
+//ALU
+ EG_ALU_WORD0__SRC0_SEL_shift = 0, //9 bits
+ EG_ALU_WORD0__SRC0_SEL_mask = 0x1FF,
+ EG_ALU_WORD0__SRC1_SEL_shift = 13,//9 bits
+ EG_ALU_WORD0__SRC1_SEL_mask = 0x1FF << 13,
+ EG_ALU_WORD0__SRC0_REL_shift = 9, //1 bit
+ EG_ALU_WORD0__SRC0_REL_bit = 1 << 9,
+ EG_ALU_WORD0__SRC1_REL_shift = 22,//1 bit
+ EG_ALU_WORD0__SRC1_REL_bit = 1 << 22,
+ EG_ALU_WORD0__SRC0_CHAN_shift = 10,//2 bits
+ EG_ALU_WORD0__SRC0_CHAN_mask = 0x3 << 10,
+ EG_ALU_WORD0__SRC1_CHAN_shift = 23,//2 bits
+ EG_ALU_WORD0__SRC1_CHAN_mask = 0x3 << 23,
+ EG_ALU_WORD0__SRC0_NEG_shift = 12,//1 bit
+ EG_ALU_WORD0__SRC0_NEG_bit = 1 << 12,
+ EG_ALU_WORD0__SRC1_NEG_shift = 25,//1 bit
+ EG_ALU_WORD0__SRC1_NEG_bit = 1 << 25,
+ EG_ALU_WORD0__INDEX_MODE_shift = 26,//3 bits
+ EG_ALU_WORD0__INDEX_MODE_mask = 0x7 << 26,
+ EG_ALU_WORD0__PRED_SEL_shift = 29,//2 bits
+ EG_ALU_WORD0__PRED_SEL_mask = 0x3 << 29,
+ EG_ALU_WORD0__LAST_shift = 31,//1 bit
+ EG_ALU_WORD0__LAST_bit = 1 << 31,
+
+ EG_ALU_WORD1_OP2__SRC0_ABS_shift = 0, //1 bit
+ EG_ALU_WORD1_OP2__SRC0_ABS_bit = 1,
+ EG_ALU_WORD1_OP2__SRC1_ABS_shift = 1, //1 bit
+ EG_ALU_WORD1_OP2__SRC1_ABS_bit = 1 << 1,
+ EG_ALU_WORD1_OP2__UEM_shift = 2, //1 bit
+ EG_ALU_WORD1_OP2__UEM_bit = 1 << 2,
+ EG_ALU_WORD1_OP2__UPDATE_PRED_shift = 3, //1 bit
+ EG_ALU_WORD1_OP2__UPDATE_PRED_bit = 1 << 3,
+ EG_ALU_WORD1_OP2__WRITE_MASK_shift = 4, //1 bit
+ EG_ALU_WORD1_OP2__WRITE_MASK_bit = 1 << 4,
+ EG_ALU_WORD1_OP2__OMOD_shift = 5, //2 bits
+ EG_ALU_WORD1_OP2__OMOD_mask = 0x3 << 5,
+ EG_ALU_WORD1_OP2__ALU_INST_shift = 7, //11 bits
+ EG_ALU_WORD1_OP2__ALU_INST_mask = 0x7FF << 7,
+
+ EG_ALU_WORD1__BANK_SWIZZLE_shift = 18,//3 bits
+ EG_ALU_WORD1__BANK_SWIZZLE_mask = 0x7 << 18,
+ EG_ALU_WORD1__DST_GPR_shift = 21,//7 bits
+ EG_ALU_WORD1__DST_GPR_mask = 0x7F << 21,
+ EG_ALU_WORD1__DST_REL_shift = 28,//1 bit
+ EG_ALU_WORD1__DST_REL_mask = 1 << 28,
+ EG_ALU_WORD1__DST_CHAN_shift = 29,//2 bits
+ EG_ALU_WORD1__DST_CHAN_mask = 0x3 << 29,
+ EG_ALU_WORD1__CLAMP_shift = 31,//1 bits
+ EG_ALU_WORD1__CLAMP_mask = 1 << 31,
+
+ EG_ALU_WORD1_OP3__SRC2_SEL_shift = 0, //9 bits
+ EG_ALU_WORD1_OP3__SRC2_SEL_mask = 0x1FF,
+ EG_ALU_WORD1_OP3__SRC2_REL_shift = 9, //1 bit
+ EG_ALU_WORD1_OP3__SRC2_REL_bit = 1 << 9,
+ EG_ALU_WORD1_OP3__SRC2_CHAN_shift = 10,//2 bits
+ EG_ALU_WORD1_OP3__SRC2_CHAN_mask = 0x3 << 10,
+ EG_ALU_WORD1_OP3__SRC2_NEG_shift = 12,//1 bit
+ EG_ALU_WORD1_OP3__SRC2_NEG_bit = 1 << 12,
+ EG_ALU_WORD1_OP3__ALU_INST_shift = 13,//5 bits
+ EG_ALU_WORD1_OP3__ALU_INST_mask = 0x1F << 13,
+
+ EG_OP3_INST_BFE_UINT = 4,
+ EG_OP3_INST_BFE_INT = 5,
+ EG_OP3_INST_BFI_INT = 6,
+ EG_OP3_INST_FMA = 7,
+ EG_OP3_INST_CNDNE_64 = 9,
+ EG_OP3_INST_FMA_64 = 10,
+ EG_OP3_INST_LERP_UINT = 11,
+ EG_OP3_INST_BIT_ALIGN_INT = 12,
+ EG_OP3_INST_BYTE_ALIGN_INT = 13,
+ EG_OP3_INST_SAD_ACCUM_UINT = 14,
+ EG_OP3_INST_SAD_ACCUM_HI_UINT = 15,
+ EG_OP3_INST_MULADD_UINT24 = 16,
+ EG_OP3_INST_LDS_IDX_OP = 17,
+ EG_OP3_INST_MULADD = 20,
+ EG_OP3_INST_MULADD_M2 = 21,
+ EG_OP3_INST_MULADD_M4 = 22,
+ EG_OP3_INST_MULADD_D2 = 23,
+ EG_OP3_INST_MULADD_IEEE = 24,
+ EG_OP3_INST_CNDE = 25,
+ EG_OP3_INST_CNDGT = 26,
+ EG_OP3_INST_CNDGE = 27,
+ EG_OP3_INST_CNDE_INT = 28,
+ EG_OP3_INST_CMNDGT_INT = 29,
+ EG_OP3_INST_CMNDGE_INT = 30,
+ EG_OP3_INST_MUL_LIT = 31,
+
+ EG_OP2_INST_ADD = 0,
+ EG_OP2_INST_MUL = 1,
+ EG_OP2_INST_MUL_IEEE = 2,
+ EG_OP2_INST_MAX = 3,
+ EG_OP2_INST_MIN = 4,
+ EG_OP2_INST_MAX_DX10 = 5,
+ EG_OP2_INST_MIN_DX10 = 6,
+ EG_OP2_INST_SETE = 8,
+ EG_OP2_INST_SETGT = 9,
+ EG_OP2_INST_SETGE = 10,
+ EG_OP2_INST_SETNE = 11,
+ EG_OP2_INST_SETE_DX10 = 12,
+ EG_OP2_INST_SETGT_DX10 = 13,
+ EG_OP2_INST_SETGE_DX10 = 14,
+ EG_OP2_INST_SETNE_DX10 = 15,
+ EG_OP2_INST_FRACT = 16,
+ EG_OP2_INST_TRUNC = 17,
+ EG_OP2_INST_CEIL = 18,
+ EG_OP2_INST_RNDNE = 19,
+ EG_OP2_INST_FLOOR = 20,
+ EG_OP2_INST_ASHR_INT = 21,
+ EG_OP2_INST_LSHR_INT = 22,
+ EG_OP2_INST_LSHL_INT = 23,
+ EG_OP2_INST_MOV = 25,
+ EG_OP2_INST_NOP = 26,
+ EG_OP2_INST_MUL_64 = 27,
+ EG_OP2_INST_FLT64_TO_FLT32 = 28,
+ EG_OP2_INST_FLT32_TO_FLT64 = 29,
+ EG_OP2_INST_PRED_SETGT_UINT = 30,
+ EG_OP2_INST_PRED_SETGE_UINT = 31,
+ EG_OP2_INST_PRED_SETE = 32,
+ EG_OP2_INST_PRED_SETGT = 33,
+ EG_OP2_INST_PRED_SETGE = 34,
+ EG_OP2_INST_PRED_SETNE = 35,
+ EG_OP2_INST_PRED_SET_INV = 36,
+ EG_OP2_INST_PRED_SET_POP = 37,
+ EG_OP2_INST_PRED_SET_CLR = 38,
+ EG_OP2_INST_PRED_SET_RESTORE = 39,
+ EG_OP2_INST_PRED_SETE_PUSH = 40,
+ EG_OP2_INST_PRED_SETGT_PUSH = 41,
+ EG_OP2_INST_PRED_SETGE_PUSH = 42,
+ EG_OP2_INST_PRED_SETNE_PUSH = 43,
+ EG_OP2_INST_KILLE = 44,
+ EG_OP2_INST_KILLGT = 45,
+ EG_OP2_INST_KILLGE = 46,
+ EG_OP2_INST_KILLNE = 47,
+ EG_OP2_INST_AND_INT = 48,
+ EG_OP2_INST_OR_INT = 49,
+ EG_OP2_INST_XOR_INT = 50,
+ EG_OP2_INST_NOT_INT = 51,
+ EG_OP2_INST_ADD_INT = 52,
+ EG_OP2_INST_SUB_INT = 53,
+ EG_OP2_INST_MAX_INT = 54,
+ EG_OP2_INST_MIN_INT = 55,
+ EG_OP2_INST_MAX_UINT = 56,
+ EG_OP2_INST_MIN_UINT = 57,
+ EG_OP2_INST_SETE_INT = 58,
+ EG_OP2_INST_SETGT_INT = 59,
+ EG_OP2_INST_SETGE_INT = 60,
+ EG_OP2_INST_SETNE_INT = 61,
+ EG_OP2_INST_SETGT_UINT = 62,
+ EG_OP2_INST_SETGE_UINT = 63,
+ EG_OP2_INST_KILLGT_UINT = 64,
+ EG_OP2_INST_KILLGE_UINT = 65,
+ EG_OP2_INST_PREDE_INT = 66,
+ EG_OP2_INST_PRED_SETGT_INT = 67,
+ EG_OP2_INST_PRED_SETGE_INT = 68,
+ EG_OP2_INST_PRED_SETNE_INT = 69,
+ EG_OP2_INST_KILLE_INT = 70,
+ EG_OP2_INST_KILLGT_INT = 71,
+ EG_OP2_INST_KILLGE_INT = 72,
+ EG_OP2_INST_KILLNE_INT = 73,
+ EG_OP2_INST_PRED_SETE_PUSH_INT = 74,
+ EG_OP2_INST_PRED_SETGT_PUSH_INT = 75,
+ EG_OP2_INST_PRED_SETGE_PUSH_INT = 76,
+ EG_OP2_INST_PRED_SETNE_PUSH_INT = 77,
+ EG_OP2_INST_PRED_SETLT_PUSH_INT = 78,
+ EG_OP2_INST_PRED_SETLE_PUSH_INT = 79,
+ EG_OP2_INST_FLT_TO_INT = 80,
+ EG_OP2_INST_BFREV_INT = 81,
+ EG_OP2_INST_ADDC_UINT = 82,
+ EG_OP2_INST_SUBB_UINT = 83,
+ EG_OP2_INST_GROUP_BARRIER = 84,
+ EG_OP2_INST_GROUP_SEQ_BEGIN = 85,
+ EG_OP2_INST_GROUP_SEQ_END = 86,
+ EG_OP2_INST_SET_MODE = 87,
+ EG_OP2_INST_SET_CF_IDX0 = 88,
+ EG_OP2_INST_SET_CF_IDX1 = 89,
+ EG_OP2_INST_SET_LDS_SIZE = 90,
+ EG_OP2_INST_EXP_IEEE = 129,
+ EG_OP2_INST_LOG_CLAMPED = 130,
+ EG_OP2_INST_LOG_IEEE = 131,
+ EG_OP2_INST_RECIP_CLAMPED = 132,
+ EG_OP2_INST_RECIP_FF = 133,
+ EG_OP2_INST_RECIP_IEEE = 134,
+ EG_OP2_INST_RECIPSQRT_CLAMPED = 135,
+ EG_OP2_INST_RECIPSQRT_FF = 136,
+ EG_OP2_INST_RECIPSQRT_IEEE = 137,
+ EG_OP2_INST_SQRT_IEEE = 138,
+ EG_OP2_INST_SIN = 141,
+ EG_OP2_INST_COS = 142,
+ EG_OP2_INST_MULLO_INT = 143,
+ EG_OP2_INST_MULHI_INT = 144,
+ EG_OP2_INST_MULLO_UINT = 145,
+ EG_OP2_INST_MULHI_UINT = 146,
+ EG_OP2_INST_RECIP_INT = 147,
+ EG_OP2_INST_RECIP_UINT = 148,
+ EG_OP2_INST_RECIP_64 = 149,
+ EG_OP2_INST_RECIP_CLAMPED_64 = 150,
+ EG_OP2_INST_RECIPSQRT_64 = 151,
+ EG_OP2_INST_RECIPSQRT_CLAMPED_64 = 152,
+ EG_OP2_INST_SQRT_64 = 153,
+ EG_OP2_INST_FLT_TO_UINT = 154,
+ EG_OP2_INST_INT_TO_FLT = 155,
+ EG_OP2_INST_UINT_TO_FLT = 156,
+ EG_OP2_INST_BFM_INT = 160,
+ EG_OP2_INST_FLT32_TO_FLT16 = 162,
+ EG_OP2_INST_FLT16_TO_FLT32 = 163,
+ EG_OP2_INST_UBYTE0_FLT = 164,
+ EG_OP2_INST_UBYTE1_FLT = 165,
+ EG_OP2_INST_UBYTE2_FLT = 166,
+ EG_OP2_INST_UBYTE3_FLT = 167,
+ EG_OP2_INST_BCNT_INT = 170,
+ EG_OP2_INST_FFBH_UINT = 171,
+ EG_OP2_INST_FFBL_INT = 172,
+ EG_OP2_INST_FFBH_INT = 173,
+ EG_OP2_INST_FLT_TO_UINT4 = 174,
+ EG_OP2_INST_DOT_IEEE = 175,
+ EG_OP2_INST_FLT_TO_INT_RPI = 176,
+ EG_OP2_INST_FLT_TO_INT_FLOOR = 177,
+ EG_OP2_INST_MULHI_UINT24 = 178,
+ EG_OP2_INST_MBCNT_32HI_INT = 179,
+ EG_OP2_INST_OFFSET_TO_FLT = 180,
+ EG_OP2_INST_MUL_UINT24 = 181,
+ EG_OP2_INST_BCNT_ACCUM_PREV_INT = 182,
+ EG_OP2_INST_MBCNT_32LO_ACCUM_PREV_INT = 183,
+ EG_OP2_INST_SETE_64 = 184,
+ EG_OP2_INST_SETNE_64 = 185,
+ EG_OP2_INST_SETGT_64 = 186,
+ EG_OP2_INST_SETGE_64 = 187,
+ EG_OP2_INST_MIN_64 = 188,
+ EG_OP2_INST_MAX_64 = 189,
+ EG_OP2_INST_DOT4 = 190,
+ EG_OP2_INST_DOT4_IEEE = 191,
+ EG_OP2_INST_CUBE = 192,
+ EG_OP2_INST_MAX4 = 193,
+ EG_OP2_INST_FREXP_64 = 196,
+ EG_OP2_INST_LDEXP_64 = 197,
+ EG_OP2_INST_FRACT_64 = 198,
+ EG_OP2_INST_PRED_SETGT_64 = 199,
+ EG_OP2_INST_PRED_SETE_64 = 200,
+ EG_OP2_INST_PRED_SETGE_64 = 201,
+ EG_OP2_INST_MUL_64_2 = 202, //same as prev?
+ EG_OP2_INST_ADD_64 = 203,
+ EG_OP2_INST_MOVA_INT = 204,
+ EG_OP2_INST_FLT64_TO_FLT32_2 = 205, //same as prev?
+ EG_OP2_INST_FLT32_TO_FLT64_2 = 206, //same as prev?
+ EG_OP2_INST_SAD_ACCUM_PREV_UINT = 207,
+ EG_OP2_INST_DOT = 208,
+ EG_OP2_INST_MUL_PREV = 209,
+ EG_OP2_INST_MUL_IEEE_PREV = 210,
+ EG_OP2_INST_ADD_PREV = 211,
+ EG_OP2_INST_MULADD_PREV = 212,
+ EG_OP2_INST_MULADD_IEEE_PREV = 213,
+ EG_OP2_INST_INTERP_XY = 214,
+ EG_OP2_INST_INTERP_ZW = 215,
+ EG_OP2_INST_INTERP_X = 216,
+ EG_OP2_INST_INTERP_Z = 217,
+ EG_OP2_INST_STORE_FLAGS = 218,
+ EG_OP2_INST_LOAD_STORE_FLAGS = 219,
+ EG_OP2_INST_LDS_1A = 220,
+ EG_OP2_INST_LDS_1A1D = 221,
+ EG_OP2_INST_LDS_2A = 223,
+ EG_OP2_INST_INTERP_LOAD_P0 = 224,
+ EG_OP2_INST_INTERP_LOAD_P10 = 225,
+ EG_OP2_INST_INTERP_LOAD_P20 = 226,
+
+ EG_SRC_SEL__GPR_start = 0,
+ EG_SRC_SEL__GPR_end = 127,
+ EG_SRC_SEL__KCONST_BANK0_start = 128,
+ EG_SRC_SEL__KCONST_BANK0_end = 159,
+ EG_SRC_SEL__KCONST_BANK1_start = 160,
+ EG_SRC_SEL__KCONST_BANK1_end = 191,
+ EG_SRC_SEL__INLINE_satrt = 192,
+ EG_SRC_SEL__INLINE_end = 255,
+ EG_SRC_SEL__KCONST_BANK2_start = 256,
+ EG_SRC_SEL__KCONST_BANK2_end = 287,
+ EG_SRC_SEL__KCONST_BANK3_start = 288,
+ EG_SRC_SEL__KCONST_BANK3_end = 319,
+ EG_SRC_SEL__ALU_SRC_LDS_OQ_A = 219,
+ EG_SRC_SEL__ALU_SRC_LDS_OQ_B = 220,
+ EG_SRC_SEL__ALU_SRC_LDS_OQ_A_POP = 221,
+ EG_SRC_SEL__ALU_SRC_LDS_OQ_B_POP = 222,
+ EG_SRC_SEL__ALU_SRC_LDS_DIRECT_A = 223,
+ EG_SRC_SEL__ALU_SRC_LDS_DIRECT_B = 224,
+ EG_SRC_SEL__ALU_SRC_TIME_HI = 227,
+ EG_SRC_SEL__ALU_SRC_TIME_LO = 228,
+ EG_SRC_SEL__ALU_SRC_MASK_HI = 229,
+ EG_SRC_SEL__ALU_SRC_MASK_LO = 230,
+ EG_SRC_SEL__ALU_SRC_HW_WAVE_ID = 231,
+ EG_SRC_SEL__ALU_SRC_SIMD_ID = 232,
+ EG_SRC_SEL__ALU_SRC_SE_ID = 233,
+ EG_SRC_SEL__ALU_SRC_HW_THREADGRP_ID = 234,
+ EG_SRC_SEL__ALU_SRC_WAVE_ID_IN_GRP = 235,
+ EG_SRC_SEL__ALU_SRC_NUM_THREADGRP_WAVES = 236,
+ EG_SRC_SEL__ALU_SRC_HW_ALU_ODD = 237,
+ EG_SRC_SEL__ALU_SRC_LOOP_IDX = 238,
+ EG_SRC_SEL__ALU_SRC_PARAM_BASE_ADDR = 240,
+ EG_SRC_SEL__ALU_SRC_NEW_PRIM_MASK = 241,
+ EG_SRC_SEL__ALU_SRC_PRIM_MASK_HI = 242,
+ EG_SRC_SEL__ALU_SRC_PRIM_MASK_LO = 243,
+ EG_SRC_SEL__ALU_SRC_1_DBL_L = 244,
+ EG_SRC_SEL__ALU_SRC_1_DBL_M = 245,
+ EG_SRC_SEL__ALU_SRC_0_5_DBL_L = 246,
+ EG_SRC_SEL__ALU_SRC_0_5_DBL_M = 247,
+ EG_SRC_SEL__ALU_SRC_0 = 248,
+ EG_SRC_SEL__ALU_SRC_1 = 249,
+ EG_SRC_SEL__ALU_SRC_1_INT = 250,
+ EG_SRC_SEL__ALU_SRC_M_1_INT = 251,
+ EG_SRC_SEL__ALU_SRC_0_5 = 252,
+ EG_SRC_SEL__ALU_SRC_LITERAL = 253,
+ EG_SRC_SEL__ALU_SRC_PV = 254,
+ EG_SRC_SEL__ALU_SRC_PS = 255,
+
+//ALLOC_EXPORT
+ EG_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE_shift = 0, //13 bits
+ EG_CF_ALLOC_EXPORT_WORD0__ARRAY_BASE_mask = 0x1FFF,
+ EG_CF_ALLOC_EXPORT_WORD0__TYPE_shift = 13,//2 bits
+ EG_CF_ALLOC_EXPORT_WORD0__TYPE_mask = 0x3 << 13,
+ EG_CF_ALLOC_EXPORT_WORD0__RW_GPR_shift = 15,//7 bits
+ EG_CF_ALLOC_EXPORT_WORD0__RW_GPR_mask = 0x7F << 15,
+ EG_CF_ALLOC_EXPORT_WORD0__RW_REL_shift = 22,//1 bit
+ EG_CF_ALLOC_EXPORT_WORD0__RW_REL_bit = 1 << 22,
+ EG_CF_ALLOC_EXPORT_WORD0__INDEX_GPR_shift = 23,//7 bits
+ EG_CF_ALLOC_EXPORT_WORD0__INDEX_GPR_mask = 0x7F << 23,
+ EG_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_shift = 30,//2 bits
+ EG_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_mask = 0x3 << 30,
+
+ EG_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_shift = 0, //12 bits
+ EG_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_mask = 0xFFF,
+ EG_CF_ALLOC_EXPORT_WORD1_BUF__COMP_MASK_shift = 12, //4 bits
+ EG_CF_ALLOC_EXPORT_WORD1_BUF__COMP_MASK_mask = 0xF << 12,
+
+ EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X_shift = 0, //3 bits
+ EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_X_mask = 0x7,
+ EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y_shift = 3, //3 bits
+ EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Y_mask = 0x7 << 3,
+ EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z_shift = 6, //3 bits
+ EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_Z_mask = 0x7 << 6,
+ EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W_shift = 9, //3 bits
+ EG_CF_ALLOC_EXPORT_WORD1_SWIZ__SEL_W_mask = 0x7 << 9,
+ EG_CF_ALLOC_EXPORT_WORD1_SWIZ__Resreve_shift = 12,//4 bits
+ EG_CF_ALLOC_EXPORT_WORD1_SWIZ__Resreve_mask = 0xF << 12,
+
+ EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_shift = 16, //4 bits
+ EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_mask = 0xF << 16,
+ EG_CF_ALLOC_EXPORT_WORD1__VPM_shift = 20, //1 bit
+ EG_CF_ALLOC_EXPORT_WORD1__VPM_bit = 1 << 20,
+ EG_CF_ALLOC_EXPORT_WORD1__EOP_shift = 21, //1 bit
+ EG_CF_ALLOC_EXPORT_WORD1__EOP_bit = 1 << 21,
+ EG_CF_ALLOC_EXPORT_WORD1__CF_INST_shift = 22, //8 bits
+ EG_CF_ALLOC_EXPORT_WORD1__CF_INST_mask = 0xFF << 22,
+ EG_CF_ALLOC_EXPORT_WORD1__MARK_shift = 30, //1 bit
+ EG_CF_ALLOC_EXPORT_WORD1__MARK_bit = 1 << 30,
+ EG_CF_ALLOC_EXPORT_WORD1__BARRIER_shift = 31, //1 bit
+ EG_CF_ALLOC_EXPORT_WORD1__BARRIER_bit = 1 << 31,
+
+ EG_CF_INST_MEM_STREAM0_BUF0 = 64 ,
+ EG_CF_INST_MEM_STREAM0_BUF1 = 65,
+ EG_CF_INST_MEM_STREAM0_BUF2 = 66,
+ EG_CF_INST_MEM_STREAM0_BUF3 = 67,
+ EG_CF_INST_MEM_STREAM1_BUF0 = 68,
+ EG_CF_INST_MEM_STREAM1_BUF1 = 69,
+ EG_CF_INST_MEM_STREAM1_BUF2 = 70,
+ EG_CF_INST_MEM_STREAM1_BUF3 = 71,
+ EG_CF_INST_MEM_STREAM2_BUF0 = 72,
+ EG_CF_INST_MEM_STREAM2_BUF1 = 73,
+ EG_CF_INST_MEM_STREAM2_BUF2 = 74,
+ EG_CF_INST_MEM_STREAM2_BUF3 = 75,
+ EG_CF_INST_MEM_STREAM3_BUF0 = 76,
+ EG_CF_INST_MEM_STREAM3_BUF1 = 77,
+ EG_CF_INST_MEM_STREAM3_BUF2 = 78,
+ EG_CF_INST_MEM_STREAM3_BUF3 = 79,
+ EG_CF_INST_MEM_WR_SCRATCH = 80,
+ EG_CF_INST_MEM_RING = 82,
+ EG_CF_INST_EXPORT = 83,
+ EG_CF_INST_EXPORT_DONE = 84,
+ EG_CF_INST_MEM_EXPORT = 85,
+ EG_CF_INST_MEM_RAT = 86,
+ EG_CF_INST_MEM_RAT_CACHELESS = 87,
+ EG_CF_INST_MEM_RING1 = 88,
+ EG_CF_INST_MEM_RING2 = 89,
+ EG_CF_INST_MEM_RING3 = 90,
+ EG_CF_INST_MEM_EXPORT_COMBINED = 91,
+ EG_CF_INST_MEM_RAT_COMBINED_CACHELESS = 92,
+
+ EG_EXPORT_PIXEL = 0,
+ EG_EXPORT_WRITE = 0,
+ EG_EXPORT_POS = 1,
+ EG_EXPORT_WRITE_IND = 1,
+ EG_EXPORT_PARAM = 2,
+ EG_EXPORT_WRITE_ACK = 2,
+ EG_EXPORT_WRITE_IND_ACK = 3,
+
+ /* PS interp param source */
+ EG_ALU_SRC_PARAM_BASE = 0x000001c0,
+ EG_ALU_SRC_PARAM_SIZE = 0x00000021,
+};
+
+#endif //_EVERGREEN_SQ_H_
+
+
diff --git a/src/mesa/drivers/dri/r600/evergreen_state.c b/src/mesa/drivers/dri/r600/evergreen_state.c
new file mode 100644
index 0000000000..931478caa5
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_state.c
@@ -0,0 +1,1878 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/dd.h"
+#include "main/simple_list.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "main/api_arrayelt.h"
+#include "main/framebuffer.h"
+#include "drivers/common/meta.h"
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
+
+#include "vbo/vbo.h"
+
+#include "r600_context.h"
+
+#include "evergreen_state.h"
+#include "evergreen_diff.h"
+#include "evergreen_vertprog.h"
+#include "evergreen_fragprog.h"
+#include "evergreen_tex.h"
+
+void evergreenUpdateStateParameters(GLcontext * ctx, GLuint new_state); //same
+
+void evergreenUpdateShaders(GLcontext * ctx)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+
+ /* should only happenen once, just after context is created */
+ /* TODO: shouldn't we fallback to sw here? */
+ if (!ctx->FragmentProgram._Current) {
+ fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+ return;
+ }
+
+ evergreenSelectFragmentShader(ctx);
+
+ evergreenSelectVertexShader(ctx);
+ evergreenUpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+ context->radeon.NewGLState = 0;
+}
+
+void evergreeUpdateShaders(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+
+ /* should only happenen once, just after context is created */
+ /* TODO: shouldn't we fallback to sw here? */
+ if (!ctx->FragmentProgram._Current) {
+ fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+ return;
+ }
+
+ evergreenSelectFragmentShader(ctx);
+
+ evergreenSelectVertexShader(ctx);
+ evergreenUpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+ context->radeon.NewGLState = 0;
+}
+
+/*
+ * To correctly position primitives:
+ */
+void evergreenUpdateViewportOffset(GLcontext * ctx) //------------------
+{
+ context_t *context = R700_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon);
+ GLfloat xoffset = (GLfloat) dPriv->x;
+ GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h;
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+ int id = 0;
+
+ GLfloat tx = v[MAT_TX] + xoffset;
+ GLfloat ty = (-v[MAT_TY]) + yoffset;
+
+ if (evergreen->viewport[id].PA_CL_VPORT_XOFFSET.f32All != tx ||
+ evergreen->viewport[id].PA_CL_VPORT_YOFFSET.f32All != ty) {
+ /* Note: this should also modify whatever data the context reset
+ * code uses...
+ */
+ EVERGREEN_STATECHANGE(context, pa);
+ evergreen->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx;
+ evergreen->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty;
+ }
+
+ radeonUpdateScissor(ctx);
+}
+
+void evergreenUpdateStateParameters(GLcontext * ctx, GLuint new_state) //same
+{
+ struct evergreen_fragment_program *fp =
+ (struct evergreen_fragment_program *)ctx->FragmentProgram._Current;
+ struct gl_program_parameter_list *paramList;
+
+ if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
+ return;
+
+ if (!ctx->FragmentProgram._Current || !fp)
+ return;
+
+ paramList = ctx->FragmentProgram._Current->Base.Parameters;
+
+ if (!paramList)
+ return;
+
+ _mesa_load_state_parameters(ctx, paramList);
+
+}
+
+/**
+ * Called by Mesa after an internal state update.
+ */
+static void evergreenInvalidateState(GLcontext * ctx, GLuint new_state) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ _swrast_InvalidateState(ctx, new_state);
+ _swsetup_InvalidateState(ctx, new_state);
+ _vbo_InvalidateState(ctx, new_state);
+ _tnl_InvalidateState(ctx, new_state);
+ _ae_invalidate_state(ctx, new_state);
+
+ if (new_state & _NEW_BUFFERS) {
+ _mesa_update_framebuffer(ctx);
+ /* this updates the DrawBuffer's Width/Height if it's a FBO */
+ _mesa_update_draw_buffer_bounds(ctx);
+
+ EVERGREEN_STATECHANGE(context, cb);
+ EVERGREEN_STATECHANGE(context, db);
+ }
+
+ if (new_state & (_NEW_LIGHT)) {
+ EVERGREEN_STATECHANGE(context, pa);
+ if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION)
+ SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit);
+ else
+ CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, PROVOKING_VTX_LAST_bit);
+ }
+
+ evergreenUpdateStateParameters(ctx, new_state);
+
+ EVERGREEN_STATECHANGE(context, pa);
+ EVERGREEN_STATECHANGE(context, spi);
+
+ if(GL_TRUE == evergreen->bEnablePerspective)
+ {
+ /* Do scale XY and Z by 1/W0 for perspective correction on pos. For orthogonal case, set both to one. */
+ CLEARbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
+ CLEARbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
+
+ SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
+
+ SETbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
+ CLEARbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
+
+ SETfield(evergreen->SPI_BARYC_CNTL.u32All, 1,
+ EG_SPI_BARYC_CNTL__PERSP_CENTROID_ENA_shift,
+ EG_SPI_BARYC_CNTL__PERSP_CENTROID_ENA_mask);
+ }
+ else
+ {
+ /* For orthogonal case. */
+ SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
+ SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
+
+ SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
+
+ CLEARbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
+ SETbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
+
+ SETfield(evergreen->SPI_BARYC_CNTL.u32All, 1,
+ EG_SPI_BARYC_CNTL__LINEAR_CENTROID_ENA_shift,
+ EG_SPI_BARYC_CNTL__LINEAR_CENTROID_ENA_mask);
+ }
+
+ context->radeon.NewGLState |= new_state;
+}
+
+static void evergreenSetAlphaState(GLcontext * ctx) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ uint32_t alpha_func = REF_ALWAYS;
+ GLboolean really_enabled = ctx->Color.AlphaEnabled;
+
+ EVERGREEN_STATECHANGE(context, sx);
+
+ switch (ctx->Color.AlphaFunc) {
+ case GL_NEVER:
+ alpha_func = REF_NEVER;
+ break;
+ case GL_LESS:
+ alpha_func = REF_LESS;
+ break;
+ case GL_EQUAL:
+ alpha_func = REF_EQUAL;
+ break;
+ case GL_LEQUAL:
+ alpha_func = REF_LEQUAL;
+ break;
+ case GL_GREATER:
+ alpha_func = REF_GREATER;
+ break;
+ case GL_NOTEQUAL:
+ alpha_func = REF_NOTEQUAL;
+ break;
+ case GL_GEQUAL:
+ alpha_func = REF_GEQUAL;
+ break;
+ case GL_ALWAYS:
+ /*alpha_func = REF_ALWAYS; */
+ really_enabled = GL_FALSE;
+ break;
+ }
+
+ if (really_enabled) {
+ SETfield(evergreen->SX_ALPHA_TEST_CONTROL.u32All, alpha_func,
+ ALPHA_FUNC_shift, ALPHA_FUNC_mask);
+ SETbit(evergreen->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit);
+ evergreen->SX_ALPHA_REF.f32All = ctx->Color.AlphaRef;
+ } else {
+ CLEARbit(evergreen->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit);
+ }
+}
+
+static void evergreenAlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref) //same
+{
+ (void)func;
+ (void)ref;
+ evergreenSetAlphaState(ctx);
+}
+
+static void evergreenBlendColor(GLcontext * ctx, const GLfloat cf[4]) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ EVERGREEN_STATECHANGE(context, cb);
+
+ evergreen->CB_BLEND_RED.f32All = cf[0];
+ evergreen->CB_BLEND_GREEN.f32All = cf[1];
+ evergreen->CB_BLEND_BLUE.f32All = cf[2];
+ evergreen->CB_BLEND_ALPHA.f32All = cf[3];
+}
+
+static int evergreenblend_factor(GLenum factor, GLboolean is_src) //same
+{
+ switch (factor) {
+ case GL_ZERO:
+ return BLEND_ZERO;
+ break;
+ case GL_ONE:
+ return BLEND_ONE;
+ break;
+ case GL_DST_COLOR:
+ return BLEND_DST_COLOR;
+ break;
+ case GL_ONE_MINUS_DST_COLOR:
+ return BLEND_ONE_MINUS_DST_COLOR;
+ break;
+ case GL_SRC_COLOR:
+ return BLEND_SRC_COLOR;
+ break;
+ case GL_ONE_MINUS_SRC_COLOR:
+ return BLEND_ONE_MINUS_SRC_COLOR;
+ break;
+ case GL_SRC_ALPHA:
+ return BLEND_SRC_ALPHA;
+ break;
+ case GL_ONE_MINUS_SRC_ALPHA:
+ return BLEND_ONE_MINUS_SRC_ALPHA;
+ break;
+ case GL_DST_ALPHA:
+ return BLEND_DST_ALPHA;
+ break;
+ case GL_ONE_MINUS_DST_ALPHA:
+ return BLEND_ONE_MINUS_DST_ALPHA;
+ break;
+ case GL_SRC_ALPHA_SATURATE:
+ return (is_src) ? BLEND_SRC_ALPHA_SATURATE : BLEND_ZERO;
+ break;
+ case GL_CONSTANT_COLOR:
+ return BLEND_CONSTANT_COLOR;
+ break;
+ case GL_ONE_MINUS_CONSTANT_COLOR:
+ return BLEND_ONE_MINUS_CONSTANT_COLOR;
+ break;
+ case GL_CONSTANT_ALPHA:
+ return BLEND_CONSTANT_ALPHA;
+ break;
+ case GL_ONE_MINUS_CONSTANT_ALPHA:
+ return BLEND_ONE_MINUS_CONSTANT_ALPHA;
+ break;
+ default:
+ fprintf(stderr, "unknown blend factor %x\n", factor);
+ return (is_src) ? BLEND_ONE : BLEND_ZERO;
+ break;
+ }
+}
+
+static void evergreenSetBlendState(GLcontext * ctx) //diff : CB_COLOR_CONTROL, CB_BLEND0_CONTROL bits
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ uint32_t blend_reg = 0, eqn, eqnA;
+
+ EVERGREEN_STATECHANGE(context, cb);
+
+ if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) {
+ SETfield(blend_reg,
+ BLEND_ONE, COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ZERO, COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+ SETfield(blend_reg,
+ COMB_DST_PLUS_SRC, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask);
+ SETfield(blend_reg,
+ BLEND_ONE, ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ZERO, ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+ SETfield(blend_reg,
+ COMB_DST_PLUS_SRC, ALPHA_COMB_FCN_shift, ALPHA_COMB_FCN_mask);
+ //if (context->radeon.radeonScreen->chip_family == CHIP_FAMILY_R600)
+ // evergreen->CB_BLEND_CONTROL.u32All = blend_reg;
+ //else
+ evergreen->CB_BLEND0_CONTROL.u32All = blend_reg;
+ return;
+ }
+
+ SETfield(blend_reg,
+ evergreenblend_factor(ctx->Color.BlendSrcRGB, GL_TRUE),
+ COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+ SETfield(blend_reg,
+ evergreenblend_factor(ctx->Color.BlendDstRGB, GL_FALSE),
+ COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+
+ switch (ctx->Color.BlendEquationRGB) {
+ case GL_FUNC_ADD:
+ eqn = COMB_DST_PLUS_SRC;
+ break;
+ case GL_FUNC_SUBTRACT:
+ eqn = COMB_SRC_MINUS_DST;
+ break;
+ case GL_FUNC_REVERSE_SUBTRACT:
+ eqn = COMB_DST_MINUS_SRC;
+ break;
+ case GL_MIN:
+ eqn = COMB_MIN_DST_SRC;
+ SETfield(blend_reg,
+ BLEND_ONE,
+ COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ONE,
+ COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+ break;
+ case GL_MAX:
+ eqn = COMB_MAX_DST_SRC;
+ SETfield(blend_reg,
+ BLEND_ONE,
+ COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ONE,
+ COLOR_DESTBLEND_shift, COLOR_DESTBLEND_mask);
+ break;
+
+ default:
+ fprintf(stderr,
+ "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
+ __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB);
+ return;
+ }
+ SETfield(blend_reg,
+ eqn, COLOR_COMB_FCN_shift, COLOR_COMB_FCN_mask);
+
+ SETfield(blend_reg,
+ evergreenblend_factor(ctx->Color.BlendSrcA, GL_TRUE),
+ ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+ SETfield(blend_reg,
+ evergreenblend_factor(ctx->Color.BlendDstA, GL_FALSE),
+ ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+
+ switch (ctx->Color.BlendEquationA) {
+ case GL_FUNC_ADD:
+ eqnA = COMB_DST_PLUS_SRC;
+ break;
+ case GL_FUNC_SUBTRACT:
+ eqnA = COMB_SRC_MINUS_DST;
+ break;
+ case GL_FUNC_REVERSE_SUBTRACT:
+ eqnA = COMB_DST_MINUS_SRC;
+ break;
+ case GL_MIN:
+ eqnA = COMB_MIN_DST_SRC;
+ SETfield(blend_reg,
+ BLEND_ONE,
+ ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ONE,
+ ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+ break;
+ case GL_MAX:
+ eqnA = COMB_MAX_DST_SRC;
+ SETfield(blend_reg,
+ BLEND_ONE,
+ ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+ SETfield(blend_reg,
+ BLEND_ONE,
+ ALPHA_DESTBLEND_shift, ALPHA_DESTBLEND_mask);
+ break;
+ default:
+ fprintf(stderr,
+ "[%s:%u] Invalid A blend equation (0x%04x).\n",
+ __FUNCTION__, __LINE__, ctx->Color.BlendEquationA);
+ return;
+ }
+
+ SETfield(blend_reg,
+ eqnA, ALPHA_COMB_FCN_shift, ALPHA_COMB_FCN_mask);
+
+ SETbit(blend_reg, SEPARATE_ALPHA_BLEND_bit);
+
+ SETbit(blend_reg, EG_CB_BLENDX_CONTROL_ENABLE_bit);
+
+ evergreen->CB_BLEND0_CONTROL.u32All = blend_reg;
+}
+
+static void evergreenBlendEquationSeparate(GLcontext * ctx,
+ GLenum modeRGB, GLenum modeA) //same
+{
+ evergreenSetBlendState(ctx);
+}
+
+static void evergreenBlendFuncSeparate(GLcontext * ctx,
+ GLenum sfactorRGB, GLenum dfactorRGB,
+ GLenum sfactorA, GLenum dfactorA) //same
+{
+ evergreenSetBlendState(ctx);
+}
+
+static GLuint evergreen_translate_logicop(GLenum logicop) //same
+{
+ switch (logicop) {
+ case GL_CLEAR:
+ return 0x00;
+ case GL_SET:
+ return 0xff;
+ case GL_COPY:
+ return 0xcc;
+ case GL_COPY_INVERTED:
+ return 0x33;
+ case GL_NOOP:
+ return 0xaa;
+ case GL_INVERT:
+ return 0x55;
+ case GL_AND:
+ return 0x88;
+ case GL_NAND:
+ return 0x77;
+ case GL_OR:
+ return 0xee;
+ case GL_NOR:
+ return 0x11;
+ case GL_XOR:
+ return 0x66;
+ case GL_EQUIV:
+ return 0x99;
+ case GL_AND_REVERSE:
+ return 0x44;
+ case GL_AND_INVERTED:
+ return 0x22;
+ case GL_OR_REVERSE:
+ return 0xdd;
+ case GL_OR_INVERTED:
+ return 0xbb;
+ default:
+ fprintf(stderr, "unknown blend logic operation %x\n", logicop);
+ return 0xcc;
+ }
+}
+
+static void evergreenSetLogicOpState(GLcontext *ctx) //diff : CB_COLOR_CONTROL.ROP3 is actually same bits.
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ EVERGREEN_STATECHANGE(context, cb);
+
+ if (RGBA_LOGICOP_ENABLED(ctx))
+ SETfield(evergreen->CB_COLOR_CONTROL.u32All,
+ evergreen_translate_logicop(ctx->Color.LogicOp),
+ EG_CB_COLOR_CONTROL__ROP3_shift,
+ EG_CB_COLOR_CONTROL__ROP3_mask);
+ else
+ SETfield(evergreen->CB_COLOR_CONTROL.u32All, 0xCC,
+ EG_CB_COLOR_CONTROL__ROP3_shift,
+ EG_CB_COLOR_CONTROL__ROP3_mask);
+}
+
+static void evergreenClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq ) //same , but PA_CL_UCP_0_ offset diff
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ GLint p;
+ GLint *ip;
+
+ p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+ ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+ EVERGREEN_STATECHANGE(context, pa);
+
+ evergreen->ucp[p].PA_CL_UCP_0_X.u32All = ip[0];
+ evergreen->ucp[p].PA_CL_UCP_0_Y.u32All = ip[1];
+ evergreen->ucp[p].PA_CL_UCP_0_Z.u32All = ip[2];
+ evergreen->ucp[p].PA_CL_UCP_0_W.u32All = ip[3];
+}
+
+static void evergreenSetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state) //diff in func calls
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ GLuint p;
+
+ p = cap - GL_CLIP_PLANE0;
+
+ EVERGREEN_STATECHANGE(context, pa);
+
+ if (state) {
+ evergreen->PA_CL_CLIP_CNTL.u32All |= (UCP_ENA_0_bit << p);
+ evergreen->ucp[p].enabled = GL_TRUE;
+ evergreenClipPlane(ctx, cap, NULL);
+ } else {
+ evergreen->PA_CL_CLIP_CNTL.u32All &= ~(UCP_ENA_0_bit << p);
+ evergreen->ucp[p].enabled = GL_FALSE;
+ }
+}
+
+static void evergreenSetDBRenderState(GLcontext * ctx)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ struct evergreen_fragment_program *fp =
+ (struct evergreen_fragment_program *)(ctx->FragmentProgram._Current);
+
+ EVERGREEN_STATECHANGE(context, db);
+
+ SETbit(evergreen->DB_SHADER_CONTROL.u32All,
+ DUAL_EXPORT_ENABLE_bit);
+ SETfield(evergreen->DB_SHADER_CONTROL.u32All, EARLY_Z_THEN_LATE_Z,
+ Z_ORDER_shift,
+ Z_ORDER_mask);
+ /* XXX need to enable htile for hiz/s */
+ SETfield(evergreen->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE,
+ FORCE_HIZ_ENABLE_shift,
+ FORCE_HIZ_ENABLE_mask);
+ SETfield(evergreen->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE,
+ FORCE_HIS_ENABLE0_shift,
+ FORCE_HIS_ENABLE0_mask);
+ SETfield(evergreen->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE,
+ FORCE_HIS_ENABLE1_shift,
+ FORCE_HIS_ENABLE1_mask);
+
+ if (context->radeon.query.current)
+ {
+ SETbit(evergreen->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit);
+ SETbit(evergreen->DB_COUNT_CONTROL.u32All,
+ EG_DB_COUNT_CONTROL__PERFECT_ZPASS_COUNTS_bit);
+ }
+ else
+ {
+ CLEARbit(evergreen->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit);
+ CLEARbit(evergreen->DB_COUNT_CONTROL.u32All,
+ EG_DB_COUNT_CONTROL__PERFECT_ZPASS_COUNTS_bit);
+ }
+
+ if (fp)
+ {
+ if (fp->r700Shader.killIsUsed)
+ {
+ SETbit(evergreen->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
+ }
+ else
+ {
+ CLEARbit(evergreen->DB_SHADER_CONTROL.u32All, KILL_ENABLE_bit);
+ }
+
+ if (fp->r700Shader.depthIsExported)
+ {
+ SETbit(evergreen->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
+ }
+ else
+ {
+ CLEARbit(evergreen->DB_SHADER_CONTROL.u32All, Z_EXPORT_ENABLE_bit);
+ }
+ }
+}
+
+void evergreenUpdateShaderStates(GLcontext * ctx)
+{
+ evergreenSetDBRenderState(ctx);
+ evergreenUpdateTextureState(ctx);
+}
+
+static void evergreenSetDepthState(GLcontext * ctx) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ EVERGREEN_STATECHANGE(context, db);
+
+ if (ctx->Depth.Test)
+ {
+ SETbit(evergreen->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit);
+ if (ctx->Depth.Mask)
+ {
+ SETbit(evergreen->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+ }
+ else
+ {
+ CLEARbit(evergreen->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+ }
+
+ switch (ctx->Depth.Func)
+ {
+ case GL_NEVER:
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_NEVER,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_LESS:
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_LESS,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_EQUAL:
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_EQUAL,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_LEQUAL:
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_LEQUAL,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_GREATER:
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_GREATER,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_NOTEQUAL:
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_NOTEQUAL,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_GEQUAL:
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_GEQUAL,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ case GL_ALWAYS:
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ default:
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS,
+ ZFUNC_shift, ZFUNC_mask);
+ break;
+ }
+ }
+ else
+ {
+ CLEARbit(evergreen->DB_DEPTH_CONTROL.u32All, Z_ENABLE_bit);
+ CLEARbit(evergreen->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+ }
+}
+
+static void evergreenSetStencilState(GLcontext * ctx, GLboolean state) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ GLboolean hw_stencil = GL_FALSE;
+
+ if (ctx->DrawBuffer) {
+ struct radeon_renderbuffer *rrbStencil
+ = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+ hw_stencil = (rrbStencil && rrbStencil->bo);
+ }
+
+ if (hw_stencil) {
+ EVERGREEN_STATECHANGE(context, db);
+ if (state) {
+ SETbit(evergreen->DB_DEPTH_CONTROL.u32All, STENCIL_ENABLE_bit);
+ SETbit(evergreen->DB_DEPTH_CONTROL.u32All, BACKFACE_ENABLE_bit);
+ } else
+ CLEARbit(evergreen->DB_DEPTH_CONTROL.u32All, STENCIL_ENABLE_bit);
+ }
+}
+
+static void evergreenUpdateCulling(GLcontext * ctx) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ EVERGREEN_STATECHANGE(context, pa);
+
+ CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
+ CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+ CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+
+ if (ctx->Polygon.CullFlag)
+ {
+ switch (ctx->Polygon.CullFaceMode)
+ {
+ case GL_FRONT:
+ SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+ CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+ break;
+ case GL_BACK:
+ CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+ SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+ break;
+ case GL_FRONT_AND_BACK:
+ SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+ SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+ break;
+ default:
+ CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_FRONT_bit);
+ CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, CULL_BACK_bit);
+ break;
+ }
+ }
+
+ switch (ctx->Polygon.FrontFace)
+ {
+ case GL_CW:
+ SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
+ break;
+ case GL_CCW:
+ CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, FACE_bit);
+ break;
+ default:
+ CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, FACE_bit); /* default: ccw */
+ break;
+ }
+
+ /* Winding is inverted when rendering to FBO */
+ if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+ evergreen->PA_SU_SC_MODE_CNTL.u32All ^= FACE_bit;
+}
+
+static void evergreenSetPolygonOffsetState(GLcontext * ctx, GLboolean state) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ EVERGREEN_STATECHANGE(context, pa);
+
+ if (state) {
+ SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_FRONT_ENABLE_bit);
+ SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_BACK_ENABLE_bit);
+ SETbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_PARA_ENABLE_bit);
+ } else {
+ CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_FRONT_ENABLE_bit);
+ CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_BACK_ENABLE_bit);
+ CLEARbit(evergreen->PA_SU_SC_MODE_CNTL.u32All, POLY_OFFSET_PARA_ENABLE_bit);
+ }
+}
+
+static void evergreenUpdateLineStipple(GLcontext * ctx) //diff
+{
+ /* TODO */
+}
+
+void evergreenSetScissor(context_t *context) //diff
+{
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ unsigned x1, y1, x2, y2;
+ int id = 0;
+ struct radeon_renderbuffer *rrb;
+
+ rrb = radeon_get_colorbuffer(&context->radeon);
+ if (!rrb || !rrb->bo) {
+ return;
+ }
+ if (context->radeon.state.scissor.enabled) {
+ x1 = context->radeon.state.scissor.rect.x1;
+ y1 = context->radeon.state.scissor.rect.y1;
+ x2 = context->radeon.state.scissor.rect.x2;
+ y2 = context->radeon.state.scissor.rect.y2;
+ /* r600 has exclusive BR scissors */
+ if (context->radeon.radeonScreen->kernel_mm) {
+ x2++;
+ y2++;
+ }
+ } else {
+ if (context->radeon.radeonScreen->driScreen->dri2.enabled) {
+ x1 = 0;
+ y1 = 0;
+ x2 = rrb->base.Width;
+ y2 = rrb->base.Height;
+ } else {
+ x1 = rrb->dPriv->x;
+ y1 = rrb->dPriv->y;
+ x2 = rrb->dPriv->x + rrb->dPriv->w;
+ y2 = rrb->dPriv->y + rrb->dPriv->h;
+ }
+ }
+
+ EVERGREEN_STATECHANGE(context, pa);
+
+ /* screen */
+ /* TODO : check WINDOW_OFFSET_DISABLE */
+ //SETbit(evergreen->PA_SC_SCREEN_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+ SETfield(evergreen->PA_SC_SCREEN_SCISSOR_TL.u32All, x1,
+ PA_SC_SCREEN_SCISSOR_TL__TL_X_shift, EG_PA_SC_SCREEN_SCISSOR_TL__TL_X_mask);
+ SETfield(evergreen->PA_SC_SCREEN_SCISSOR_TL.u32All, y1,
+ PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift, EG_PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask);
+
+ SETfield(evergreen->PA_SC_SCREEN_SCISSOR_BR.u32All, x2,
+ PA_SC_SCREEN_SCISSOR_BR__BR_X_shift, EG_PA_SC_SCREEN_SCISSOR_BR__BR_X_mask);
+ SETfield(evergreen->PA_SC_SCREEN_SCISSOR_BR.u32All, y2,
+ PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift, EG_PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask);
+
+ /* window */
+ SETbit(evergreen->PA_SC_WINDOW_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+ SETfield(evergreen->PA_SC_WINDOW_SCISSOR_TL.u32All, x1,
+ PA_SC_WINDOW_SCISSOR_TL__TL_X_shift, EG_PA_SC_WINDOW_SCISSOR_TL__TL_X_mask);
+ SETfield(evergreen->PA_SC_WINDOW_SCISSOR_TL.u32All, y1,
+ PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift, EG_PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask);
+
+ SETfield(evergreen->PA_SC_WINDOW_SCISSOR_BR.u32All, x2,
+ PA_SC_WINDOW_SCISSOR_BR__BR_X_shift, EG_PA_SC_WINDOW_SCISSOR_BR__BR_X_mask);
+ SETfield(evergreen->PA_SC_WINDOW_SCISSOR_BR.u32All, y2,
+ PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift, EG_PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask);
+
+
+ SETfield(evergreen->PA_SC_CLIPRECT_0_TL.u32All, x1,
+ PA_SC_CLIPRECT_0_TL__TL_X_shift, EG_PA_SC_CLIPRECT_0_TL__TL_X_mask);
+ SETfield(evergreen->PA_SC_CLIPRECT_0_TL.u32All, y1,
+ PA_SC_CLIPRECT_0_TL__TL_Y_shift, EG_PA_SC_CLIPRECT_0_TL__TL_Y_mask);
+ SETfield(evergreen->PA_SC_CLIPRECT_0_BR.u32All, x2,
+ PA_SC_CLIPRECT_0_BR__BR_X_shift, EG_PA_SC_CLIPRECT_0_BR__BR_X_mask);
+ SETfield(evergreen->PA_SC_CLIPRECT_0_BR.u32All, y2,
+ PA_SC_CLIPRECT_0_BR__BR_Y_shift, EG_PA_SC_CLIPRECT_0_BR__BR_Y_mask);
+
+ evergreen->PA_SC_CLIPRECT_1_TL.u32All = evergreen->PA_SC_CLIPRECT_0_TL.u32All;
+ evergreen->PA_SC_CLIPRECT_1_BR.u32All = evergreen->PA_SC_CLIPRECT_0_BR.u32All;
+ evergreen->PA_SC_CLIPRECT_2_TL.u32All = evergreen->PA_SC_CLIPRECT_0_TL.u32All;
+ evergreen->PA_SC_CLIPRECT_2_BR.u32All = evergreen->PA_SC_CLIPRECT_0_BR.u32All;
+ evergreen->PA_SC_CLIPRECT_3_TL.u32All = evergreen->PA_SC_CLIPRECT_0_TL.u32All;
+ evergreen->PA_SC_CLIPRECT_3_BR.u32All = evergreen->PA_SC_CLIPRECT_0_BR.u32All;
+
+ /* more....2d clip */
+ SETbit(evergreen->PA_SC_GENERIC_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+ SETfield(evergreen->PA_SC_GENERIC_SCISSOR_TL.u32All, x1,
+ PA_SC_GENERIC_SCISSOR_TL__TL_X_shift, EG_PA_SC_GENERIC_SCISSOR_TL__TL_X_mask);
+ SETfield(evergreen->PA_SC_GENERIC_SCISSOR_TL.u32All, y1,
+ PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift, EG_PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask);
+ SETfield(evergreen->PA_SC_GENERIC_SCISSOR_BR.u32All, x2,
+ PA_SC_GENERIC_SCISSOR_BR__BR_X_shift, EG_PA_SC_GENERIC_SCISSOR_BR__BR_X_mask);
+ SETfield(evergreen->PA_SC_GENERIC_SCISSOR_BR.u32All, y2,
+ PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift, EG_PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask);
+
+ SETbit(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+ SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, x1,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift, EG_PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask);
+ SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, y1,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift, EG_PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask);
+ SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, x2,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift, EG_PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask);
+ SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, y2,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, EG_PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask);
+
+ id = 1;
+ SETbit(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+ SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, x1,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift, EG_PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask);
+ SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_TL.u32All, y1,
+ PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift, EG_PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask);
+ SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, x2,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift, EG_PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask);
+ SETfield(evergreen->viewport[id].PA_SC_VPORT_SCISSOR_0_BR.u32All, y2,
+ PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift, EG_PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask);
+
+ evergreen->viewport[id].enabled = GL_TRUE;
+}
+
+static void evergreenUpdateWindow(GLcontext * ctx, int id) //diff in calling evergreenSetScissor
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon);
+ GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
+ GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+ const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+ const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+ GLfloat y_scale, y_bias;
+
+ if (render_to_fbo) {
+ y_scale = 1.0;
+ y_bias = 0;
+ } else {
+ y_scale = -1.0;
+ y_bias = yoffset;
+ }
+
+ GLfloat sx = v[MAT_SX];
+ GLfloat tx = v[MAT_TX] + xoffset;
+ GLfloat sy = v[MAT_SY] * y_scale;
+ GLfloat ty = (v[MAT_TY] * y_scale) + y_bias;
+ GLfloat sz = v[MAT_SZ] * depthScale;
+ GLfloat tz = v[MAT_TZ] * depthScale;
+
+ EVERGREEN_STATECHANGE(context, pa);
+
+
+ evergreen->viewport[id].PA_CL_VPORT_XSCALE.f32All = sx;
+ evergreen->viewport[id].PA_CL_VPORT_XOFFSET.f32All = tx;
+
+ evergreen->viewport[id].PA_CL_VPORT_YSCALE.f32All = sy;
+ evergreen->viewport[id].PA_CL_VPORT_YOFFSET.f32All = ty;
+
+ evergreen->viewport[id].PA_CL_VPORT_ZSCALE.f32All = sz;
+ evergreen->viewport[id].PA_CL_VPORT_ZOFFSET.f32All = tz;
+
+ if (ctx->Transform.DepthClamp) {
+ evergreen->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = MIN2(ctx->Viewport.Near, ctx->Viewport.Far);
+ evergreen->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = MAX2(ctx->Viewport.Near, ctx->Viewport.Far);
+ SETbit(evergreen->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit);
+ SETbit(evergreen->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit);
+ } else {
+ evergreen->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = 0.0;
+ evergreen->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = 1.0;
+ CLEARbit(evergreen->PA_CL_CLIP_CNTL.u32All, ZCLIP_NEAR_DISABLE_bit);
+ CLEARbit(evergreen->PA_CL_CLIP_CNTL.u32All, ZCLIP_FAR_DISABLE_bit);
+ }
+
+ evergreen->viewport[id].enabled = GL_TRUE;
+
+ evergreenSetScissor(context);
+}
+
+static void evergreenEnable(GLcontext * ctx, GLenum cap, GLboolean state) //diff in func calls
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+
+ switch (cap) {
+ case GL_TEXTURE_1D:
+ case GL_TEXTURE_2D:
+ case GL_TEXTURE_3D:
+ /* empty */
+ break;
+ case GL_FOG:
+ /* empty */
+ break;
+ case GL_ALPHA_TEST:
+ evergreenSetAlphaState(ctx);
+ break;
+ case GL_COLOR_LOGIC_OP:
+ evergreenSetLogicOpState(ctx);
+ /* fall-through, because logic op overrides blending */
+ case GL_BLEND:
+ evergreenSetBlendState(ctx);
+ break;
+ case GL_CLIP_PLANE0:
+ case GL_CLIP_PLANE1:
+ case GL_CLIP_PLANE2:
+ case GL_CLIP_PLANE3:
+ case GL_CLIP_PLANE4:
+ case GL_CLIP_PLANE5:
+ evergreenSetClipPlaneState(ctx, cap, state);
+ break;
+ case GL_DEPTH_TEST:
+ evergreenSetDepthState(ctx);
+ break;
+ case GL_STENCIL_TEST:
+ evergreenSetStencilState(ctx, state);
+ break;
+ case GL_CULL_FACE:
+ evergreenUpdateCulling(ctx);
+ break;
+ case GL_POLYGON_OFFSET_POINT:
+ case GL_POLYGON_OFFSET_LINE:
+ case GL_POLYGON_OFFSET_FILL:
+ evergreenSetPolygonOffsetState(ctx, state);
+ break;
+ case GL_SCISSOR_TEST:
+ radeon_firevertices(&context->radeon);
+ context->radeon.state.scissor.enabled = state;
+ radeonUpdateScissor(ctx);
+ break;
+ case GL_LINE_STIPPLE:
+ evergreenUpdateLineStipple(ctx);
+ break;
+ case GL_DEPTH_CLAMP:
+ evergreenUpdateWindow(ctx, 0);
+ break;
+ default:
+ break;
+ }
+
+}
+
+static void evergreenColorMask(GLcontext * ctx,
+ GLboolean r, GLboolean g, GLboolean b, GLboolean a) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ unsigned int mask = ((r ? 1 : 0) |
+ (g ? 2 : 0) |
+ (b ? 4 : 0) |
+ (a ? 8 : 0));
+
+ if (mask != evergreen->CB_TARGET_MASK.u32All) {
+ EVERGREEN_STATECHANGE(context, cb);
+ SETfield(evergreen->CB_TARGET_MASK.u32All, mask, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask);
+ }
+}
+
+static void evergreenDepthFunc(GLcontext * ctx, GLenum func) //same
+{
+ evergreenSetDepthState(ctx);
+}
+
+static void evergreenDepthMask(GLcontext * ctx, GLboolean mask) //same
+{
+ evergreenSetDepthState(ctx);
+}
+
+static void evergreenCullFace(GLcontext * ctx, GLenum mode) //same
+{
+ evergreenUpdateCulling(ctx);
+}
+
+static void evergreenFogfv(GLcontext * ctx, GLenum pname, const GLfloat * param) //same
+{
+}
+
+static void evergreenUpdatePolygonMode(GLcontext * ctx) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ EVERGREEN_STATECHANGE(context, pa);
+
+ SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DISABLE_POLY_MODE, POLY_MODE_shift, POLY_MODE_mask);
+
+ /* Only do something if a polygon mode is wanted, default is GL_FILL */
+ if (ctx->Polygon.FrontMode != GL_FILL ||
+ ctx->Polygon.BackMode != GL_FILL) {
+ GLenum f, b;
+
+ /* Handle GL_CW (clock wise and GL_CCW (counter clock wise)
+ * correctly by selecting the correct front and back face
+ */
+ f = ctx->Polygon.FrontMode;
+ b = ctx->Polygon.BackMode;
+
+ /* Enable polygon mode */
+ SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DUAL_MODE, POLY_MODE_shift, POLY_MODE_mask);
+
+ switch (f) {
+ case GL_LINE:
+ SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_LINES,
+ POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
+ break;
+ case GL_POINT:
+ SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_POINTS,
+ POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
+ break;
+ case GL_FILL:
+ SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES,
+ POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask);
+ break;
+ }
+
+ switch (b) {
+ case GL_LINE:
+ SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_LINES,
+ POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
+ break;
+ case GL_POINT:
+ SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_POINTS,
+ POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
+ break;
+ case GL_FILL:
+ SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, X_DRAW_TRIANGLES,
+ POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask);
+ break;
+ }
+ }
+}
+
+static void evergreenFrontFace(GLcontext * ctx, GLenum mode) //same
+{
+ evergreenUpdateCulling(ctx);
+ evergreenUpdatePolygonMode(ctx);
+}
+
+static void evergreenShadeModel(GLcontext * ctx, GLenum mode) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ EVERGREEN_STATECHANGE(context, spi);
+
+ /* also need to set/clear FLAT_SHADE bit per param in SPI_PS_INPUT_CNTL_[0-31] */
+ switch (mode) {
+ case GL_FLAT:
+ SETbit(evergreen->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
+ break;
+ case GL_SMOOTH:
+ CLEARbit(evergreen->SPI_INTERP_CONTROL_0.u32All, FLAT_SHADE_ENA_bit);
+ break;
+ default:
+ return;
+ }
+}
+
+static void evergreenLogicOpcode(GLcontext *ctx, GLenum logicop) //diff
+{
+ if (RGBA_LOGICOP_ENABLED(ctx))
+ evergreenSetLogicOpState(ctx);
+}
+
+static void evergreenPointSize(GLcontext * ctx, GLfloat size) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ EVERGREEN_STATECHANGE(context, pa);
+
+ /* We need to clamp to user defined range here, because
+ * the HW clamping happens only for per vertex point size. */
+ size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize);
+
+ /* same size limits for AA, non-AA points */
+ size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize);
+
+ /* format is 12.4 fixed point */
+ SETfield(evergreen->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0),
+ PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask);
+ SETfield(evergreen->PA_SU_POINT_SIZE.u32All, (int)(size * 8.0),
+ PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask);
+
+}
+
+static void evergreenPointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ EVERGREEN_STATECHANGE(context, pa);
+
+ /* format is 12.4 fixed point */
+ switch (pname) {
+ case GL_POINT_SIZE_MIN:
+ SETfield(evergreen->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MinSize * 8.0),
+ MIN_SIZE_shift, MIN_SIZE_mask);
+ evergreenPointSize(ctx, ctx->Point.Size);
+ break;
+ case GL_POINT_SIZE_MAX:
+ SETfield(evergreen->PA_SU_POINT_MINMAX.u32All, (int)(ctx->Point.MaxSize * 8.0),
+ MAX_SIZE_shift, MAX_SIZE_mask);
+ evergreenPointSize(ctx, ctx->Point.Size);
+ break;
+ case GL_POINT_DISTANCE_ATTENUATION:
+ break;
+ case GL_POINT_FADE_THRESHOLD_SIZE:
+ break;
+ default:
+ break;
+ }
+}
+
+static int evergreen_translate_stencil_func(int func) //same
+{
+ switch (func) {
+ case GL_NEVER:
+ return REF_NEVER;
+ case GL_LESS:
+ return REF_LESS;
+ case GL_EQUAL:
+ return REF_EQUAL;
+ case GL_LEQUAL:
+ return REF_LEQUAL;
+ case GL_GREATER:
+ return REF_GREATER;
+ case GL_NOTEQUAL:
+ return REF_NOTEQUAL;
+ case GL_GEQUAL:
+ return REF_GEQUAL;
+ case GL_ALWAYS:
+ return REF_ALWAYS;
+ }
+ return 0;
+}
+
+static void evergreenStencilFuncSeparate(GLcontext * ctx, GLenum face,
+ GLenum func, GLint ref, GLuint mask) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ const unsigned back = ctx->Stencil._BackFace;
+
+
+ EVERGREEN_STATECHANGE(context, db);
+
+ //front
+ SETfield(evergreen->DB_STENCILREFMASK.u32All, ctx->Stencil.Ref[0],
+ STENCILREF_shift, STENCILREF_mask);
+ SETfield(evergreen->DB_STENCILREFMASK.u32All, ctx->Stencil.ValueMask[0],
+ STENCILMASK_shift, STENCILMASK_mask);
+
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_func(ctx->Stencil.Function[0]),
+ STENCILFUNC_shift, STENCILFUNC_mask);
+
+ //back
+ SETfield(evergreen->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.Ref[back],
+ STENCILREF_BF_shift, STENCILREF_BF_mask);
+ SETfield(evergreen->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.ValueMask[back],
+ STENCILMASK_BF_shift, STENCILMASK_BF_mask);
+
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_func(ctx->Stencil.Function[back]),
+ STENCILFUNC_BF_shift, STENCILFUNC_BF_mask);
+}
+
+static void evergreenStencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ const unsigned back = ctx->Stencil._BackFace;
+
+ EVERGREEN_STATECHANGE(context, db);
+
+ // front
+ SETfield(evergreen->DB_STENCILREFMASK.u32All, ctx->Stencil.WriteMask[0],
+ STENCILWRITEMASK_shift, STENCILWRITEMASK_mask);
+
+ // back
+ SETfield(evergreen->DB_STENCILREFMASK_BF.u32All, ctx->Stencil.WriteMask[back],
+ STENCILWRITEMASK_BF_shift, STENCILWRITEMASK_BF_mask);
+
+}
+
+static int evergreen_translate_stencil_op(int op) //same
+{
+ switch (op) {
+ case GL_KEEP:
+ return STENCIL_KEEP;
+ case GL_ZERO:
+ return STENCIL_ZERO;
+ case GL_REPLACE:
+ return STENCIL_REPLACE;
+ case GL_INCR:
+ return STENCIL_INCR_CLAMP;
+ case GL_DECR:
+ return STENCIL_DECR_CLAMP;
+ case GL_INCR_WRAP_EXT:
+ return STENCIL_INCR_WRAP;
+ case GL_DECR_WRAP_EXT:
+ return STENCIL_DECR_WRAP;
+ case GL_INVERT:
+ return STENCIL_INVERT;
+ default:
+ WARN_ONCE("Do not know how to translate stencil op");
+ return STENCIL_KEEP;
+ }
+ return 0;
+}
+
+static void evergreenStencilOpSeparate(GLcontext * ctx, GLenum face,
+ GLenum fail, GLenum zfail, GLenum zpass) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ const unsigned back = ctx->Stencil._BackFace;
+
+ EVERGREEN_STATECHANGE(context, db);
+
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_op(ctx->Stencil.FailFunc[0]),
+ STENCILFAIL_shift, STENCILFAIL_mask);
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_op(ctx->Stencil.ZFailFunc[0]),
+ STENCILZFAIL_shift, STENCILZFAIL_mask);
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_op(ctx->Stencil.ZPassFunc[0]),
+ STENCILZPASS_shift, STENCILZPASS_mask);
+
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_op(ctx->Stencil.FailFunc[back]),
+ STENCILFAIL_BF_shift, STENCILFAIL_BF_mask);
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_op(ctx->Stencil.ZFailFunc[back]),
+ STENCILZFAIL_BF_shift, STENCILZFAIL_BF_mask);
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, evergreen_translate_stencil_op(ctx->Stencil.ZPassFunc[back]),
+ STENCILZPASS_BF_shift, STENCILZPASS_BF_mask);
+}
+
+static void evergreenViewport(GLcontext * ctx,
+ GLint x,
+ GLint y,
+ GLsizei width,
+ GLsizei height) //diff in evergreenUpdateWindow
+{
+ evergreenUpdateWindow(ctx, 0);
+
+ radeon_viewport(ctx, x, y, width, height);
+}
+
+static void evergreenDepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval) //diff in evergreenUpdateWindow
+{
+ evergreenUpdateWindow(ctx, 0);
+}
+
+static void evergreenLineWidth(GLcontext * ctx, GLfloat widthf) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ uint32_t lineWidth = (uint32_t)((widthf * 0.5) * (1 << 4));
+
+ EVERGREEN_STATECHANGE(context, pa);
+
+ if (lineWidth > 0xFFFF)
+ lineWidth = 0xFFFF;
+ SETfield(evergreen->PA_SU_LINE_CNTL.u32All,(uint16_t)lineWidth,
+ PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask);
+}
+
+static void evergreenLineStipple(GLcontext *ctx, GLint factor, GLushort pattern) //same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ EVERGREEN_STATECHANGE(context, pa);
+
+ SETfield(evergreen->PA_SC_LINE_STIPPLE.u32All, pattern, LINE_PATTERN_shift, LINE_PATTERN_mask);
+ SETfield(evergreen->PA_SC_LINE_STIPPLE.u32All, (factor-1), REPEAT_COUNT_shift, REPEAT_COUNT_mask);
+ SETfield(evergreen->PA_SC_LINE_STIPPLE.u32All, 1, AUTO_RESET_CNTL_shift, AUTO_RESET_CNTL_mask);
+}
+
+static void evergreenPolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units) //diff :
+ //all register here offset diff, bits same
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ GLfloat constant = units;
+ GLchar depth = 0;
+
+ EVERGREEN_STATECHANGE(context, pa);
+
+ switch (ctx->Visual.depthBits) {
+ case 16:
+ constant *= 4.0;
+ depth = -16;
+ break;
+ case 24:
+ constant *= 2.0;
+ depth = -24;
+ break;
+ }
+
+ factor *= 12.0;
+ SETfield(evergreen->PA_SU_POLY_OFFSET_DB_FMT_CNTL.u32All, depth,
+ POLY_OFFSET_NEG_NUM_DB_BITS_shift, POLY_OFFSET_NEG_NUM_DB_BITS_mask);
+ //evergreen->PA_SU_POLY_OFFSET_CLAMP.f32All = constant; //???
+ evergreen->PA_SU_POLY_OFFSET_FRONT_SCALE.f32All = factor;
+ evergreen->PA_SU_POLY_OFFSET_FRONT_OFFSET.f32All = constant;
+ evergreen->PA_SU_POLY_OFFSET_BACK_SCALE.f32All = factor;
+ evergreen->PA_SU_POLY_OFFSET_BACK_OFFSET.f32All = constant;
+}
+
+static void evergreenPolygonMode(GLcontext * ctx, GLenum face, GLenum mode) //same
+{
+ (void)face;
+ (void)mode;
+
+ evergreenUpdatePolygonMode(ctx);
+}
+
+static void evergreenRenderMode(GLcontext * ctx, GLenum mode) //same
+{
+}
+
+//TODO : move to kernel.
+static void evergreenInitSQConfig(GLcontext * ctx)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ uint32_t uSqNumCfInsts, uMaxGPRs, uMaxThreads, uMaxStackEntries, uPSThreadCount, uOtherThreadCount;
+ uint32_t NUM_PS_GPRS, NUM_VS_GPRS, NUM_GS_GPRS, NUM_ES_GPRS, NUM_HS_GPRS, NUM_LS_GPRS, NUM_CLAUSE_TEMP_GPRS;
+ GLboolean bVC_ENABLE = GL_TRUE;
+
+ R600_STATECHANGE(context, sq);
+
+ switch (context->radeon.radeonScreen->chip_family)
+ {
+ case CHIP_FAMILY_CEDAR:
+ uSqNumCfInsts = 1;
+ bVC_ENABLE = GL_FALSE;
+ uMaxGPRs = 256;
+ uPSThreadCount = 96;
+ uMaxThreads = 192;
+ uMaxStackEntries = 256;
+ break;
+ case CHIP_FAMILY_REDWOOD:
+ uSqNumCfInsts = 2;
+ bVC_ENABLE = GL_TRUE;
+ uMaxGPRs = 256;
+ uPSThreadCount = 128;
+ uMaxThreads = 248;
+ uMaxStackEntries = 256;
+ break;
+ case CHIP_FAMILY_JUNIPER:
+ uSqNumCfInsts = 2;
+ bVC_ENABLE = GL_TRUE;
+ uMaxGPRs = 256;
+ uPSThreadCount = 128;
+ uMaxThreads = 248;
+ uMaxStackEntries = 512;
+ break;
+ case CHIP_FAMILY_CYPRESS:
+ uSqNumCfInsts = 2;
+ bVC_ENABLE = GL_TRUE;
+ uMaxGPRs = 256;
+ uPSThreadCount = 128;
+ uMaxThreads = 248;
+ uMaxStackEntries = 512;
+ break;
+ case CHIP_FAMILY_HEMLOCK:
+ uSqNumCfInsts = 2;//?
+ bVC_ENABLE = GL_TRUE;
+ uMaxGPRs = 256;
+ uPSThreadCount = 128;
+ uMaxThreads = 248;
+ uMaxStackEntries = 512;
+ break;
+ default:
+ uSqNumCfInsts = 2;
+ bVC_ENABLE = GL_TRUE;
+ uMaxGPRs = 256;
+ uPSThreadCount = 128;
+ uMaxThreads = 248;
+ uMaxStackEntries = 512;
+ break;
+ }
+
+ evergreen->evergreen_config.SQ_DYN_GPR_CNTL_PS_FLUSH_REQ.u32All = 0;
+
+ evergreen->evergreen_config.SPI_CONFIG_CNTL.u32All = 0;
+ evergreen->evergreen_config.SPI_CONFIG_CNTL_1.u32All = 0;
+ SETfield(evergreen->evergreen_config.SPI_CONFIG_CNTL_1.u32All, 4,
+ EG_SPI_CONFIG_CNTL_1__VTX_DONE_DELAY_shift,
+ EG_SPI_CONFIG_CNTL_1__VTX_DONE_DELAY_mask);
+
+ evergreen->evergreen_config.CP_PERFMON_CNTL.u32All = 0;
+
+ evergreen->evergreen_config.SQ_MS_FIFO_SIZES.u32All = 0;
+ SETfield(evergreen->evergreen_config.SQ_MS_FIFO_SIZES.u32All, 16 * uSqNumCfInsts,
+ EG_SQ_MS_FIFO_SIZES__CACHE_FIFO_SIZE_shift,
+ EG_SQ_MS_FIFO_SIZES__CACHE_FIFO_SIZE_mask);
+ SETfield(evergreen->evergreen_config.SQ_MS_FIFO_SIZES.u32All, 0x4,
+ EG_SQ_MS_FIFO_SIZES__FETCH_FIFO_HIWATER_shift,
+ EG_SQ_MS_FIFO_SIZES__FETCH_FIFO_HIWATER_mask);
+ SETfield(evergreen->evergreen_config.SQ_MS_FIFO_SIZES.u32All, 0xE0,
+ EG_SQ_MS_FIFO_SIZES__DONE_FIFO_HIWATER_shift,
+ EG_SQ_MS_FIFO_SIZES__DONE_FIFO_HIWATER_mask);
+ SETfield(evergreen->evergreen_config.SQ_MS_FIFO_SIZES.u32All, 0x8,
+ EG_SQ_MS_FIFO_SIZES__ALU_UPDATE_FIFO_HIWATER_shift,
+ EG_SQ_MS_FIFO_SIZES__ALU_UPDATE_FIFO_HIWATER_mask);
+
+ if(bVC_ENABLE == GL_TRUE)
+ {
+ SETbit(evergreen->evergreen_config.SQ_CONFIG.u32All,
+ EG_SQ_CONFIG__VC_ENABLE_bit);
+ }
+ else
+ {
+ CLEARbit(evergreen->evergreen_config.SQ_CONFIG.u32All,
+ EG_SQ_CONFIG__VC_ENABLE_bit);
+ }
+ SETbit(evergreen->evergreen_config.SQ_CONFIG.u32All,
+ EG_SQ_CONFIG__EXPORT_SRC_C_bit);
+ SETfield(evergreen->evergreen_config.SQ_CONFIG.u32All, 0,
+ EG_SQ_CONFIG__PS_PRIO_shift,
+ EG_SQ_CONFIG__PS_PRIO_mask);
+ SETfield(evergreen->evergreen_config.SQ_CONFIG.u32All, 1,
+ EG_SQ_CONFIG__VS_PRIO_shift,
+ EG_SQ_CONFIG__VS_PRIO_mask);
+ SETfield(evergreen->evergreen_config.SQ_CONFIG.u32All, 2,
+ EG_SQ_CONFIG__GS_PRIO_shift,
+ EG_SQ_CONFIG__GS_PRIO_mask);
+ SETfield(evergreen->evergreen_config.SQ_CONFIG.u32All, 3,
+ EG_SQ_CONFIG__ES_PRIO_shift,
+ EG_SQ_CONFIG__ES_PRIO_mask);
+
+ NUM_CLAUSE_TEMP_GPRS = 4;
+ NUM_PS_GPRS = ((uMaxGPRs-(4*2))*12/32); // 93
+ NUM_VS_GPRS = ((uMaxGPRs-(4*2))*6/32); // 46
+ NUM_GS_GPRS = ((uMaxGPRs-(4*2))*4/32); // 31
+ NUM_ES_GPRS = ((uMaxGPRs-(4*2))*4/32); // 31
+ NUM_HS_GPRS = ((uMaxGPRs-(4*2))*3/32); // 23
+ NUM_LS_GPRS = ((uMaxGPRs-(4*2))*3/32); // 23
+
+ evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_1.u32All = 0;
+ evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_2.u32All = 0;
+ evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_3.u32All = 0;
+
+ SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_1.u32All, NUM_PS_GPRS,
+ NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
+ SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_1.u32All, NUM_VS_GPRS,
+ NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);
+ SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_1.u32All, NUM_CLAUSE_TEMP_GPRS,
+ NUM_CLAUSE_TEMP_GPRS_shift, NUM_CLAUSE_TEMP_GPRS_mask);
+ SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_2.u32All, NUM_GS_GPRS,
+ NUM_GS_GPRS_shift, NUM_GS_GPRS_mask);
+ SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_2.u32All, NUM_ES_GPRS,
+ NUM_ES_GPRS_shift, NUM_ES_GPRS_mask);
+ SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_3.u32All, NUM_HS_GPRS,
+ NUM_PS_GPRS_shift, NUM_PS_GPRS_mask);
+ SETfield(evergreen->evergreen_config.SQ_GPR_RESOURCE_MGMT_3.u32All, NUM_LS_GPRS,
+ NUM_VS_GPRS_shift, NUM_VS_GPRS_mask);
+
+ uOtherThreadCount = (((uMaxThreads-uPSThreadCount)/6)/8)*8;
+ evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT.u32All = 0;
+ evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT_2.u32All = 0;
+ SETfield(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT.u32All, uPSThreadCount,
+ NUM_PS_THREADS_shift, NUM_PS_THREADS_mask);
+ SETfield(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT.u32All, uOtherThreadCount,
+ NUM_VS_THREADS_shift, NUM_VS_THREADS_mask);
+ SETfield(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT.u32All, uOtherThreadCount,
+ NUM_GS_THREADS_shift, NUM_GS_THREADS_mask);
+ SETfield(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT.u32All, uOtherThreadCount,
+ NUM_ES_THREADS_shift, NUM_ES_THREADS_mask);
+ SETfield(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT_2.u32All, uOtherThreadCount,
+ NUM_PS_THREADS_shift, NUM_PS_THREADS_mask);
+ SETfield(evergreen->evergreen_config.SQ_THREAD_RESOURCE_MGMT_2.u32All, uOtherThreadCount,
+ NUM_VS_THREADS_shift, NUM_VS_THREADS_mask);
+
+ uMaxStackEntries = ((uMaxStackEntries*1)/6);
+ evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_1.u32All = 0;
+ evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_2.u32All = 0;
+ evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_3.u32All = 0;
+ SETfield(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_1.u32All, uMaxStackEntries,
+ NUM_PS_STACK_ENTRIES_shift, NUM_PS_STACK_ENTRIES_mask);
+ SETfield(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_1.u32All, uMaxStackEntries,
+ NUM_VS_STACK_ENTRIES_shift, NUM_VS_STACK_ENTRIES_mask);
+ SETfield(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_2.u32All, uMaxStackEntries,
+ NUM_GS_STACK_ENTRIES_shift, NUM_GS_STACK_ENTRIES_mask);
+ SETfield(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_2.u32All, uMaxStackEntries,
+ NUM_ES_STACK_ENTRIES_shift, NUM_ES_STACK_ENTRIES_mask);
+ SETfield(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_3.u32All, uMaxStackEntries,
+ NUM_PS_STACK_ENTRIES_shift, NUM_PS_STACK_ENTRIES_mask);
+ SETfield(evergreen->evergreen_config.SQ_STACK_RESOURCE_MGMT_3.u32All, uMaxStackEntries,
+ NUM_VS_STACK_ENTRIES_shift, NUM_VS_STACK_ENTRIES_mask);
+
+ evergreen->evergreen_config.PA_SC_FORCE_EOV_MAX_CNTS.u32All = 0;
+ SETfield(evergreen->evergreen_config.PA_SC_FORCE_EOV_MAX_CNTS.u32All, 4095,
+ EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT_shift,
+ EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_CLK_CNT_mask);
+ SETfield(evergreen->evergreen_config.PA_SC_FORCE_EOV_MAX_CNTS.u32All, 255,
+ EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT_shift,
+ EG_PA_SC_FORCE_EOV_MAX_CNTS__FORCE_EOV_MAX_REZ_CNT_mask);
+
+ evergreen->evergreen_config.VGT_CACHE_INVALIDATION.u32All = 0;
+ SETfield(evergreen->evergreen_config.VGT_CACHE_INVALIDATION.u32All, 2,
+ EG_VGT_CACHE_INVALIDATION__CACHE_INVALIDATION_shift,
+ EG_VGT_CACHE_INVALIDATION__CACHE_INVALIDATION_mask);
+
+ evergreen->evergreen_config.VGT_GS_VERTEX_REUSE.u32All = 0;
+ SETfield(evergreen->evergreen_config.VGT_GS_VERTEX_REUSE.u32All, 16,
+ VERT_REUSE_shift,
+ VERT_REUSE_mask);
+
+ evergreen->evergreen_config.PA_SC_LINE_STIPPLE_STATE.u32All = 0;
+
+ evergreen->evergreen_config.PA_CL_ENHANCE.u32All = 0;
+ SETbit(evergreen->evergreen_config.PA_CL_ENHANCE.u32All,
+ CLIP_VTX_REORDER_ENA_bit);
+ SETfield(evergreen->evergreen_config.PA_CL_ENHANCE.u32All, 3,
+ NUM_CLIP_SEQ_shift,
+ NUM_CLIP_SEQ_mask);
+}
+
+void evergreenInitState(GLcontext * ctx) //diff
+{
+ context_t *context = R700_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+
+ int id = 0;
+
+ //calloc should have done this
+ memset(evergreen, 0, sizeof(EVERGREEN_CHIP_CONTEXT));
+
+ // Disable window clipping and offset:
+ SETfield(evergreen->PA_SC_WINDOW_OFFSET.u32All, 0,
+ EG_PA_SC_WINDOW_OFFSET__WINDOW_X_OFFSET_shift, EG_PA_SC_WINDOW_OFFSET__WINDOW_X_OFFSET_mask);
+ SETfield(evergreen->PA_SC_WINDOW_OFFSET.u32All, 0,
+ EG_PA_SC_WINDOW_OFFSET__WINDOW_Y_OFFSET_shift, EG_PA_SC_WINDOW_OFFSET__WINDOW_Y_OFFSET_mask);
+
+ SETbit(evergreen->PA_SC_WINDOW_SCISSOR_TL.u32All, WINDOW_OFFSET_DISABLE_bit);
+
+ evergreen->PA_SC_CLIPRECT_RULE.u32All = 0x0000FFFF;
+
+ evergreen->PA_SC_EDGERULE.u32All = 0xAAAAAAAA;
+
+ // Set up Z min/max:
+ evergreen->viewport[id].PA_SC_VPORT_ZMIN_0.f32All = 0.0;
+ evergreen->viewport[id].PA_SC_VPORT_ZMAX_0.f32All = 1.0;
+
+ SETfield(evergreen->CB_TARGET_MASK.u32All, 0xF, TARGET0_ENABLE_shift, TARGET0_ENABLE_mask);
+ SETfield(evergreen->CB_SHADER_MASK.u32All, 0xF, OUTPUT0_ENABLE_shift, OUTPUT0_ENABLE_mask);
+
+ SETfield(evergreen->SPI_BARYC_CNTL.u32All, 1,
+ EG_SPI_BARYC_CNTL__PERSP_CENTROID_ENA_shift,
+ EG_SPI_BARYC_CNTL__PERSP_CENTROID_ENA_mask);
+ SETfield(evergreen->SPI_BARYC_CNTL.u32All, 1,
+ EG_SPI_BARYC_CNTL__LINEAR_CENTROID_ENA_shift,
+ EG_SPI_BARYC_CNTL__LINEAR_CENTROID_ENA_mask);
+
+ // Turn off vgt reuse:
+ evergreen->VGT_REUSE_OFF.u32All = 0;
+ SETbit(evergreen->VGT_REUSE_OFF.u32All, REUSE_OFF_bit);
+
+ // Specify offsetting and clamp values for vertices:
+ evergreen->VGT_MAX_VTX_INDX.u32All = 0xFFFFFF;
+ evergreen->VGT_MIN_VTX_INDX.u32All = 0;
+ evergreen->VGT_INDX_OFFSET.u32All = 0;
+
+ evergreen->VGT_DMA_NUM_INSTANCES.u32All = 1;
+
+ // Do not alpha blend:
+ SETfield(evergreen->SX_ALPHA_TEST_CONTROL.u32All, REF_NEVER,
+ ALPHA_FUNC_shift, ALPHA_FUNC_mask);
+ CLEARbit(evergreen->SX_ALPHA_TEST_CONTROL.u32All, ALPHA_TEST_ENABLE_bit);
+
+ evergreen->SPI_VS_OUT_ID_0.u32All = 0x03020100;
+ evergreen->SPI_VS_OUT_ID_1.u32All = 0x07060504;
+
+ evergreen->SPI_PS_INPUT_CNTL[0].u32All = 0x00000800;
+ evergreen->SPI_PS_INPUT_CNTL[1].u32All = 0x00000801;
+ evergreen->SPI_PS_INPUT_CNTL[2].u32All = 0x00000802;
+
+
+ // Depth buffer currently disabled:
+ evergreen->DB_DEPTH_CONTROL.u32All = 0;
+ SETbit(evergreen->DB_DEPTH_CONTROL.u32All, Z_WRITE_ENABLE_bit);
+ SETfield(evergreen->DB_DEPTH_CONTROL.u32All, FRAG_ALWAYS,
+ ZFUNC_shift, ZFUNC_mask);
+
+ evergreen->DB_Z_READ_BASE.u32All = 0;
+ evergreen->DB_Z_WRITE_BASE.u32All = 0;
+
+ evergreen->DB_DEPTH_CLEAR.f32All = 1.0;
+
+ evergreen->DB_DEPTH_VIEW.u32All = 0;
+
+ evergreen->DB_SHADER_CONTROL.u32All = 0;
+ SETbit(evergreen->DB_SHADER_CONTROL.u32All, EG_DB_SHADER_CONTROL__DUAL_EXPORT_ENABLE_bit);
+
+ evergreen->DB_Z_INFO.u32All = 0;
+ SETfield(evergreen->DB_Z_INFO.u32All , ARRAY_1D_TILED_THIN1,
+ EG_DB_Z_INFO__ARRAY_MODE_shift, EG_DB_Z_INFO__ARRAY_MODE_mask);
+ SETfield(evergreen->DB_Z_INFO.u32All , EG_Z_24,
+ EG_DB_Z_INFO__FORMAT_shift, EG_DB_Z_INFO__FORMAT_mask);
+ SETfield(evergreen->DB_Z_INFO.u32All , EG_ADDR_SURF_TILE_SPLIT_256B,
+ EG_DB_Z_INFO__TILE_SPLIT_shift, EG_DB_Z_INFO__TILE_SPLIT_mask);
+ SETfield(evergreen->DB_Z_INFO.u32All , EG_ADDR_SURF_8_BANK,
+ EG_DB_Z_INFO__NUM_BANKS_shift, EG_DB_Z_INFO__NUM_BANKS_mask);
+ SETfield(evergreen->DB_Z_INFO.u32All , EG_ADDR_SURF_BANK_WIDTH_1,
+ EG_DB_Z_INFO__BANK_WIDTH_shift, EG_DB_Z_INFO__BANK_WIDTH_mask);
+ SETfield(evergreen->DB_Z_INFO.u32All , EG_ADDR_SURF_BANK_HEIGHT_1,
+ EG_DB_Z_INFO__BANK_HEIGHT_shift, EG_DB_Z_INFO__BANK_HEIGHT_mask);
+
+ evergreen->DB_STENCIL_INFO.u32All = 0;
+ CLEARbit(evergreen->DB_STENCIL_INFO.u32All, EG_DB_STENCIL_INFO__FORMAT_bit);
+ SETfield(evergreen->DB_STENCIL_INFO.u32All, EG_ADDR_SURF_TILE_SPLIT_256B,
+ EG_DB_STENCIL_INFO__TILE_SPLIT_shift, EG_DB_STENCIL_INFO__TILE_SPLIT_mask);
+
+ evergreen->DB_RENDER_CONTROL.u32All = 0;
+
+ evergreen->DB_RENDER_OVERRIDE.u32All = 0;
+ SETfield(evergreen->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask);
+ SETfield(evergreen->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask);
+ SETfield(evergreen->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask);
+
+ // Disable ROP3 modes by setting src to dst copy:
+ SETfield(evergreen->CB_COLOR_CONTROL.u32All, 0xCC,
+ EG_CB_COLOR_CONTROL__ROP3_shift,
+ EG_CB_COLOR_CONTROL__ROP3_mask);
+ SETfield(evergreen->CB_COLOR_CONTROL.u32All, EG_CB_NORMAL,
+ EG_CB_COLOR_CONTROL__MODE_shift,
+ EG_CB_COLOR_CONTROL__MODE_mask);
+
+ SETfield(evergreen->CB_BLEND0_CONTROL.u32All,
+ BLEND_ONE, COLOR_SRCBLEND_shift, COLOR_SRCBLEND_mask);
+
+ SETfield(evergreen->CB_BLEND0_CONTROL.u32All,
+ BLEND_ONE, ALPHA_SRCBLEND_shift, ALPHA_SRCBLEND_mask);
+
+ //evergreen->PA_CL_CLIP_CNTL.CLIP_DISABLE = 1;
+
+ SETbit(evergreen->PA_CL_CLIP_CNTL.u32All, DX_LINEAR_ATTR_CLIP_ENA_bit);
+
+ // Set up the culling control register:
+ SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, 2,
+ POLYMODE_FRONT_PTYPE_shift, POLYMODE_FRONT_PTYPE_mask); // draw using triangles
+ SETfield(evergreen->PA_SU_SC_MODE_CNTL.u32All, 2,
+ POLYMODE_BACK_PTYPE_shift, POLYMODE_BACK_PTYPE_mask); // draw using triangles
+
+ // Do scale XY or X by 1/W0. eg:
+ evergreen->bEnablePerspective = GL_TRUE;
+
+ CLEARbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
+ CLEARbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
+ SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
+
+ // Enable viewport scaling for all three axis:
+ SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VPORT_X_SCALE_ENA_bit);
+ SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VPORT_X_OFFSET_ENA_bit);
+ SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VPORT_Y_SCALE_ENA_bit);
+ SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VPORT_Y_OFFSET_ENA_bit);
+ SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VPORT_Z_SCALE_ENA_bit);
+ SETbit(evergreen->PA_CL_VTE_CNTL.u32All, VPORT_Z_OFFSET_ENA_bit);
+
+ // Set up point sizes and min/max values:
+ SETfield(evergreen->PA_SU_POINT_SIZE.u32All, 0x8,
+ PA_SU_POINT_SIZE__HEIGHT_shift, PA_SU_POINT_SIZE__HEIGHT_mask);
+ SETfield(evergreen->PA_SU_POINT_SIZE.u32All, 0x8,
+ PA_SU_POINT_SIZE__WIDTH_shift, PA_SU_POINT_SIZE__WIDTH_mask);
+ CLEARfield(evergreen->PA_SU_POINT_MINMAX.u32All, MIN_SIZE_mask);
+ SETfield(evergreen->PA_SU_POINT_MINMAX.u32All, 0x8000, MAX_SIZE_shift, MAX_SIZE_mask);
+ SETfield(evergreen->PA_SU_LINE_CNTL.u32All,0x8,
+ PA_SU_LINE_CNTL__WIDTH_shift, PA_SU_LINE_CNTL__WIDTH_mask);
+
+ // Set up line control:
+ evergreen->PA_SC_LINE_CNTL.u32All = 0;
+ CLEARbit(evergreen->PA_SC_LINE_CNTL.u32All, EXPAND_LINE_WIDTH_bit);
+ SETbit(evergreen->PA_SC_LINE_CNTL.u32All, LAST_PIXEL_bit);
+
+ // Set up vertex control:
+ evergreen->PA_SU_VTX_CNTL.u32All = 0;
+ CLEARfield(evergreen->PA_SU_VTX_CNTL.u32All, QUANT_MODE_mask);
+ SETbit(evergreen->PA_SU_VTX_CNTL.u32All, PIX_CENTER_bit);
+ SETfield(evergreen->PA_SU_VTX_CNTL.u32All, X_ROUND_TO_EVEN,
+ PA_SU_VTX_CNTL__ROUND_MODE_shift, PA_SU_VTX_CNTL__ROUND_MODE_mask);
+
+ // to 1.0 = no guard band:
+ evergreen->PA_CL_GB_VERT_CLIP_ADJ.u32All = 0x3F800000; // 1.0
+ evergreen->PA_CL_GB_VERT_DISC_ADJ.u32All = 0x3F800000; // 1.0
+ evergreen->PA_CL_GB_HORZ_CLIP_ADJ.u32All = 0x3F800000; // 1.0
+ evergreen->PA_CL_GB_HORZ_DISC_ADJ.u32All = 0x3F800000; // 1.0
+
+ // Diable color compares:
+ SETfield(evergreen->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS,
+ CLRCMP_FCN_SRC_shift, CLRCMP_FCN_SRC_mask);
+ SETfield(evergreen->CB_CLRCMP_CONTROL.u32All, CLRCMP_DRAW_ALWAYS,
+ CLRCMP_FCN_DST_shift, CLRCMP_FCN_DST_mask);
+ SETfield(evergreen->CB_CLRCMP_CONTROL.u32All, CLRCMP_SEL_SRC,
+ CLRCMP_FCN_SEL_shift, CLRCMP_FCN_SEL_mask);
+
+ // Zero out source:
+ evergreen->CB_CLRCMP_SRC.u32All = 0x00000000;
+
+ // Put a compare color in for error checking:
+ evergreen->CB_CLRCMP_DST.u32All = 0x000000FF;
+
+ // Set up color compare mask:
+ evergreen->CB_CLRCMP_MSK.u32All = 0xFFFFFFFF;
+
+ // Enable all samples for multi-sample anti-aliasing:
+ evergreen->PA_SC_AA_MASK.u32All = 0xFFFFFFFF;
+ // Turn off AA:
+ evergreen->PA_SC_AA_CONFIG.u32All = 0;
+
+ SETfield(evergreen->VGT_OUT_DEALLOC_CNTL.u32All, 16,
+ DEALLOC_DIST_shift, DEALLOC_DIST_mask);
+ SETfield(evergreen->VGT_VERTEX_REUSE_BLOCK_CNTL.u32All, 14,
+ VTX_REUSE_DEPTH_shift, VTX_REUSE_DEPTH_mask);
+
+ evergreen->SX_MISC.u32All = 0;
+
+ SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, 1,
+ EG_CB_COLOR0_INFO__SOURCE_FORMAT_shift, EG_CB_COLOR0_INFO__SOURCE_FORMAT_mask);
+ SETbit(evergreen->render_target[id].CB_COLOR0_INFO.u32All, EG_CB_COLOR0_INFO__BLEND_CLAMP_bit);
+ SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, 0,
+ EG_CB_COLOR0_INFO__NUMBER_TYPE_shift, EG_CB_COLOR0_INFO__NUMBER_TYPE_mask);
+
+ SETfield(evergreen->render_target[id].CB_COLOR0_INFO.u32All, SWAP_STD,
+ EG_CB_COLOR0_INFO__COMP_SWAP_shift, EG_CB_COLOR0_INFO__COMP_SWAP_mask);
+
+ evergreen->render_target[id].CB_COLOR0_VIEW.u32All = 0;
+ evergreen->render_target[id].CB_COLOR0_CMASK.u32All = 0;
+ evergreen->render_target[id].CB_COLOR0_FMASK.u32All = 0;
+ evergreen->render_target[id].CB_COLOR0_FMASK_SLICE.u32All = 0;
+
+ evergreenInitSQConfig(ctx);
+
+ context->radeon.hw.all_dirty = GL_TRUE;
+}
+
+void evergreenInitStateFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
+{
+ functions->UpdateState = evergreenInvalidateState;
+ functions->AlphaFunc = evergreenAlphaFunc;
+ functions->BlendColor = evergreenBlendColor;
+ functions->BlendEquationSeparate = evergreenBlendEquationSeparate;
+ functions->BlendFuncSeparate = evergreenBlendFuncSeparate;
+ functions->Enable = evergreenEnable;
+ functions->ColorMask = evergreenColorMask;
+ functions->DepthFunc = evergreenDepthFunc;
+ functions->DepthMask = evergreenDepthMask;
+ functions->CullFace = evergreenCullFace;
+ functions->Fogfv = evergreenFogfv;
+ functions->FrontFace = evergreenFrontFace;
+ functions->ShadeModel = evergreenShadeModel;
+ functions->LogicOpcode = evergreenLogicOpcode;
+
+ /* ARB_point_parameters */
+ functions->PointParameterfv = evergreenPointParameter;
+
+ /* Stencil related */
+ functions->StencilFuncSeparate = evergreenStencilFuncSeparate;
+ functions->StencilMaskSeparate = evergreenStencilMaskSeparate;
+ functions->StencilOpSeparate = evergreenStencilOpSeparate;
+
+ /* Viewport related */
+ functions->Viewport = evergreenViewport;
+ functions->DepthRange = evergreenDepthRange;
+ functions->PointSize = evergreenPointSize;
+ functions->LineWidth = evergreenLineWidth;
+ functions->LineStipple = evergreenLineStipple;
+
+ functions->PolygonOffset = evergreenPolygonOffset;
+ functions->PolygonMode = evergreenPolygonMode;
+
+ functions->RenderMode = evergreenRenderMode;
+
+ functions->ClipPlane = evergreenClipPlane;
+
+ functions->Scissor = radeonScissor;
+
+ functions->DrawBuffer = radeonDrawBuffer;
+ functions->ReadBuffer = radeonReadBuffer;
+
+ if (radeon->radeonScreen->kernel_mm) {
+ functions->CopyPixels = _mesa_meta_CopyPixels;
+ functions->DrawPixels = _mesa_meta_DrawPixels;
+ functions->ReadPixels = radeonReadPixels;
+ }
+}
+
+
diff --git a/src/mesa/drivers/dri/r600/evergreen_state.h b/src/mesa/drivers/dri/r600/evergreen_state.h
new file mode 100644
index 0000000000..ffdb56b38a
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_state.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _EVERGREEN_STATE_H_
+#define _EVERGREEN_STATE_H_
+
+#include "main/mtypes.h"
+
+#include "r600_context.h"
+
+extern void evergreenUpdateStateParameters(GLcontext * ctx, GLuint new_state);
+extern void evergreenUpdateShaders(GLcontext * ctx);
+extern void evergreenUpdateShaderStates(GLcontext * ctx);
+
+extern void evergreeUpdateShaders(GLcontext * ctx);
+
+extern void evergreenUpdateViewportOffset(GLcontext * ctx);
+
+extern void evergreenInitState(GLcontext * ctx);
+extern void evergreenInitStateFuncs (radeonContextPtr radeon, struct dd_function_table *functions);
+
+extern void evergreenSetScissor(context_t *context);
+
+#endif /* _EVERGREEN_STATE_H_ */
diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.c b/src/mesa/drivers/dri/r600/evergreen_tex.c
new file mode 100644
index 0000000000..8b42045ebb
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_tex.c
@@ -0,0 +1,1551 @@
+/*
+ * Copyright (C) 2008-2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/colormac.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/teximage.h"
+#include "main/mipmap.h"
+#include "main/simple_list.h"
+#include "main/texstore.h"
+#include "main/texobj.h"
+
+#include "texmem.h"
+
+#include "r600_context.h"
+#include "radeon_mipmap_tree.h"
+#include "evergreen_diff.h"
+#include "evergreen_tex.h"
+#include "evergreen_fragprog.h"
+#include "evergreen_vertprog.h"
+
+#include "r600_tex.h"
+
+static unsigned int evergreen_translate_wrap_mode(GLenum wrapmode)
+{
+ switch(wrapmode) {
+ case GL_REPEAT: return SQ_TEX_WRAP;
+ case GL_CLAMP: return SQ_TEX_CLAMP_HALF_BORDER;
+ case GL_CLAMP_TO_EDGE: return SQ_TEX_CLAMP_LAST_TEXEL;
+ case GL_CLAMP_TO_BORDER: return SQ_TEX_CLAMP_BORDER;
+ case GL_MIRRORED_REPEAT: return SQ_TEX_MIRROR;
+ case GL_MIRROR_CLAMP_EXT: return SQ_TEX_MIRROR_ONCE_HALF_BORDER;
+ case GL_MIRROR_CLAMP_TO_EDGE_EXT: return SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
+ case GL_MIRROR_CLAMP_TO_BORDER_EXT: return SQ_TEX_MIRROR_ONCE_BORDER;
+ default:
+ radeon_error("bad wrap mode in %s", __FUNCTION__);
+ return 0;
+ }
+}
+
+static GLboolean evergreenGetTexFormat(struct gl_texture_object *tObj, gl_format mesa_format)
+{
+ radeonTexObj *t = radeon_tex_obj(tObj);
+
+ CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ CLEARfield(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ CLEARbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_X_shift,
+ FORMAT_COMP_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_Y_shift,
+ FORMAT_COMP_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_Z_shift,
+ FORMAT_COMP_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_W_shift,
+ FORMAT_COMP_W_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE1, ARRAY_LINEAR_GENERAL,
+ EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift,
+ EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask);
+
+ switch (mesa_format) /* This is mesa format. */
+ {
+ case MESA_FORMAT_RGBA8888:
+ case MESA_FORMAT_SIGNED_RGBA8888:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888) {
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+ }
+ break;
+ case MESA_FORMAT_RGBA8888_REV:
+ case MESA_FORMAT_SIGNED_RGBA8888_REV:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ if (mesa_format == MESA_FORMAT_SIGNED_RGBA8888_REV) {
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_SIGNED,
+ FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+ }
+ break;
+ case MESA_FORMAT_ARGB8888:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_XRGB8888:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_XRGB8888_REV:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB8888_REV:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB888:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB565:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_5_6_5,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB565_REV:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_5_6_5,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB4444:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_4_4_4_4,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB4444_REV:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_4_4_4_4,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB1555:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_1_5_5_5,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ARGB1555_REV:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_1_5_5_5,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_AL88:
+ case MESA_FORMAT_AL88_REV: /* TODO : Check this. */
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB332:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_3_3_2,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_A8: /* ZERO, ZERO, ZERO, X */
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_L8: /* X, X, X, ONE */
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_I8: /* X, X, X, X */
+ case MESA_FORMAT_CI8:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB_DXT1: /* not supported yet */
+ case MESA_FORMAT_RGBA_DXT1: /* not supported yet */
+ case MESA_FORMAT_RGBA_DXT3: /* not supported yet */
+ case MESA_FORMAT_RGBA_DXT5: /* not supported yet */
+ return GL_FALSE;
+
+ case MESA_FORMAT_RGBA_FLOAT32:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_32_32_32_32_FLOAT,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGBA_FLOAT16:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_16_16_16_16_FLOAT,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB_FLOAT32: /* X, Y, Z, ONE */
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_32_32_32_FLOAT,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_RGB_FLOAT16:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_16_16_16_FLOAT,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ALPHA_FLOAT32: /* ZERO, ZERO, ZERO, X */
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_32_FLOAT,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_ALPHA_FLOAT16: /* ZERO, ZERO, ZERO, X */
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_16_FLOAT,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_FLOAT32: /* X, X, X, ONE */
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_32_FLOAT,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_FLOAT16: /* X, X, X, ONE */
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_16_FLOAT,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_32_32_FLOAT,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_16_16_FLOAT,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_INTENSITY_FLOAT32: /* X, X, X, X */
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_32_FLOAT,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_INTENSITY_FLOAT16: /* X, X, X, X */
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_16_FLOAT,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case MESA_FORMAT_Z16:
+ case MESA_FORMAT_X8_Z24:
+ case MESA_FORMAT_S8_Z24:
+ case MESA_FORMAT_Z24_S8:
+ case MESA_FORMAT_Z32:
+ case MESA_FORMAT_S8:
+ CLEARbit(t->SQ_TEX_RESOURCE0, EG_SQ_TEX_RESOURCE_WORD0_0__NDTO_bit);
+ SETfield(t->SQ_TEX_RESOURCE1, ARRAY_1D_TILED_THIN1,
+ EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift,
+ EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask);
+ switch (mesa_format) {
+ case MESA_FORMAT_Z16:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_16,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+ break;
+ case MESA_FORMAT_X8_Z24:
+ case MESA_FORMAT_S8_Z24:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_24,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+ break;
+ case MESA_FORMAT_Z24_S8:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_24_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+ break;
+ case MESA_FORMAT_Z32:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_32,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+ break;
+ case MESA_FORMAT_S8:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+ break;
+ default:
+ break;
+ };
+ switch (tObj->DepthMode) {
+ case GL_LUMINANCE: /* X, X, X, ONE */
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case GL_INTENSITY: /* X, X, X, X */
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ case GL_ALPHA: /* ZERO, ZERO, ZERO, X */
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_0,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ break;
+ default:
+ return GL_FALSE;
+ }
+ break;
+ /* EXT_texture_sRGB */
+ case MESA_FORMAT_SRGBA8:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ break;
+ case MESA_FORMAT_SLA8:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ break;
+ case MESA_FORMAT_SL8: /* X, X, X, ONE */
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ break;
+ default:
+ /* Not supported format */
+ return GL_FALSE;
+ };
+
+ return GL_TRUE;
+}
+
+static GLuint evergreen_translate_shadow_func(GLenum func)
+{
+ switch (func) {
+ case GL_NEVER:
+ return SQ_TEX_DEPTH_COMPARE_NEVER;
+ case GL_LESS:
+ return SQ_TEX_DEPTH_COMPARE_LESS;
+ case GL_LEQUAL:
+ return SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
+ case GL_GREATER:
+ return SQ_TEX_DEPTH_COMPARE_GREATER;
+ case GL_GEQUAL:
+ return SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
+ case GL_NOTEQUAL:
+ return SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
+ case GL_EQUAL:
+ return SQ_TEX_DEPTH_COMPARE_EQUAL;
+ case GL_ALWAYS:
+ return SQ_TEX_DEPTH_COMPARE_ALWAYS;
+ default:
+ WARN_ONCE("Unknown shadow compare function! %d", func);
+ return 0;
+ }
+}
+
+static void evergreenUpdateTexWrap(radeonTexObjPtr t)
+{
+ struct gl_texture_object *tObj = &t->base;
+
+ SETfield(t->SQ_TEX_SAMPLER0, evergreen_translate_wrap_mode(tObj->WrapS),
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask);
+
+ if (tObj->Target != GL_TEXTURE_1D)
+ {
+ SETfield(t->SQ_TEX_SAMPLER0, evergreen_translate_wrap_mode(tObj->WrapT),
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Y_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Y_mask);
+
+ if (tObj->Target == GL_TEXTURE_3D)
+ SETfield(t->SQ_TEX_SAMPLER0, evergreen_translate_wrap_mode(tObj->WrapR),
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Z_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_Z_mask);
+ }
+}
+
+static void evergreenSetTexDefaultState(radeonTexObjPtr t)
+{
+ /* Init text object to default states. */
+ t->SQ_TEX_RESOURCE0 = 0;
+ t->SQ_TEX_RESOURCE1 = 0;
+ t->SQ_TEX_RESOURCE2 = 0;
+ t->SQ_TEX_RESOURCE3 = 0;
+ t->SQ_TEX_RESOURCE4 = 0;
+ t->SQ_TEX_RESOURCE5 = 0;
+ t->SQ_TEX_RESOURCE6 = 0;
+ t->SQ_TEX_RESOURCE7 = 0;
+
+ SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_2D,
+ EG_SQ_TEX_RESOURCE_WORD0_0__DIM_shift,
+ EG_SQ_TEX_RESOURCE_WORD0_0__DIM_mask);
+
+ CLEARbit(t->SQ_TEX_RESOURCE0, EG_SQ_TEX_RESOURCE_WORD0_0__NDTO_bit);
+
+ SETfield(t->SQ_TEX_RESOURCE1, ARRAY_LINEAR_GENERAL,
+ EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift,
+ EG_SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_X_shift, FORMAT_COMP_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_Y_shift, FORMAT_COMP_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_Z_shift, FORMAT_COMP_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_FORMAT_COMP_UNSIGNED,
+ FORMAT_COMP_W_shift, FORMAT_COMP_W_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_NUM_FORMAT_NORM,
+ SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift, SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask);
+ CLEARbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit);
+ CLEARbit(t->SQ_TEX_RESOURCE4, SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_ENDIAN_NONE,
+ SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift, SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, 0,
+ BASE_LEVEL_shift,
+ BASE_LEVEL_mask); /* mip-maps */
+
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+ SETfield(t->SQ_TEX_RESOURCE7, SQ_TEX_VTX_VALID_TEXTURE,
+ EG_SQ_TEX_RESOURCE_WORD7_0__TYPE_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__TYPE_mask);
+
+ /* Initialize sampler registers */
+ t->SQ_TEX_SAMPLER0 = 0;
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP,
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP,
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_WRAP,
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_XY_FILTER_POINT,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_XY_FILTER_POINT,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_Z_FILTER_NONE,
+ EG_SQ_TEX_SAMPLER_WORD0_0__Z_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__Z_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_Z_FILTER_NONE,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_TRANS_BLACK,
+ EG_SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE_mask);
+
+ t->SQ_TEX_SAMPLER1 = 0;
+ SETfield(t->SQ_TEX_SAMPLER1, 0x7ff,
+ EG_SQ_TEX_SAMPLER_WORD1_0__MAX_LOD_shift,
+ EG_SQ_TEX_SAMPLER_WORD1_0__MAX_LOD_mask);
+
+ t->SQ_TEX_SAMPLER2 = 0;
+ SETbit(t->SQ_TEX_SAMPLER2, EG_SQ_TEX_SAMPLER_WORD2_0__TYPE_bit);
+}
+
+static void evergreenSetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
+{
+ /* Force revalidation to account for switches from/to mipmapping. */
+ t->validated = GL_FALSE;
+
+ /* Note that EXT_texture_filter_anisotropic is extremely vague about
+ * how anisotropic filtering interacts with the "normal" filter modes.
+ * When anisotropic filtering is enabled, we override min and mag
+ * filter settings completely. This includes driconf's settings.
+ */
+ if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) {
+ /*t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO
+ | R300_TX_MIN_FILTER_ANISO
+ | R300_TX_MIN_FILTER_MIP_LINEAR
+ | aniso_filter(anisotropy);*/
+ radeon_print(RADEON_TEXTURE, RADEON_NORMAL, "Using maximum anisotropy of %f\n", anisotropy);
+ return;
+ }
+
+ switch (minf)
+ {
+ case GL_NEAREST:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_None,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
+ break;
+ case GL_LINEAR:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_None,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
+ break;
+ case GL_NEAREST_MIPMAP_NEAREST:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Point,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
+ break;
+ case GL_NEAREST_MIPMAP_LINEAR:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Linear,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
+ break;
+ case GL_LINEAR_MIPMAP_NEAREST:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Point,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
+ break;
+ case GL_LINEAR_MIPMAP_LINEAR:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MIN_FILTER_mask);
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_MipFilter_Linear,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__MIP_FILTER_mask);
+ break;
+ }
+
+ /* Note we don't have 3D mipmaps so only use the mag filter setting
+ * to set the 3D texture filter mode.
+ */
+ switch (magf)
+ {
+ case GL_NEAREST:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Point,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_mask);
+ break;
+ case GL_LINEAR:
+ SETfield(t->SQ_TEX_SAMPLER0, TEX_XYFilter_Linear,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__XY_MAG_FILTER_mask);
+ break;
+ }
+}
+
+static void evergreenSetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4])
+{
+ t->TD_PS_SAMPLER0_BORDER_ALPHA = *((uint32_t*)&(color[3]));
+ t->TD_PS_SAMPLER0_BORDER_RED = *((uint32_t*)&(color[2]));
+ t->TD_PS_SAMPLER0_BORDER_GREEN = *((uint32_t*)&(color[1]));
+ t->TD_PS_SAMPLER0_BORDER_BLUE = *((uint32_t*)&(color[0]));
+
+ SETfield(t->SQ_TEX_SAMPLER0, SQ_TEX_BORDER_COLOR_REGISTER,
+ EG_SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__BORDER_COLOR_TYPE_mask);
+}
+
+static void evergreenSetDepthTexMode(struct gl_texture_object *tObj)
+{
+ radeonTexObjPtr t;
+
+ if (!tObj)
+ return;
+
+ t = radeon_tex_obj(tObj);
+
+ if(!evergreenGetTexFormat(tObj, tObj->Image[0][tObj->BaseLevel]->TexFormat))
+ t->validated = GL_FALSE;
+}
+
+static INLINE uint32_t
+EG_S_FIXED(float value, uint32_t frac_bits)
+{
+ return value * (1 << frac_bits);
+}
+
+static GLboolean evergreen_setup_hardware_state(GLcontext * ctx, struct gl_texture_object *texObj, int unit)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ radeonTexObj *t = radeon_tex_obj(texObj);
+ const struct gl_texture_image *firstImage;
+ GLuint uTexelPitch, row_align;
+
+ if (context->radeon.radeonScreen->driScreen->dri2.enabled &&
+ t->image_override &&
+ t->bo)
+ return GL_TRUE;
+
+ firstImage = t->base.Image[0][t->minLod];
+
+ if (!t->image_override) {
+ if (!evergreenGetTexFormat(texObj, firstImage->TexFormat)) {
+ radeon_warning("unsupported texture format in %s\n",
+ __FUNCTION__);
+ return GL_FALSE;
+ }
+ }
+
+ switch (texObj->Target)
+ {
+ case GL_TEXTURE_1D:
+ SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_1D,
+ EG_SQ_TEX_RESOURCE_WORD0_0__DIM_shift,
+ EG_SQ_TEX_RESOURCE_WORD0_0__DIM_mask);
+ SETfield(t->SQ_TEX_RESOURCE1, 0,
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_shift,
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_mask);
+ break;
+ case GL_TEXTURE_2D:
+ case GL_TEXTURE_RECTANGLE_NV:
+ SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_2D,
+ EG_SQ_TEX_RESOURCE_WORD0_0__DIM_shift,
+ EG_SQ_TEX_RESOURCE_WORD0_0__DIM_mask);
+ SETfield(t->SQ_TEX_RESOURCE1, 0,
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_shift,
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_mask);
+ break;
+ case GL_TEXTURE_3D:
+ SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_3D,
+ EG_SQ_TEX_RESOURCE_WORD0_0__DIM_shift,
+ EG_SQ_TEX_RESOURCE_WORD0_0__DIM_mask);
+ SETfield(t->SQ_TEX_RESOURCE1, (firstImage->Depth - 1), // ???
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_shift,
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_mask);
+ break;
+ case GL_TEXTURE_CUBE_MAP:
+ SETfield(t->SQ_TEX_RESOURCE0, SQ_TEX_DIM_CUBEMAP,
+ EG_SQ_TEX_RESOURCE_WORD0_0__DIM_shift,
+ EG_SQ_TEX_RESOURCE_WORD0_0__DIM_mask);
+ SETfield(t->SQ_TEX_RESOURCE1, 0,
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_shift,
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_DEPTH_mask);
+ break;
+ default:
+ radeon_error("unexpected texture target type in %s\n", __FUNCTION__);
+ return GL_FALSE;
+ }
+
+ row_align = context->radeon.texture_row_align - 1;
+ uTexelPitch = (_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align;
+ uTexelPitch = uTexelPitch / _mesa_get_format_bytes(firstImage->TexFormat);
+ uTexelPitch = (uTexelPitch + R700_TEXEL_PITCH_ALIGNMENT_MASK)
+ & ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+
+ /* min pitch is 8 */
+ if (uTexelPitch < 8)
+ uTexelPitch = 8;
+
+ SETfield(t->SQ_TEX_RESOURCE0, (uTexelPitch/8)-1,
+ EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_shift,
+ EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_mask);
+ SETfield(t->SQ_TEX_RESOURCE0, firstImage->Width - 1,
+ EG_SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH_shift,
+ EG_SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH_mask);
+ SETfield(t->SQ_TEX_RESOURCE1, firstImage->Height - 1,
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT_shift,
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT_mask);
+
+ t->SQ_TEX_RESOURCE2 = get_base_teximage_offset(t) / 256;
+
+ t->SQ_TEX_RESOURCE3 = radeon_miptree_image_offset(t->mt, 0, t->minLod + 1) / 256;
+
+ SETfield(t->SQ_TEX_RESOURCE4, 0, BASE_LEVEL_shift, BASE_LEVEL_mask);
+ SETfield(t->SQ_TEX_RESOURCE5, t->maxLod - t->minLod, LAST_LEVEL_shift, LAST_LEVEL_mask);
+
+ SETfield(t->SQ_TEX_SAMPLER1,
+ EG_S_FIXED(CLAMP(t->base.MinLod - t->minLod, 0, 15), 6),
+ EG_SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift,
+ EG_SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_mask);
+ SETfield(t->SQ_TEX_SAMPLER1,
+ EG_S_FIXED(CLAMP(t->base.MaxLod - t->minLod, 0, 15), 6),
+ EG_SQ_TEX_SAMPLER_WORD1_0__MAX_LOD_shift,
+ EG_SQ_TEX_SAMPLER_WORD1_0__MAX_LOD_mask);
+ SETfield(t->SQ_TEX_SAMPLER2,
+ EG_S_FIXED(CLAMP(ctx->Texture.Unit[unit].LodBias + t->base.LodBias, -16, 16), 6),
+ EG_SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift,
+ EG_SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_mask);
+
+ if(texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE_ARB)
+ {
+ SETfield(t->SQ_TEX_SAMPLER0, evergreen_translate_shadow_func(texObj->CompareFunc),
+ EG_SQ_TEX_SAMPLER_WORD0_0__DCF_shift,
+ EG_SQ_TEX_SAMPLER_WORD0_0__DCF_mask);
+ }
+ else
+ {
+ CLEARfield(t->SQ_TEX_SAMPLER0, EG_SQ_TEX_SAMPLER_WORD0_0__DCF_mask);
+ }
+
+ return GL_TRUE;
+}
+
+void evergreenSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+ unsigned long long offset, GLint depth, GLuint pitch)
+{
+ context_t *rmesa = pDRICtx->driverPrivate;
+ struct gl_texture_object *tObj =
+ _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+ radeonTexObjPtr t = radeon_tex_obj(tObj);
+ const struct gl_texture_image *firstImage;
+ uint32_t pitch_val, size, row_align;
+
+ if (!tObj)
+ return;
+
+ t->image_override = GL_TRUE;
+
+ if (!offset)
+ return;
+
+ firstImage = t->base.Image[0][t->minLod];
+ row_align = rmesa->radeon.texture_row_align - 1;
+ size = ((_mesa_format_row_stride(firstImage->TexFormat, firstImage->Width) + row_align) & ~row_align) * firstImage->Height;
+ if (t->bo) {
+ radeon_bo_unref(t->bo);
+ t->bo = NULL;
+ }
+ t->bo = radeon_legacy_bo_alloc_fake(rmesa->radeon.radeonScreen->bom, size, offset);
+ t->override_offset = offset;
+ pitch_val = pitch;
+ switch (depth) {
+ case 32:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ pitch_val /= 4;
+ break;
+ case 24:
+ default:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ pitch_val /= 4;
+ break;
+ case 16:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_5_6_5,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ pitch_val /= 2;
+ break;
+ }
+
+ pitch_val = (pitch_val + R700_TEXEL_PITCH_ALIGNMENT_MASK)
+ & ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+
+ /* min pitch is 8 */
+ if (pitch_val < 8)
+ pitch_val = 8;
+
+ SETfield(t->SQ_TEX_RESOURCE0, (pitch_val/8)-1,
+ EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_shift,
+ EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_mask);
+}
+
+void evergreenSetTexBuffer(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv)
+{
+ struct gl_texture_unit *texUnit;
+ struct gl_texture_object *texObj;
+ struct gl_texture_image *texImage;
+ struct radeon_renderbuffer *rb;
+ radeon_texture_image *rImage;
+ radeonContextPtr radeon;
+ context_t *rmesa;
+ struct radeon_framebuffer *rfb;
+ radeonTexObjPtr t;
+ uint32_t pitch_val;
+ uint32_t internalFormat, type, format;
+
+ type = GL_BGRA;
+ format = GL_UNSIGNED_BYTE;
+ internalFormat = (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
+
+ radeon = pDRICtx->driverPrivate;
+ rmesa = pDRICtx->driverPrivate;
+
+ rfb = dPriv->driverPrivate;
+ texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
+ texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
+ texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
+
+ rImage = get_radeon_texture_image(texImage);
+ t = radeon_tex_obj(texObj);
+ if (t == NULL) {
+ return;
+ }
+
+ radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE);
+ rb = rfb->color_rb[0];
+ if (rb->bo == NULL) {
+ /* Failed to BO for the buffer */
+ return;
+ }
+
+ _mesa_lock_texture(radeon->glCtx, texObj);
+ if (t->bo) {
+ radeon_bo_unref(t->bo);
+ t->bo = NULL;
+ }
+ if (rImage->bo) {
+ radeon_bo_unref(rImage->bo);
+ rImage->bo = NULL;
+ }
+
+ radeon_miptree_unreference(&t->mt);
+ radeon_miptree_unreference(&rImage->mt);
+
+ _mesa_init_teximage_fields(radeon->glCtx, target, texImage,
+ rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
+ texImage->RowStride = rb->pitch / rb->cpp;
+
+ rImage->bo = rb->bo;
+ radeon_bo_ref(rImage->bo);
+ t->bo = rb->bo;
+ radeon_bo_ref(t->bo);
+ t->image_override = GL_TRUE;
+ t->override_offset = 0;
+ pitch_val = rb->pitch;
+ switch (rb->cpp) {
+ case 4:
+ if (glx_texture_format == __DRI_TEXTURE_FORMAT_RGB) {
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ } else {
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ }
+ pitch_val /= 4;
+ break;
+ case 3:
+ default:
+ // FMT_8_8_8 ???
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_8_8_8_8,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_W,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ pitch_val /= 4;
+ break;
+ case 2:
+ SETfield(t->SQ_TEX_RESOURCE7, FMT_5_6_5,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift,
+ EG_SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask);
+
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Z,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_Y,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_X,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask);
+ SETfield(t->SQ_TEX_RESOURCE4, SQ_SEL_1,
+ SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift, SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask);
+ pitch_val /= 2;
+ break;
+ }
+
+ pitch_val = (pitch_val + R700_TEXEL_PITCH_ALIGNMENT_MASK)
+ & ~R700_TEXEL_PITCH_ALIGNMENT_MASK;
+
+ /* min pitch is 8 */
+ if (pitch_val < 8)
+ pitch_val = 8;
+
+ SETfield(t->SQ_TEX_RESOURCE0, (pitch_val/8)-1,
+ EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_shift,
+ EG_SQ_TEX_RESOURCE_WORD0_0__PITCH_mask);
+ SETfield(t->SQ_TEX_RESOURCE0, rb->base.Width - 1,
+ EG_SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH_shift,
+ EG_SQ_TEX_RESOURCE_WORD0_0__TEX_WIDTH_mask);
+ SETfield(t->SQ_TEX_RESOURCE1, rb->base.Height - 1,
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT_shift,
+ EG_SQ_TEX_RESOURCE_WORD1_0__TEX_HEIGHT_mask);
+
+ t->validated = GL_TRUE;
+ _mesa_unlock_texture(radeon->glCtx, texObj);
+ return;
+}
+
+void evergreenUpdateTextureState(GLcontext * ctx)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT * evergreen = GET_EVERGREEN_CHIP(context);
+ struct gl_texture_unit *texUnit;
+ struct radeon_tex_obj *t;
+ GLuint unit;
+
+ EVERGREEN_STATECHANGE(context, tx);
+
+ for (unit = 0; unit < R700_MAX_TEXTURE_UNITS; unit++) {
+ texUnit = &ctx->Texture.Unit[unit];
+ t = radeon_tex_obj(ctx->Texture.Unit[unit]._Current);
+ evergreen->textures[unit] = NULL;
+ if (texUnit->_ReallyEnabled) {
+ if (!t)
+ continue;
+ evergreen->textures[unit] = t;
+ }
+ }
+}
+
+static GLboolean evergreen_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj, int unit)
+{
+ radeonTexObj *t = radeon_tex_obj(texObj);
+
+ if (!radeon_validate_texture_miptree(ctx, texObj))
+ return GL_FALSE;
+
+ /* Configure the hardware registers (more precisely, the cached version
+ * of the hardware registers). */
+ if (!evergreen_setup_hardware_state(ctx, texObj, unit))
+ return GL_FALSE;
+
+ t->validated = GL_TRUE;
+ return GL_TRUE;
+}
+
+GLboolean evergreenValidateBuffers(GLcontext * ctx)
+{
+ context_t *rmesa = EVERGREEN_CONTEXT(ctx);
+ struct radeon_renderbuffer *rrb;
+ struct radeon_bo *pbo;
+ int i;
+ int ret;
+
+ radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
+
+ rrb = radeon_get_colorbuffer(&rmesa->radeon);
+ /* color buffer */
+ if (rrb && rrb->bo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ rrb->bo, 0,
+ RADEON_GEM_DOMAIN_VRAM);
+ }
+
+ /* depth buffer */
+ rrb = radeon_get_depthbuffer(&rmesa->radeon);
+ if (rrb && rrb->bo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ rrb->bo, 0,
+ RADEON_GEM_DOMAIN_VRAM);
+ }
+
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
+ radeonTexObj *t;
+
+ if (!ctx->Texture.Unit[i]._ReallyEnabled)
+ continue;
+
+ if (!evergreen_validate_texture(ctx, ctx->Texture.Unit[i]._Current, i)) {
+ radeon_warning("failed to validate texture for unit %d.\n", i);
+ }
+ t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
+ if (t->image_override && t->bo)
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ t->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+ else if (t->mt->bo)
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ t->mt->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+ }
+
+ pbo = (struct radeon_bo *)evergreenGetActiveFpShaderBo(ctx);
+ if (pbo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ }
+
+ pbo = (struct radeon_bo *)evergreenGetActiveVpShaderBo(ctx);
+ if (pbo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ }
+
+ pbo = (struct radeon_bo *)evergreenGetActiveFpShaderConstBo(ctx);
+ if (pbo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ }
+
+ pbo = (struct radeon_bo *)evergreenGetActiveVpShaderConstBo(ctx);
+ if (pbo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ }
+
+ ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ return GL_FALSE;
+ return GL_TRUE;
+}
+
+static struct gl_texture_object *evergreenNewTextureObject(GLcontext * ctx,
+ GLuint name,
+ GLenum target)
+{
+ context_t* rmesa = EVERGREEN_CONTEXT(ctx);
+ radeonTexObj * t = CALLOC_STRUCT(radeon_tex_obj);
+
+
+ radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL,
+ "%s( %p (target = %s) )\n", __FUNCTION__,
+ t, _mesa_lookup_enum_by_nr(target));
+
+ _mesa_initialize_texture_object(&t->base, name, target);
+ t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
+
+ evergreenSetTexDefaultState(t);
+ evergreenUpdateTexWrap(t);
+ evergreenSetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
+ evergreenSetTexBorderColor(t, t->base.BorderColor.f);
+
+ return &t->base;
+}
+
+static void evergreenDeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+ context_t * rmesa = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT * evergreen = GET_EVERGREEN_CHIP(rmesa);
+ radeonTexObj* t = radeon_tex_obj(texObj);
+
+ radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_NORMAL,
+ "%s( %p (target = %s) )\n", __FUNCTION__,
+ (void *)texObj,
+ _mesa_lookup_enum_by_nr(texObj->Target));
+
+ if (rmesa) {
+ int i;
+ radeon_firevertices(&rmesa->radeon);
+
+ for(i = 0; i < R700_MAX_TEXTURE_UNITS; ++i)
+ if (evergreen->textures[i] == t)
+ evergreen->textures[i] = 0;
+ }
+
+ if (t->bo) {
+ radeon_bo_unref(t->bo);
+ t->bo = NULL;
+ }
+
+ radeon_miptree_unreference(&t->mt);
+
+ _mesa_delete_texture_object(ctx, texObj);
+}
+
+static void evergreenTexParameter(GLcontext * ctx, GLenum target,
+ struct gl_texture_object *texObj,
+ GLenum pname, const GLfloat * params)
+{
+ radeonTexObj* t = radeon_tex_obj(texObj);
+ GLenum baseFormat;
+
+ radeon_print(RADEON_STATE | RADEON_TEXTURE, RADEON_VERBOSE,
+ "%s( %s )\n", __FUNCTION__,
+ _mesa_lookup_enum_by_nr(pname));
+
+ switch (pname) {
+ case GL_TEXTURE_MIN_FILTER:
+ case GL_TEXTURE_MAG_FILTER:
+ case GL_TEXTURE_MAX_ANISOTROPY_EXT:
+ evergreenSetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
+ break;
+
+ case GL_TEXTURE_WRAP_S:
+ case GL_TEXTURE_WRAP_T:
+ case GL_TEXTURE_WRAP_R:
+ evergreenUpdateTexWrap(t);
+ break;
+
+ case GL_TEXTURE_BORDER_COLOR:
+ evergreenSetTexBorderColor(t, texObj->BorderColor.f);
+ break;
+
+ case GL_TEXTURE_BASE_LEVEL:
+ case GL_TEXTURE_MAX_LEVEL:
+ case GL_TEXTURE_MIN_LOD:
+ case GL_TEXTURE_MAX_LOD:
+ t->validated = GL_FALSE;
+ break;
+
+ case GL_DEPTH_TEXTURE_MODE:
+ if (!texObj->Image[0][texObj->BaseLevel])
+ return;
+ baseFormat = texObj->Image[0][texObj->BaseLevel]->_BaseFormat;
+ if (baseFormat == GL_DEPTH_COMPONENT ||
+ baseFormat == GL_DEPTH_STENCIL) {
+ evergreenSetDepthTexMode(texObj);
+ break;
+ } else {
+ /* If the texture isn't a depth texture, changing this
+ * state won't cause any changes to the hardware.
+ * Don't force a flush of texture state.
+ */
+ return;
+ }
+
+ default:
+ return;
+ }
+}
+
+void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
+{
+ /* Note: we only plug in the functions we implement in the driver
+ * since _mesa_init_driver_functions() was already called.
+ */
+ functions->NewTextureImage = radeonNewTextureImage;
+ functions->FreeTexImageData = radeonFreeTexImageData;
+ functions->MapTexture = radeonMapTexture;
+ functions->UnmapTexture = radeonUnmapTexture;
+
+ functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
+ functions->TexImage1D = radeonTexImage1D;
+ functions->TexImage2D = radeonTexImage2D;
+ functions->TexImage3D = radeonTexImage3D;
+ functions->TexSubImage1D = radeonTexSubImage1D;
+ functions->TexSubImage2D = radeonTexSubImage2D;
+ functions->TexSubImage3D = radeonTexSubImage3D;
+ functions->GetTexImage = radeonGetTexImage;
+ functions->GetCompressedTexImage = radeonGetCompressedTexImage;
+ functions->NewTextureObject = evergreenNewTextureObject;
+ functions->DeleteTexture = evergreenDeleteTexture;
+ functions->IsTextureResident = driIsTextureResident;
+
+ functions->TexParameter = evergreenTexParameter;
+
+ functions->CompressedTexImage2D = radeonCompressedTexImage2D;
+ functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
+
+ if (radeon->radeonScreen->kernel_mm) {
+ functions->CopyTexImage2D = radeonCopyTexImage2D;
+ functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
+ }
+
+ functions->GenerateMipmap = radeonGenerateMipmap;
+
+ driInitTextureFormats();
+}
diff --git a/src/mesa/drivers/dri/r600/evergreen_tex.h b/src/mesa/drivers/dri/r600/evergreen_tex.h
new file mode 100644
index 0000000000..b43508a9ea
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_tex.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2008-2010 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+#ifndef _EVERGREEN_TEX_H_
+#define _EVERGREEN_TEX_H_
+
+extern GLboolean evergreenValidateBuffers(GLcontext * ctx);
+
+extern void evergreenUpdateTextureState(GLcontext * ctx);
+extern void evergreenInitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions);
+extern void evergreenSetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+ unsigned long long offset, GLint depth, GLuint pitch);
+extern void evergreenSetTexBuffer(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_format, __DRIdrawable *dPriv);
+
+#endif /* _EVERGREEN_TEX_H_ */
diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.c b/src/mesa/drivers/dri/r600/evergreen_vertprog.c
new file mode 100644
index 0000000000..4f3db00c7d
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.c
@@ -0,0 +1,736 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#include "main/imports.h"
+#include "main/mtypes.h"
+
+#include "tnl/t_context.h"
+#include "program/program.h"
+#include "program/prog_parameter.h"
+#include "program/prog_statevars.h"
+
+#include "radeon_debug.h"
+#include "r600_context.h"
+#include "r600_cmdbuf.h"
+#include "r600_emit.h"
+#include "program/programopt.h"
+
+#include "evergreen_vertprog.h"
+
+unsigned int evergreen_Map_Vertex_Output(r700_AssemblerBase *pAsm,
+ struct gl_vertex_program *mesa_vp,
+ unsigned int unStart)
+{
+ unsigned int i;
+ unsigned int unBit;
+ unsigned int unTotal = unStart;
+
+ //!!!!!!! THE ORDER MATCH FS INPUT
+
+ unBit = 1 << VERT_RESULT_HPOS;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_HPOS] = unTotal++;
+ }
+
+ unBit = 1 << VERT_RESULT_COL0;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_COL0] = unTotal++;
+ }
+
+ unBit = 1 << VERT_RESULT_COL1;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_COL1] = unTotal++;
+ }
+
+ //TODO : dealing back face.
+ unBit = 1 << VERT_RESULT_BFC0;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_BFC0] = unTotal++;
+ }
+
+ unBit = 1 << VERT_RESULT_BFC1;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_BFC1] = unTotal++;
+ }
+
+ //TODO : dealing fog.
+ unBit = 1 << VERT_RESULT_FOGC;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_FOGC] = unTotal++;
+ }
+
+ //TODO : dealing point size.
+ unBit = 1 << VERT_RESULT_PSIZ;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_PSIZ] = unTotal++;
+ }
+
+ for(i=0; i<8; i++)
+ {
+ unBit = 1 << (VERT_RESULT_TEX0 + i);
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[VERT_RESULT_TEX0 + i] = unTotal++;
+ }
+ }
+
+ for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(mesa_vp->Base.OutputsWritten & unBit)
+ {
+ pAsm->ucVP_OutputMap[i] = unTotal++;
+ }
+ }
+
+ return (unTotal - unStart);
+}
+
+unsigned int evergreen_Map_Vertex_Input(r700_AssemblerBase *pAsm,
+ struct gl_vertex_program *mesa_vp,
+ unsigned int unStart)
+{
+ int i;
+ unsigned int unBit;
+ unsigned int unTotal = unStart;
+ for(i=0; i<VERT_ATTRIB_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(mesa_vp->Base.InputsRead & unBit)
+ {
+ pAsm->ucVP_AttributeMap[i] = unTotal++;
+ }
+ }
+ return (unTotal - unStart);
+}
+
+GLboolean evergreen_Process_Vertex_Program_Vfetch_Instructions(
+ struct evergreen_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp)
+{
+ int i;
+ unsigned int unBit;
+ VTX_FETCH_METHOD vtxFetchMethod;
+ vtxFetchMethod.bEnableMini = GL_FALSE;
+ vtxFetchMethod.mega_fetch_remainder = 0;
+
+ for(i=0; i<VERT_ATTRIB_MAX; i++)
+ {
+ unBit = 1 << i;
+ if(mesa_vp->Base.InputsRead & unBit)
+ {
+ assemble_vfetch_instruction(&vp->r700AsmCode,
+ i,
+ vp->r700AsmCode.ucVP_AttributeMap[i],
+ vp->aos_desc[i].size,
+ vp->aos_desc[i].type,
+ &vtxFetchMethod);
+ }
+ }
+
+ return GL_TRUE;
+}
+
+GLboolean evergreen_Process_Vertex_Program_Vfetch_Instructions2(
+ GLcontext *ctx,
+ struct evergreen_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp)
+{
+ int i;
+ context_t *context = R700_CONTEXT(ctx);
+
+ VTX_FETCH_METHOD vtxFetchMethod;
+ vtxFetchMethod.bEnableMini = GL_FALSE;
+ vtxFetchMethod.mega_fetch_remainder = 0;
+
+ for(i=0; i<context->nNumActiveAos; i++)
+ {
+ EG_assemble_vfetch_instruction(&vp->r700AsmCode,
+ vp->r700AsmCode.ucVP_AttributeMap[context->stream_desc[i].element],
+ context->stream_desc[i].type,
+ context->stream_desc[i].size,
+ context->stream_desc[i].element,
+ context->stream_desc[i]._signed,
+ context->stream_desc[i].normalize,
+ context->stream_desc[i].format,
+ &vtxFetchMethod);
+ }
+
+ return GL_TRUE;
+}
+
+void evergreen_Map_Vertex_Program(GLcontext *ctx,
+ struct evergreen_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp)
+{
+ GLuint ui;
+ r700_AssemblerBase *pAsm = &(vp->r700AsmCode);
+ unsigned int num_inputs;
+
+ // R0 will always be used for index into vertex buffer
+ pAsm->number_used_registers = 1;
+ pAsm->starting_vfetch_register_number = pAsm->number_used_registers;
+
+ // Map Inputs: Add 1 to mapping since R0 is used for index
+ num_inputs = evergreen_Map_Vertex_Input(pAsm, mesa_vp, pAsm->number_used_registers);
+ pAsm->number_used_registers += num_inputs;
+
+ // Create VFETCH instructions for inputs
+ if (GL_TRUE != evergreen_Process_Vertex_Program_Vfetch_Instructions2(ctx, vp, mesa_vp) )
+ {
+ radeon_error("Calling evergreen_Process_Vertex_Program_Vfetch_Instructions2 return error. \n");
+ return;
+ }
+
+ // Map Outputs
+ pAsm->number_of_exports = evergreen_Map_Vertex_Output(pAsm, mesa_vp, pAsm->number_used_registers);
+
+ pAsm->starting_export_register_number = pAsm->number_used_registers;
+
+ pAsm->number_used_registers += pAsm->number_of_exports;
+
+ pAsm->pucOutMask = (unsigned char*) MALLOC(pAsm->number_of_exports);
+
+ for(ui=0; ui<pAsm->number_of_exports; ui++)
+ {
+ pAsm->pucOutMask[ui] = 0x0;
+ }
+
+ /* Map temporary registers (GPRs) */
+ pAsm->starting_temp_register_number = pAsm->number_used_registers;
+
+ if(mesa_vp->Base.NumNativeTemporaries >= mesa_vp->Base.NumTemporaries)
+ { /* arb uses NumNativeTemporaries */
+ pAsm->number_used_registers += mesa_vp->Base.NumNativeTemporaries;
+ }
+ else
+ { /* fix func t_vp uses NumTemporaries */
+ pAsm->number_used_registers += mesa_vp->Base.NumTemporaries;
+ }
+
+ pAsm->flag_reg_index = pAsm->number_used_registers++;
+
+ pAsm->uFirstHelpReg = pAsm->number_used_registers;
+}
+
+GLboolean evergreen_Find_Instruction_Dependencies_vp(struct evergreen_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp)
+{
+ GLuint i, j;
+ GLint * puiTEMPwrites;
+ struct prog_instruction *pILInst;
+ InstDeps *pInstDeps;
+
+ puiTEMPwrites = (GLint*) MALLOC(sizeof(GLuint)*mesa_vp->Base.NumTemporaries);
+ for(i=0; i<mesa_vp->Base.NumTemporaries; i++)
+ {
+ puiTEMPwrites[i] = -1;
+ }
+
+ pInstDeps = (InstDeps*)MALLOC(sizeof(InstDeps)*mesa_vp->Base.NumInstructions);
+
+ for(i=0; i<mesa_vp->Base.NumInstructions; i++)
+ {
+ pInstDeps[i].nDstDep = -1;
+ pILInst = &(mesa_vp->Base.Instructions[i]);
+
+ //Dst
+ if(pILInst->DstReg.File == PROGRAM_TEMPORARY)
+ {
+ //Set lastwrite for the temp
+ puiTEMPwrites[pILInst->DstReg.Index] = i;
+ }
+
+ //Src
+ for(j=0; j<3; j++)
+ {
+ if(pILInst->SrcReg[j].File == PROGRAM_TEMPORARY)
+ {
+ //Set dep.
+ pInstDeps[i].nSrcDeps[j] = puiTEMPwrites[pILInst->SrcReg[j].Index];
+ }
+ else
+ {
+ pInstDeps[i].nSrcDeps[j] = -1;
+ }
+ }
+ }
+
+ vp->r700AsmCode.pInstDeps = pInstDeps;
+
+ FREE(puiTEMPwrites);
+
+ return GL_TRUE;
+}
+
+struct evergreen_vertex_program* evergreenTranslateVertexShader(GLcontext *ctx,
+ struct gl_vertex_program *mesa_vp)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+
+ struct evergreen_vertex_program *vp;
+ unsigned int i;
+
+ vp = calloc(1, sizeof(*vp));
+ vp->mesa_program = _mesa_clone_vertex_program(ctx, mesa_vp);
+
+ vp->constbo0 = NULL;
+
+ if (mesa_vp->IsPositionInvariant)
+ {
+ _mesa_insert_mvp_code(ctx, vp->mesa_program);
+ }
+
+ for(i=0; i<context->nNumActiveAos; i++)
+ {
+ vp->aos_desc[i].size = context->stream_desc[i].size;
+ vp->aos_desc[i].stride = context->stream_desc[i].stride;
+ vp->aos_desc[i].type = context->stream_desc[i].type;
+ vp->aos_desc[i].format = context->stream_desc[i].format;
+ }
+
+ if (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)
+ {
+ vp->r700AsmCode.bR6xx = 1;
+ }
+
+ //Init_Program
+ Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
+
+ vp->r700AsmCode.bUseMemConstant = GL_TRUE;
+ vp->r700AsmCode.unAsic = 8;
+
+ evergreen_Map_Vertex_Program(ctx, vp, vp->mesa_program );
+
+ if(GL_FALSE == evergreen_Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
+ {
+ return NULL;
+ }
+
+ InitShaderProgram(&(vp->r700AsmCode));
+
+ for(i=0; i < MAX_SAMPLERS; i++)
+ {
+ vp->r700AsmCode.SamplerUnits[i] = vp->mesa_program->Base.SamplerUnits[i];
+ }
+
+ vp->r700AsmCode.unCurNumILInsts = vp->mesa_program->Base.NumInstructions;
+
+ if(GL_FALSE == AssembleInstr(0,
+ 0,
+ vp->mesa_program->Base.NumInstructions,
+ &(vp->mesa_program->Base.Instructions[0]),
+ &(vp->r700AsmCode)) )
+ {
+ return NULL;
+ }
+
+ if(GL_FALSE == Process_Vertex_Exports(&(vp->r700AsmCode), vp->mesa_program->Base.OutputsWritten) )
+ {
+ return NULL;
+ }
+
+ if( GL_FALSE == RelocProgram(&(vp->r700AsmCode), &(vp->mesa_program->Base)) )
+ {
+ return GL_FALSE;
+ }
+
+ vp->r700Shader.nRegs = (vp->r700AsmCode.number_used_registers == 0) ? 0
+ : (vp->r700AsmCode.number_used_registers - 1);
+
+ vp->r700Shader.nParamExports = vp->r700AsmCode.number_of_exports;
+
+ vp->translated = GL_TRUE;
+
+ return vp;
+}
+
+void evergreenSelectVertexShader(GLcontext *ctx)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ struct evergreen_vertex_program_cont *vpc;
+ struct evergreen_vertex_program *vp;
+ unsigned int i;
+ GLboolean match;
+ GLbitfield InputsRead;
+
+ vpc = (struct evergreen_vertex_program_cont *)ctx->VertexProgram._Current;
+
+ InputsRead = vpc->mesa_program.Base.InputsRead;
+ if (vpc->mesa_program.IsPositionInvariant)
+ {
+ InputsRead |= VERT_BIT_POS;
+ }
+
+ for (vp = vpc->progs; vp; vp = vp->next)
+ {
+ match = GL_TRUE;
+ for(i=0; i<context->nNumActiveAos; i++)
+ {
+ if (vp->aos_desc[i].size != context->stream_desc[i].size ||
+ vp->aos_desc[i].format != context->stream_desc[i].format)
+ {
+ match = GL_FALSE;
+ break;
+ }
+ }
+ if (match)
+ {
+ context->selected_vp = vp;
+ return;
+ }
+ }
+
+ vp = evergreenTranslateVertexShader(ctx, &(vpc->mesa_program));
+ if(!vp)
+ {
+ radeon_error("Failed to translate vertex shader. \n");
+ return;
+ }
+ vp->next = vpc->progs;
+ vpc->progs = vp;
+ context->selected_vp = vp;
+ return;
+}
+
+int evergreen_getTypeSize(GLenum type)
+{
+ switch (type)
+ {
+ case GL_DOUBLE:
+ return sizeof(GLdouble);
+ case GL_FLOAT:
+ return sizeof(GLfloat);
+ case GL_INT:
+ return sizeof(GLint);
+ case GL_UNSIGNED_INT:
+ return sizeof(GLuint);
+ case GL_SHORT:
+ return sizeof(GLshort);
+ case GL_UNSIGNED_SHORT:
+ return sizeof(GLushort);
+ case GL_BYTE:
+ return sizeof(GLbyte);
+ case GL_UNSIGNED_BYTE:
+ return sizeof(GLubyte);
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static void evergreenTranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const struct gl_client_array *input)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+
+ StreamDesc * pStreamDesc = &(context->stream_desc[context->nNumActiveAos]);
+
+ GLuint stride;
+
+ stride = (input->StrideB == 0) ? evergreen_getTypeSize(input->Type) * input->Size
+ : input->StrideB;
+
+ if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+ evergreen_getTypeSize(input->Type) != 4 ||
+#endif
+ stride < 4)
+ {
+ pStreamDesc->type = GL_FLOAT;
+
+ if (input->StrideB == 0)
+ {
+ pStreamDesc->stride = 0;
+ }
+ else
+ {
+ pStreamDesc->stride = sizeof(GLfloat) * input->Size;
+ }
+ pStreamDesc->dwords = input->Size;
+ pStreamDesc->is_named_bo = GL_FALSE;
+ }
+ else
+ {
+ pStreamDesc->type = input->Type;
+ pStreamDesc->dwords = (evergreen_getTypeSize(input->Type) * input->Size + 3)/ 4;
+ if (!input->BufferObj->Name)
+ {
+ if (input->StrideB == 0)
+ {
+ pStreamDesc->stride = 0;
+ }
+ else
+ {
+ pStreamDesc->stride = (evergreen_getTypeSize(pStreamDesc->type) * input->Size + 3) & ~3;
+ }
+
+ pStreamDesc->is_named_bo = GL_FALSE;
+ }
+ }
+
+ pStreamDesc->size = input->Size;
+ pStreamDesc->dst_loc = context->nNumActiveAos;
+ pStreamDesc->element = unLoc;
+ pStreamDesc->format = input->Format;
+
+ switch (pStreamDesc->type)
+ { //GetSurfaceFormat
+ case GL_FLOAT:
+ pStreamDesc->_signed = 0;
+ pStreamDesc->normalize = GL_FALSE;
+ break;
+ case GL_SHORT:
+ pStreamDesc->_signed = 1;
+ pStreamDesc->normalize = input->Normalized;
+ break;
+ case GL_BYTE:
+ pStreamDesc->_signed = 1;
+ pStreamDesc->normalize = input->Normalized;
+ break;
+ case GL_UNSIGNED_SHORT:
+ pStreamDesc->_signed = 0;
+ pStreamDesc->normalize = input->Normalized;
+ break;
+ case GL_UNSIGNED_BYTE:
+ pStreamDesc->_signed = 0;
+ pStreamDesc->normalize = input->Normalized;
+ break;
+ default:
+ case GL_INT:
+ case GL_UNSIGNED_INT:
+ case GL_DOUBLE:
+ assert(0);
+ break;
+ }
+ context->nNumActiveAos++;
+}
+
+void evergreenSetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ struct evergreen_vertex_program *vpc
+ = (struct evergreen_vertex_program *)ctx->VertexProgram._Current;
+
+ struct gl_vertex_program * mesa_vp = (struct gl_vertex_program *)&(vpc->mesa_program);
+ unsigned int unLoc = 0;
+ unsigned int unBit = mesa_vp->Base.InputsRead;
+ context->nNumActiveAos = 0;
+
+ if (mesa_vp->IsPositionInvariant)
+ {
+ unBit |= VERT_BIT_POS;
+ }
+
+ while(unBit)
+ {
+ if(unBit & 1)
+ {
+ evergreenTranslateAttrib(ctx, unLoc, count, arrays[unLoc]);
+ }
+
+ unBit >>= 1;
+ ++unLoc;
+ }
+ context->radeon.tcl.aos_count = context->nNumActiveAos;
+}
+
+void * evergreenGetActiveVpShaderBo(GLcontext * ctx)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ struct evergreen_vertex_program *vp = context->selected_vp;;
+
+ if (vp)
+ return vp->shaderbo;
+ else
+ return NULL;
+}
+
+void * evergreenGetActiveVpShaderConstBo(GLcontext * ctx)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ struct evergreen_vertex_program *vp = context->selected_vp;;
+
+ if (vp)
+ return vp->constbo0;
+ else
+ return NULL;
+}
+
+GLboolean evergreenSetupVertexProgram(GLcontext * ctx)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ struct evergreen_vertex_program *vp = context->selected_vp;
+
+ if(GL_FALSE == vp->loaded)
+ {
+ if(vp->r700Shader.bNeedsAssembly == GL_TRUE)
+ {
+ Assemble( &(vp->r700Shader) );
+ }
+
+ /* Load vp to gpu */
+ r600EmitShader(ctx,
+ &(vp->shaderbo),
+ (GLvoid *)(vp->r700Shader.pProgram),
+ vp->r700Shader.uShaderBinaryDWORDSize,
+ "VS");
+
+ vp->loaded = GL_TRUE;
+ }
+
+ EVERGREEN_STATECHANGE(context, vs);
+
+ /* TODO : enable this after MemUse fixed *=
+ (context->chipobj.MemUse)(context, vp->shadercode.buf->id);
+ */
+
+ evergreen->SQ_PGM_RESOURCES_VS.u32All = 0;
+ SETbit(evergreen->SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
+
+ evergreen->vs.SQ_ALU_CONST_CACHE_VS_0.u32All = 0; /* set from buffer object. */
+
+ evergreen->vs.SQ_PGM_START_VS.u32All = 0;
+
+ SETfield(evergreen->SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
+ NUM_GPRS_shift, NUM_GPRS_mask);
+
+ if(vp->r700Shader.uStackSize) /* we don't use branch for now, it should be zero. */
+ {
+ SETfield(evergreen->SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.uStackSize,
+ STACK_SIZE_shift, STACK_SIZE_mask);
+ }
+
+ EVERGREEN_STATECHANGE(context, spi);
+
+ SETfield(evergreen->SPI_VS_OUT_CONFIG.u32All,
+ vp->r700Shader.nParamExports ? (vp->r700Shader.nParamExports - 1) : 0,
+ VS_EXPORT_COUNT_shift, VS_EXPORT_COUNT_mask);
+ SETfield(evergreen->SPI_PS_IN_CONTROL_0.u32All, vp->r700Shader.nParamExports,
+ NUM_INTERP_shift, NUM_INTERP_mask);
+
+ /*
+ SETbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, PERSP_GRADIENT_ENA_bit);
+ CLEARbit(evergreen->SPI_PS_IN_CONTROL_0.u32All, LINEAR_GRADIENT_ENA_bit);
+ */
+
+ return GL_TRUE;
+}
+
+GLboolean evergreenSetupVPconstants(GLcontext * ctx)
+{
+ context_t *context = EVERGREEN_CONTEXT(ctx);
+ EVERGREEN_CHIP_CONTEXT *evergreen = GET_EVERGREEN_CHIP(context);
+ struct evergreen_vertex_program *vp = context->selected_vp;
+
+ struct gl_program_parameter_list *paramList;
+ unsigned int unNumParamData;
+ unsigned int ui;
+
+ /* sent out shader constants. */
+ paramList = vp->mesa_program->Base.Parameters;
+
+ if(NULL != paramList) {
+ /* vp->mesa_program was cloned, not updated by glsl shader api. */
+ /* _mesa_reference_program has already checked glsl shProg is ok and set ctx->VertexProgem._Current */
+ /* so, use ctx->VertexProgem._Current */
+ struct gl_program_parameter_list *paramListOrginal =
+ ctx->VertexProgram._Current->Base.Parameters;
+
+ _mesa_load_state_parameters(ctx, paramList);
+
+ if (paramList->NumParameters > EVERGREEN_MAX_DX9_CONSTS)
+ return GL_FALSE;
+
+ EVERGREEN_STATECHANGE(context, vs);
+
+ evergreen->vs.num_consts = paramList->NumParameters;
+
+ unNumParamData = paramList->NumParameters;
+
+ for(ui=0; ui<unNumParamData; ui++) {
+ if(paramList->Parameters[ui].Type == PROGRAM_UNIFORM)
+ {
+ evergreen->vs.consts[ui][0].f32All = paramListOrginal->ParameterValues[ui][0];
+ evergreen->vs.consts[ui][1].f32All = paramListOrginal->ParameterValues[ui][1];
+ evergreen->vs.consts[ui][2].f32All = paramListOrginal->ParameterValues[ui][2];
+ evergreen->vs.consts[ui][3].f32All = paramListOrginal->ParameterValues[ui][3];
+ }
+ else
+ {
+ evergreen->vs.consts[ui][0].f32All = paramList->ParameterValues[ui][0];
+ evergreen->vs.consts[ui][1].f32All = paramList->ParameterValues[ui][1];
+ evergreen->vs.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
+ evergreen->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
+ }
+ }
+
+ radeonAllocDmaRegion(&context->radeon,
+ &context->vp_Constbo,
+ &context->vp_bo_offset,
+ 256,
+ 256);
+ r600EmitShaderConsts(ctx,
+ context->vp_Constbo,
+ context->vp_bo_offset,
+ (GLvoid *)&(evergreen->vs.consts[0][0]),
+ unNumParamData * 4 * 4);
+ } else
+ evergreen->vs.num_consts = 0;
+
+ COMPILED_SUB * pCompiledSub;
+ GLuint uj;
+ GLuint unConstOffset = evergreen->vs.num_consts;
+ for(ui=0; ui<vp->r700AsmCode.unNumPresub; ui++)
+ {
+ pCompiledSub = vp->r700AsmCode.presubs[ui].pCompiledSub;
+
+ evergreen->vs.num_consts += pCompiledSub->NumParameters;
+
+ for(uj=0; uj<pCompiledSub->NumParameters; uj++)
+ {
+ evergreen->vs.consts[uj + unConstOffset][0].f32All = pCompiledSub->ParameterValues[uj][0];
+ evergreen->vs.consts[uj + unConstOffset][1].f32All = pCompiledSub->ParameterValues[uj][1];
+ evergreen->vs.consts[uj + unConstOffset][2].f32All = pCompiledSub->ParameterValues[uj][2];
+ evergreen->vs.consts[uj + unConstOffset][3].f32All = pCompiledSub->ParameterValues[uj][3];
+ }
+ unConstOffset += pCompiledSub->NumParameters;
+ }
+} \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r600/evergreen_vertprog.h b/src/mesa/drivers/dri/r600/evergreen_vertprog.h
new file mode 100644
index 0000000000..5853902115
--- /dev/null
+++ b/src/mesa/drivers/dri/r600/evergreen_vertprog.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Authors:
+ * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
+ */
+
+
+#ifndef _EVERGREEN_VERTPROG_H_
+#define _EVERGREEN_VERTPROG_H_
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+
+#include "r700_shader.h"
+#include "r700_assembler.h"
+
+typedef struct evergreenArrayDesc //TEMP
+{
+ GLint size; //number of data element
+ GLenum type; //data element type
+ GLsizei stride;
+ GLenum format; //GL_RGBA or GL_BGRA
+} evergreenArrayDesc;
+
+struct evergreen_vertex_program
+{
+ struct gl_vertex_program *mesa_program; /* Must be first */
+
+ struct evergreen_vertex_program *next;
+
+ r700_AssemblerBase r700AsmCode;
+ R700_Shader r700Shader;
+
+ GLboolean translated;
+ GLboolean loaded;
+
+ void * shaderbo;
+
+ GLuint K0used;
+ void * constbo0;
+
+ evergreenArrayDesc aos_desc[VERT_ATTRIB_MAX];
+};
+
+struct evergreen_vertex_program_cont
+{
+ struct gl_vertex_program mesa_program;
+
+ struct evergreen_vertex_program *progs;
+};
+
+//Internal
+unsigned int evergreen_Map_Vertex_Output(r700_AssemblerBase *pAsm,
+ struct gl_vertex_program *mesa_vp,
+ unsigned int unStart);
+unsigned int evergreen_Map_Vertex_Input(r700_AssemblerBase *pAsm,
+ struct gl_vertex_program *mesa_vp,
+ unsigned int unStart);
+GLboolean evergreen_Process_Vertex_Program_Vfetch_Instructions(
+ struct evergreen_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp);
+GLboolean evergreen_Process_Vertex_Program_Vfetch_Instructions2(
+ GLcontext *ctx,
+ struct evergreen_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp);
+void evergreen_Map_Vertex_Program(GLcontext *ctx,
+ struct evergreen_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp);
+GLboolean evergreen_Find_Instruction_Dependencies_vp(struct evergreen_vertex_program *vp,
+ struct gl_vertex_program *mesa_vp);
+
+struct evergreen_vertex_program* evergreenTranslateVertexShader(GLcontext *ctx,
+ struct gl_vertex_program *mesa_vp);
+
+/* Interface */
+extern void evergreenSelectVertexShader(GLcontext *ctx);
+extern void evergreenSetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count);
+
+extern GLboolean evergreenSetupVertexProgram(GLcontext * ctx);
+
+extern GLboolean evergreenSetupVPconstants(GLcontext * ctx);
+
+extern void * evergreenGetActiveVpShaderBo(GLcontext * ctx);
+
+extern void * evergreenGetActiveVpShaderConstBo(GLcontext * ctx);
+
+extern int evergreen_getTypeSize(GLenum type);
+
+#endif /* _EVERGREEN_VERTPROG_H_ */
diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c
index 27acff9c16..ef47ae1c05 100644
--- a/src/mesa/drivers/dri/r600/r600_blit.c
+++ b/src/mesa/drivers/dri/r600/r600_blit.c
@@ -1454,7 +1454,7 @@ set_default_state(context_t *context)
SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit);
}
- BEGIN_BATCH_NO_AUTOSTATE(117);
+ BEGIN_BATCH_NO_AUTOSTATE(120);
R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6);
R600_OUT_BATCH(sq_config);
R600_OUT_BATCH(sq_gpr_resource_mgmt_1);
@@ -1499,9 +1499,10 @@ set_default_state(context_t *context)
R600_OUT_BATCH_REGVAL(PA_SU_VTX_CNTL, (PIX_CENTER_bit) |
(X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
(X_1_256TH << QUANT_MODE_shift));
+ R600_OUT_BATCH_REGVAL(PA_SC_AA_CONFIG, 0);
R600_OUT_BATCH_REGSEQ(VGT_MAX_VTX_INDX, 4);
- R600_OUT_BATCH(2048);
+ R600_OUT_BATCH(0xffffff);
R600_OUT_BATCH(0);
R600_OUT_BATCH(0);
R600_OUT_BATCH(0);
@@ -1614,7 +1615,7 @@ unsigned r600_blit(GLcontext *ctx,
/* Flush is needed to make sure that source buffer has correct data */
radeonFlush(ctx);
- rcommonEnsureCmdBufSpace(&context->radeon, 308, __FUNCTION__);
+ rcommonEnsureCmdBufSpace(&context->radeon, 311, __FUNCTION__);
/* load shaders */
load_shaders(context->radeon.glCtx);
@@ -1623,7 +1624,7 @@ unsigned r600_blit(GLcontext *ctx,
return GL_FALSE;
/* set clear state */
- /* 117 */
+ /* 120 */
set_default_state(context);
/* shaders */
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.c b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
index 8013553f67..b3331fc8b8 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.c
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.c
@@ -473,7 +473,14 @@ void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */
radeonContextPtr rmesa = &r600->radeon;
GLuint size;
- r600InitAtoms(r600);
+ if(r600->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR)
+ {
+ evergreenInitAtoms(r600);
+ }
+ else
+ {
+ r600InitAtoms(r600);
+ }
/* Initialize command buffer */
size = 256 * driQueryOptioni(&rmesa->optionCache,
diff --git a/src/mesa/drivers/dri/r600/r600_cmdbuf.h b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
index 78fccd0b60..801bb013f6 100644
--- a/src/mesa/drivers/dri/r600/r600_cmdbuf.h
+++ b/src/mesa/drivers/dri/r600/r600_cmdbuf.h
@@ -190,6 +190,46 @@ do { \
#define R600_OUT_BATCH_REGSEQ(reg, count) \
R600_OUT_BATCH_REGS((reg), (count))
+/* evergreen */
+#define EVERGREEN_OUT_BATCH_REGS(reg, num) \
+do { \
+ if ((reg) >= R600_SET_CONFIG_REG_OFFSET && (reg) < R600_SET_CONFIG_REG_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONFIG_REG, (num))); \
+ R600_OUT_BATCH(((reg) - R600_SET_CONFIG_REG_OFFSET) >> 2); \
+ } else if ((reg) >= R600_SET_CONTEXT_REG_OFFSET && (reg) < R600_SET_CONTEXT_REG_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CONTEXT_REG, (num))); \
+ R600_OUT_BATCH(((reg) - R600_SET_CONTEXT_REG_OFFSET) >> 2); \
+ } else if ((reg) >= EG_SET_RESOURCE_OFFSET && (reg) < EG_SET_RESOURCE_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_RESOURCE, (num))); \
+ R600_OUT_BATCH(((reg) - EG_SET_RESOURCE_OFFSET) >> 2); \
+ } else if ((reg) >= EG_SET_LOOP_CONST_OFFSET && (reg) < EG_SET_LOOP_CONST_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_LOOP_CONST, (num))); \
+ R600_OUT_BATCH(((reg) - EG_SET_LOOP_CONST_OFFSET) >> 2); \
+ } else if ((reg) >= R600_SET_SAMPLER_OFFSET && (reg) < R600_SET_SAMPLER_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_SAMPLER, (num))); \
+ R600_OUT_BATCH(((reg) - R600_SET_SAMPLER_OFFSET) >> 2); \
+ } else if ((reg) >= R600_SET_CTL_CONST_OFFSET && (reg) < R600_SET_CTL_CONST_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_CTL_CONST, (num))); \
+ R600_OUT_BATCH(((reg) - R600_SET_CTL_CONST_OFFSET) >> 2); \
+ } else if ((reg) >= EG_SET_BOOL_CONST_OFFSET && (reg) < EG_SET_BOOL_CONST_END) { \
+ R600_OUT_BATCH(CP_PACKET3(R600_IT_SET_BOOL_CONST, (num))); \
+ R600_OUT_BATCH(((reg) - EG_SET_BOOL_CONST_OFFSET) >> 2); \
+ } else { \
+ R600_OUT_BATCH(CP_PACKET0((reg), (num))); \
+ } \
+} while (0)
+
+/** Single register write to command buffer; requires 3 dwords for most things. */
+#define EVERGREEN_OUT_BATCH_REGVAL(reg, val) \
+ EVERGREEN_OUT_BATCH_REGS((reg), 1); \
+ R600_OUT_BATCH((val))
+
+/** Continuous register range write to command buffer; requires 1 dword,
+ * expects count dwords afterwards for register contents. */
+#define EVERGREEN_OUT_BATCH_REGSEQ(reg, count) \
+ EVERGREEN_OUT_BATCH_REGS((reg), (count))
+
+
extern void r600InitCmdBuf(context_t *r600);
#endif /* __R600_CMDBUF_H__ */
diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index 389b0412ba..bb959e7d2d 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -66,6 +66,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r700_state.h"
#include "r700_ioctl.h"
+#include "evergreen_context.h"
+#include "evergreen_state.h"
+#include "evergreen_tex.h"
+#include "evergreen_ioctl.h"
+#include "evergreen_oglprog.h"
#include "utils.h"
@@ -247,6 +252,19 @@ static void r600_init_vtbl(radeonContextPtr radeon)
static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
+ if( (context->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR)
+ &&(context->radeon.radeonScreen->chip_family <= CHIP_FAMILY_HEMLOCK) )
+ {
+ r700->bShaderUseMemConstant = GL_TRUE;
+ }
+ else
+ {
+ r700->bShaderUseMemConstant = GL_FALSE;
+ }
+
ctx->Const.MaxTextureImageUnits = 16;
/* 8 per clause on r6xx, 16 on r7xx
* but I think mesa only supports 8 at the moment
@@ -381,18 +399,45 @@ GLboolean r600CreateContext(gl_api api,
r600ParseOptions(r600, screen);
r600->radeon.radeonScreen = screen;
- r600_init_vtbl(&r600->radeon);
+ if(screen->chip_family >= CHIP_FAMILY_CEDAR)
+ {
+ evergreen_init_vtbl(&r600->radeon);
+ }
+ else
+ {
+ r600_init_vtbl(&r600->radeon);
+ }
+
/* Init default driver functions then plug in our R600-specific functions
* (the texture functions are especially important)
*/
_mesa_init_driver_functions(&functions);
- r700InitStateFuncs(&r600->radeon, &functions);
- r600InitTextureFuncs(&r600->radeon, &functions);
- r700InitShaderFuncs(&functions);
+ if(screen->chip_family >= CHIP_FAMILY_CEDAR)
+ {
+ evergreenCreateChip(r600);
+ evergreenInitStateFuncs(&r600->radeon, &functions);
+ evergreenInitTextureFuncs(&r600->radeon, &functions);
+ evergreenInitShaderFuncs(&functions);
+ }
+ else
+ {
+ r700InitStateFuncs(&r600->radeon, &functions);
+ r600InitTextureFuncs(&r600->radeon, &functions);
+ r700InitShaderFuncs(&functions);
+ }
+
radeonInitQueryObjFunctions(&functions);
- r700InitIoctlFuncs(&functions);
+
+ if(screen->chip_family >= CHIP_FAMILY_CEDAR)
+ {
+ evergreenInitIoctlFuncs(&functions);
+ }
+ else
+ {
+ r700InitIoctlFuncs(&functions);
+ }
radeonInitBufferObjectFuncs(&functions);
if (!radeonInitContext(&r600->radeon, &functions,
@@ -435,16 +480,46 @@ GLboolean r600CreateContext(gl_api api,
radeon_init_debug();
- r700InitDraw(ctx);
+ if(screen->chip_family >= CHIP_FAMILY_CEDAR)
+ {
+ evergreenInitDraw(ctx);
+ }
+ else
+ {
+ r700InitDraw(ctx);
+ }
radeon_fbo_init(&r600->radeon);
radeonInitSpanFuncs( ctx );
r600InitCmdBuf(r600);
- r700InitState(r600->radeon.glCtx);
+
+ if(screen->chip_family >= CHIP_FAMILY_CEDAR)
+ {
+ evergreenInitState(r600->radeon.glCtx);
+ }
+ else
+ {
+ r700InitState(r600->radeon.glCtx);
+ }
r600InitGLExtensions(ctx);
return GL_TRUE;
}
+void r600DestroyContext(__DRIcontext *driContextPriv )
+{
+ void *pChip;
+ context_t *context = (context_t *) driContextPriv->driverPrivate;
+
+ assert(context);
+
+ pChip = context->pChip;
+
+ /* destroy context first, free pChip, in case there are things flush to asic. */
+ radeonDestroyContext(driContextPriv);
+
+ FREE(pChip);
+}
+
diff --git a/src/mesa/drivers/dri/r600/r600_context.h b/src/mesa/drivers/dri/r600/r600_context.h
index 063dd7c49a..6a83196648 100644
--- a/src/mesa/drivers/dri/r600/r600_context.h
+++ b/src/mesa/drivers/dri/r600/r600_context.h
@@ -53,6 +53,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r700_oglprog.h"
#include "r700_vertprog.h"
+#include "evergreen_chip.h"
+
struct r600_context;
typedef struct r600_context context_t;
@@ -63,6 +65,10 @@ typedef struct r600_context context_t;
#include "tnl_dd/t_dd_vertex.h"
#undef TAG
+#define FORCE_CF_TEX_BARRIER 1
+
+/* #define GENERATE_SHADER_FOR_2D 1 */
+
#define R600_FALLBACK_NONE 0
#define R600_FALLBACK_TCL 1
#define R600_FALLBACK_RAST 2
@@ -103,6 +109,24 @@ struct r600_hw_state {
struct radeon_state_atom tx_brdr_clr;
};
+struct evergreen_hw_state {
+ struct radeon_state_atom one_time_init;
+ struct radeon_state_atom init;
+ struct radeon_state_atom pa;
+ struct radeon_state_atom vgt;
+ struct radeon_state_atom tp;
+ struct radeon_state_atom sq;
+ struct radeon_state_atom vs;
+ struct radeon_state_atom spi;
+ struct radeon_state_atom sx;
+ struct radeon_state_atom tx;
+ struct radeon_state_atom db;
+ struct radeon_state_atom cb;
+ struct radeon_state_atom vtx;
+ struct radeon_state_atom cp;
+ struct radeon_state_atom timestamp;
+};
+
typedef struct StreamDesc
{
GLint size; //number of data element
@@ -141,6 +165,9 @@ struct r600_context {
struct r600_hw_state atoms;
+ struct evergreen_hw_state evergreen_atoms;
+ void * pChip;
+
struct r700_vertex_program *selected_vp;
/* Vertex buffers
@@ -150,16 +177,29 @@ struct r600_context {
struct r700_index_buffer ind_buf;
struct radeon_bo *blit_bo;
GLboolean blit_bo_loaded;
+
+ /* Shader const buffer */
+ struct radeon_bo * vp_Constbo;
+ int vp_bo_offset;
+ struct radeon_bo * fp_Constbo;
+ int fp_bo_offset;
};
+#define EVERGREEN_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx))
+
#define R700_CONTEXT(ctx) ((context_t *)(ctx->DriverCtx))
#define GL_CONTEXT(context) ((GLcontext *)(context->radeon.glCtx))
+#define GET_EVERGREEN_CHIP(context) ((EVERGREEN_CHIP_CONTEXT*)(context->pChip))
+
extern GLboolean r600CreateContext(gl_api api,
const __GLcontextModes * glVisual,
__DRIcontext * driContextPriv,
void *sharedContextPrivate);
+extern void r600DestroyContext(__DRIcontext *driContextPriv );
+extern void evergreenCreateChip(context_t *context);
+
#define R700_CONTEXT_STATES(context) ((R700_CHIP_CONTEXT *)(&context->hw))
#define R600_NEWPRIM( rmesa ) \
@@ -175,6 +215,13 @@ do { \
r600->radeon.hw.is_dirty = GL_TRUE; \
} while(0)
+#define EVERGREEN_STATECHANGE(r600, ATOM) \
+do { \
+ R600_NEWPRIM(r600); \
+ r600->evergreen_atoms.ATOM.dirty = GL_TRUE; \
+ r600->radeon.hw.is_dirty = GL_TRUE; \
+} while(0)
+
extern GLboolean r700SyncSurf(context_t *context,
struct radeon_bo *pbo,
uint32_t read_domain,
@@ -187,6 +234,9 @@ extern void r700Start3D(context_t *context);
extern void r600InitAtoms(context_t *context);
extern void r700InitDraw(GLcontext *ctx);
+extern void evergreenInitAtoms(context_t *context);
+extern void evergreenInitDraw(GLcontext *ctx);
+
#define RADEON_D_CAPTURE 0
#define RADEON_D_PLAYBACK 1
#define RADEON_D_PLAYBACK_RAW 2
diff --git a/src/mesa/drivers/dri/r600/r600_emit.c b/src/mesa/drivers/dri/r600/r600_emit.c
index 1eb89a5305..a840106c14 100644
--- a/src/mesa/drivers/dri/r600/r600_emit.c
+++ b/src/mesa/drivers/dri/r600/r600_emit.c
@@ -49,6 +49,71 @@ void r600EmitCacheFlush(context_t *rmesa)
{
}
+GLboolean r600AllocShaderConsts(GLcontext * ctx,
+ void ** constbo,
+ int sizeinBYTE,
+ char * szShaderUsage)
+{
+ radeonContextPtr radeonctx = RADEON_CONTEXT(ctx);
+ struct radeon_bo * pbo;
+
+ if(sizeinBYTE < 64) /* SQ_ALU_CONST_BUFFER_SIZE need 64 bytes at least to be non 0 */
+ {
+ sizeinBYTE = 64;
+ }
+
+shader_again_alloc:
+ pbo = radeon_bo_open(radeonctx->radeonScreen->bom,
+ 0,
+ sizeinBYTE,
+ 256,
+ RADEON_GEM_DOMAIN_GTT,
+ 0);
+
+ radeon_print(RADEON_SHADER, RADEON_NORMAL, "%s %p size %d: %s\n", __func__, pbo, sizeinBYTE, szShaderUsage);
+
+ if (!pbo) {
+ radeon_print(RADEON_MEMORY | RADEON_CS, RADEON_IMPORTANT, "No memory for buffer object. Flushing command buffer.\n");
+ rcommonFlushCmdBuf(radeonctx, __FUNCTION__);
+ goto shader_again_alloc;
+ }
+
+ radeon_cs_space_add_persistent_bo(radeonctx->cmdbuf.cs,
+ pbo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+
+ if (radeon_cs_space_check_with_bo(radeonctx->cmdbuf.cs,
+ pbo,
+ RADEON_GEM_DOMAIN_GTT, 0)) {
+ radeon_error("failure to revalidate BOs - badness\n");
+ return GL_FALSE;
+ }
+
+ *constbo = (void*)pbo;
+
+ return GL_TRUE;
+}
+GLboolean r600EmitShaderConsts(GLcontext * ctx,
+ void * constbo,
+ int bo_offset,
+ GLvoid * data,
+ int sizeinBYTE)
+{
+ struct radeon_bo * pbo = (struct radeon_bo *)constbo;
+ uint8_t *out;
+
+ radeon_bo_map(pbo, 1);
+
+ out = (uint8_t*)(pbo->ptr);
+ out = (uint8_t*)ADD_POINTERS(pbo->ptr, bo_offset);
+
+ memcpy(out, data, sizeinBYTE);
+
+ radeon_bo_unmap(pbo);
+
+ return GL_TRUE;
+}
+
GLboolean r600EmitShader(GLcontext * ctx,
void ** shaderbo,
GLvoid * data,
diff --git a/src/mesa/drivers/dri/r600/r600_emit.h b/src/mesa/drivers/dri/r600/r600_emit.h
index 661774d11e..259561539f 100644
--- a/src/mesa/drivers/dri/r600/r600_emit.h
+++ b/src/mesa/drivers/dri/r600/r600_emit.h
@@ -52,4 +52,14 @@ extern GLboolean r600EmitShader(GLcontext * ctx,
extern GLboolean r600DeleteShader(GLcontext * ctx,
void * shaderbo);
+extern GLboolean r600AllocShaderConsts(GLcontext * ctx,
+ void ** constbo,
+ int sizeinBYTE,
+ char * szShaderUsage);
+GLboolean r600EmitShaderConsts(GLcontext * ctx,
+ void * constbo,
+ int bo_offset,
+ GLvoid * data,
+ int sizeinBYTE);
+
#endif
diff --git a/src/mesa/drivers/dri/r600/r600_texstate.c b/src/mesa/drivers/dri/r600/r600_texstate.c
index ba3690b70e..fd928cfe5d 100644
--- a/src/mesa/drivers/dri/r600/r600_texstate.c
+++ b/src/mesa/drivers/dri/r600/r600_texstate.c
@@ -50,6 +50,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include "r700_fragprog.h"
#include "r700_vertprog.h"
+#include "evergreen_tex.h"
+
void r600UpdateTextureState(GLcontext * ctx);
void r600UpdateTextureState(GLcontext * ctx)
@@ -878,6 +880,18 @@ GLboolean r600ValidateBuffers(GLcontext * ctx)
RADEON_GEM_DOMAIN_GTT, 0);
}
+ pbo = (struct radeon_bo *)r700GetActiveFpShaderConstBo(ctx);
+ if (pbo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ }
+
+ pbo = (struct radeon_bo *)r700GetActiveVpShaderConstBo(ctx);
+ if (pbo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs, pbo,
+ RADEON_GEM_DOMAIN_GTT, 0);
+ }
+
ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
if (ret)
return GL_FALSE;
@@ -897,6 +911,12 @@ void r600SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
if (!tObj)
return;
+ if(rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR)
+ {
+ evergreenSetTexOffset(pDRICtx, texname, offset, depth, pitch);
+ return;
+ }
+
t->image_override = GL_TRUE;
if (!offset)
@@ -989,6 +1009,12 @@ void r600SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint glx_texture_fo
radeon = pDRICtx->driverPrivate;
rmesa = pDRICtx->driverPrivate;
+ if(rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_CEDAR)
+ {
+ evergreenSetTexBuffer(pDRICtx, target, glx_texture_format, dPriv);
+ return;
+ }
+
rfb = dPriv->driverPrivate;
texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 9c954cbf70..45ff9c0624 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -38,6 +38,7 @@
#include "r600_context.h"
#include "r700_assembler.h"
+#include "evergreen_sq.h"
#define USE_CF_FOR_CONTINUE_BREAK 1
#define USE_CF_FOR_POP_AFTER 1
@@ -258,6 +259,18 @@ GLboolean is_reduction_opcode(PVSDWORD* dest)
return GL_FALSE;
}
+GLboolean EG_is_reduction_opcode(PVSDWORD* dest)
+{
+ if (dest->dst.op3 == 0)
+ {
+ if ( (dest->dst.opcode == EG_OP2_INST_DOT4 || dest->dst.opcode == EG_OP2_INST_DOT4_IEEE || dest->dst.opcode == EG_OP2_INST_CUBE) )
+ {
+ return GL_TRUE;
+ }
+ }
+ return GL_FALSE;
+}
+
GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
{
GLuint format = FMT_INVALID;
@@ -423,6 +436,60 @@ unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3)
return 3;
}
+unsigned int EG_GetNumOperands(GLuint opcode, GLuint nIsOp3)
+{
+ if(nIsOp3 > 0)
+ {
+ return 3;
+ }
+
+ switch (opcode)
+ {
+ case EG_OP2_INST_ADD:
+ case EG_OP2_INST_KILLE:
+ case EG_OP2_INST_KILLGT:
+ case EG_OP2_INST_KILLGE:
+ case EG_OP2_INST_KILLNE:
+ case EG_OP2_INST_MUL:
+ case EG_OP2_INST_MAX:
+ case EG_OP2_INST_MIN:
+ //case EG_OP2_INST_MAX_DX10:
+ //case EG_OP2_INST_MIN_DX10:
+ case EG_OP2_INST_SETE:
+ case EG_OP2_INST_SETNE:
+ case EG_OP2_INST_SETGT:
+ case EG_OP2_INST_SETGE:
+ case EG_OP2_INST_PRED_SETE:
+ case EG_OP2_INST_PRED_SETGT:
+ case EG_OP2_INST_PRED_SETGE:
+ case EG_OP2_INST_PRED_SETNE:
+ case EG_OP2_INST_DOT4:
+ case EG_OP2_INST_DOT4_IEEE:
+ case EG_OP2_INST_CUBE:
+ return 2;
+
+ case EG_OP2_INST_MOV:
+ //case SQ_OP2_INST_MOVA_FLOOR:
+ case EG_OP2_INST_FRACT:
+ case EG_OP2_INST_FLOOR:
+ case EG_OP2_INST_TRUNC:
+ case EG_OP2_INST_EXP_IEEE:
+ case EG_OP2_INST_LOG_CLAMPED:
+ case EG_OP2_INST_LOG_IEEE:
+ case EG_OP2_INST_RECIP_IEEE:
+ case EG_OP2_INST_RECIPSQRT_IEEE:
+ case EG_OP2_INST_FLT_TO_INT:
+ case EG_OP2_INST_SIN:
+ case EG_OP2_INST_COS:
+ return 1;
+
+ default: radeon_error(
+ "Need instruction operand number for %x.\n", opcode);
+ };
+
+ return 3;
+}
+
int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
{
GLuint i;
@@ -718,21 +785,55 @@ GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
return GL_FALSE;
}
- pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
- pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
- pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
- pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, EG_CF_INST_VC,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ }
+ else
+ {
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
}
else
{
- pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
+ if(8 == pAsm->unAsic)
+ {
+ unsigned int count = GETbits(pAsm->cf_current_vtx_clause_ptr->m_Word1.val,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask) + 1;
+ SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, count,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ }
+ else
+ {
+ pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
+ }
}
AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
@@ -767,20 +868,59 @@ GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
radeon_error("Could not allocate a new TEX CF instruction.\n");
return GL_FALSE;
}
-
- pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
- pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
- pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, EG_CF_INST_TC,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+#ifdef FORCE_CF_TEX_BARRIER
+ SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+#else
+ SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+#endif
+ }
+ else
+ {
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
+ }
}
else
- {
- pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
+ {
+ if(8 == pAsm->unAsic)
+ {
+ unsigned int count = GETbits(pAsm->cf_current_tex_clause_ptr->m_Word1.val,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask) + 1;
+ SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, count,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ }
+ else
+ {
+ pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
+ }
}
// If this clause constains any TEX instruction that is dependent on a previous instruction,
@@ -891,6 +1031,188 @@ GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
return GL_TRUE;
}
+GLboolean EG_assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
+ GLuint destination_register,
+ GLenum type,
+ GLint size,
+ GLubyte element,
+ GLuint _signed,
+ GLboolean normalize,
+ GLenum format,
+ VTX_FETCH_METHOD * pFetchMethod)
+{
+ GLuint client_size_inbyte;
+ GLuint data_format;
+ GLuint mega_fetch_count;
+ GLuint is_mega_fetch_flag;
+
+ GLuint dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w;
+
+ R700VertexGenericFetch* vfetch_instruction_ptr;
+ R700VertexGenericFetch* assembled_vfetch_instruction_ptr
+ = pAsm->vfetch_instruction_ptr_array[element];
+
+ if (assembled_vfetch_instruction_ptr == NULL)
+ {
+ vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
+ if (vfetch_instruction_ptr == NULL)
+ {
+ return GL_FALSE;
+ }
+ Init_R700VertexGenericFetch(vfetch_instruction_ptr);
+ }
+ else
+ {
+ vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
+ }
+
+ data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
+
+ if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
+ {
+ //TODO : mini fetch
+ mega_fetch_count = 0;
+ is_mega_fetch_flag = 0;
+ }
+ else
+ {
+ mega_fetch_count = MEGA_FETCH_BYTES - 1;
+ is_mega_fetch_flag = 0x1;
+ pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
+ }
+
+ SETfield(vfetch_instruction_ptr->m_Word0.val, EG_VC_INST_FETCH,
+ EG_VTX_WORD0__VC_INST_shift,
+ EG_VTX_WORD0__VC_INST_mask);
+ SETfield(vfetch_instruction_ptr->m_Word0.val, EG_VTX_FETCH_VERTEX_DATA,
+ EG_VTX_WORD0__FETCH_TYPE_shift,
+ EG_VTX_WORD0__FETCH_TYPE_mask);
+ CLEARbit(vfetch_instruction_ptr->m_Word0.val,
+ EG_VTX_WORD0__FWQ_bit);
+ SETfield(vfetch_instruction_ptr->m_Word0.val, element,
+ EG_VTX_WORD0__BUFFER_ID_shift,
+ EG_VTX_WORD0__BUFFER_ID_mask);
+ SETfield(vfetch_instruction_ptr->m_Word0.val, 0x0,
+ EG_VTX_WORD0__SRC_GPR_shift,
+ EG_VTX_WORD0__SRC_GPR_mask);
+ SETfield(vfetch_instruction_ptr->m_Word0.val, SQ_ABSOLUTE,
+ EG_VTX_WORD0__SRC_REL_shift,
+ EG_VTX_WORD0__SRC_REL_bit);
+ SETfield(vfetch_instruction_ptr->m_Word0.val, SQ_SEL_X,
+ EG_VTX_WORD0__SRC_SEL_X_shift,
+ EG_VTX_WORD0__SRC_SEL_X_mask);
+ SETfield(vfetch_instruction_ptr->m_Word0.val, mega_fetch_count,
+ EG_VTX_WORD0__MFC_shift,
+ EG_VTX_WORD0__MFC_mask);
+
+ if(format == GL_BGRA)
+ {
+ dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
+ dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+ dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
+ dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+ }
+ else
+ {
+ dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
+ dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
+ dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
+ dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
+
+ }
+ SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_x,
+ EG_VTX_WORD1__DST_SEL_X_shift,
+ EG_VTX_WORD1__DST_SEL_X_mask);
+ SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_y,
+ EG_VTX_WORD1__DST_SEL_Y_shift,
+ EG_VTX_WORD1__DST_SEL_Y_mask);
+ SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_z,
+ EG_VTX_WORD1__DST_SEL_Z_shift,
+ EG_VTX_WORD1__DST_SEL_Z_mask);
+ SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_w,
+ EG_VTX_WORD1__DST_SEL_W_shift,
+ EG_VTX_WORD1__DST_SEL_W_mask);
+
+ SETfield(vfetch_instruction_ptr->m_Word1.val, 0, /* use format here, in r6/r7, format used set in const, need to use same */
+ EG_VTX_WORD1__UCF_shift,
+ EG_VTX_WORD1__UCF_bit);
+ SETfield(vfetch_instruction_ptr->m_Word1.val, data_format,
+ EG_VTX_WORD1__DATA_FORMAT_shift,
+ EG_VTX_WORD1__DATA_FORMAT_mask);
+#ifdef TEST_VFETCH
+ SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_SIGNED,
+ EG_VTX_WORD1__FCA_shift,
+ EG_VTX_WORD1__FCA_bit);
+#else
+ if(1 == _signed)
+ {
+ SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_SIGNED,
+ EG_VTX_WORD1__FCA_shift,
+ EG_VTX_WORD1__FCA_bit);
+ }
+ else
+ {
+ SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_UNSIGNED,
+ EG_VTX_WORD1__FCA_shift,
+ EG_VTX_WORD1__FCA_bit);
+ }
+#endif /* TEST_VFETCH */
+
+ if(GL_TRUE == normalize)
+ {
+ SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_NUM_FORMAT_NORM,
+ EG_VTX_WORD1__NFA_shift,
+ EG_VTX_WORD1__NFA_mask);
+ }
+ else
+ {
+ SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_NUM_FORMAT_SCALED,
+ EG_VTX_WORD1__NFA_shift,
+ EG_VTX_WORD1__NFA_mask);
+ }
+
+ /* Destination register */
+ SETfield(vfetch_instruction_ptr->m_Word1.val, destination_register,
+ EG_VTX_WORD1_GPR__DST_GPR_shift,
+ EG_VTX_WORD1_GPR__DST_GPR_mask);
+ SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_ABSOLUTE,
+ EG_VTX_WORD1_GPR__DST_REL_shift,
+ EG_VTX_WORD1_GPR__DST_REL_bit);
+
+
+ SETfield(vfetch_instruction_ptr->m_Word2.val, 0,
+ EG_VTX_WORD2__OFFSET_shift,
+ EG_VTX_WORD2__OFFSET_mask);
+ SETfield(vfetch_instruction_ptr->m_Word2.val, SQ_ENDIAN_NONE,
+ EG_VTX_WORD2__ENDIAN_SWAP_shift,
+ EG_VTX_WORD2__ENDIAN_SWAP_mask);
+ SETfield(vfetch_instruction_ptr->m_Word2.val, 0,
+ EG_VTX_WORD2__CBNS_shift,
+ EG_VTX_WORD2__CBNS_bit);
+ SETfield(vfetch_instruction_ptr->m_Word2.val, is_mega_fetch_flag,
+ EG_VTX_WORD2__MEGA_FETCH_shift,
+ EG_VTX_WORD2__MEGA_FETCH_mask);
+
+ if (assembled_vfetch_instruction_ptr == NULL)
+ {
+ if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
+ {
+ return GL_FALSE;
+ }
+
+ if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
+ {
+ return GL_FALSE;
+ }
+ else
+ {
+ pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
+ }
+ }
+
+ return GL_TRUE;
+}
+
GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
GLuint destination_register,
GLenum type,
@@ -1357,7 +1679,7 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm,
break;
case PROGRAM_INPUT:
setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
- pAsm->S[fld].src.rtype = SRC_REG_INPUT;
+ pAsm->S[fld].src.rtype = SRC_REG_GPR;
switch (pAsm->currentShaderType)
{
case SPT_FP:
@@ -1368,6 +1690,19 @@ GLboolean assemble_src(r700_AssemblerBase *pAsm,
break;
}
break;
+ case PROGRAM_OUTPUT:
+ setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
+ pAsm->S[fld].src.rtype = SRC_REG_GPR;
+ switch (pAsm->currentShaderType)
+ {
+ case SPT_FP:
+ pAsm->S[fld].src.reg = pAsm->uiFP_OutputMap[pILInst->SrcReg[src].Index];
+ break;
+ case SPT_VP:
+ pAsm->S[fld].src.reg = pAsm->ucVP_OutputMap[pILInst->SrcReg[src].Index];
+ break;
+ }
+ break;
default:
radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
return GL_FALSE;
@@ -1521,7 +1856,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
bValidTexCoord = GL_TRUE;
pAsm->S[0].src.reg =
pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
- pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ pAsm->S[0].src.rtype = SRC_REG_GPR;
break;
}
}
@@ -1544,7 +1879,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
bValidTexCoord = GL_TRUE;
pAsm->S[0].src.reg =
pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
- pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ pAsm->S[0].src.rtype = SRC_REG_GPR;
break;
case FRAG_ATTRIB_FACE:
fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
@@ -1560,7 +1895,7 @@ GLboolean tex_src(r700_AssemblerBase *pAsm)
bValidTexCoord = GL_TRUE;
pAsm->S[0].src.reg =
pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
- pAsm->S[0].src.rtype = SRC_REG_INPUT;
+ pAsm->S[0].src.rtype = SRC_REG_GPR;
}
}
@@ -1606,19 +1941,68 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize
texture_coordinate_source = &(pAsm->S[0].src);
texture_unit_source = &(pAsm->S[1].src);
- tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
- tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
- tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
- tex_instruction_ptr->m_Word0.f.alt_const = 0;
-
- if(SPT_VP == pAsm->currentShaderType)
+ if(8 == pAsm->unAsic) /* evergreen */
{
- tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX;
- pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
+
+ SETfield(tex_instruction_ptr->m_Word0.val, pAsm->D.dst.opcode,
+ EG_TEX_WORD0__TEX_INST_shift,
+ EG_TEX_WORD0__TEX_INST_mask);
+
+ if( (SQ_TEX_INST_GET_GRADIENTS_H == pAsm->D.dst.opcode)
+ ||(SQ_TEX_INST_GET_GRADIENTS_V == pAsm->D.dst.opcode) )
+ {
+ /* Use fine texel derivative calculation rather than use quad derivative */
+ SETfield(tex_instruction_ptr->m_Word0.val, 1,
+ EG_TEX_WORD0__INST_MOD_shift,
+ EG_TEX_WORD0__INST_MOD_mask);
+ }
+ else
+ {
+ SETfield(tex_instruction_ptr->m_Word0.val, 0,
+ EG_TEX_WORD0__INST_MOD_shift,
+ EG_TEX_WORD0__INST_MOD_mask);
+ }
+
+ CLEARbit(tex_instruction_ptr->m_Word0.val, EG_TEX_WORD0__FWQ_bit);
+
+ if(SPT_VP == pAsm->currentShaderType)
+ {
+ SETfield(tex_instruction_ptr->m_Word0.val, (texture_unit_source->reg + VERT_ATTRIB_MAX),
+ EG_TEX_WORD0__RESOURCE_ID_shift,
+ EG_TEX_WORD0__RESOURCE_ID_mask);
+ pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
+ }
+ else
+ {
+ SETfield(tex_instruction_ptr->m_Word0.val, texture_unit_source->reg,
+ EG_TEX_WORD0__RESOURCE_ID_shift,
+ EG_TEX_WORD0__RESOURCE_ID_mask);
+ }
+
+ CLEARbit(tex_instruction_ptr->m_Word0.val, EG_TEX_WORD0__ALT_CONST_bit);
+ SETfield(tex_instruction_ptr->m_Word0.val, 0,
+ EG_TEX_WORD0__RIM_shift,
+ EG_TEX_WORD0__RIM_mask);
+ SETfield(tex_instruction_ptr->m_Word0.val, 0,
+ EG_TEX_WORD0__SIM_shift,
+ EG_TEX_WORD0__SIM_mask);
}
else
{
- tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
+ tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
+ tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
+ tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
+ tex_instruction_ptr->m_Word0.f.alt_const = 0;
+
+ if(SPT_VP == pAsm->currentShaderType)
+ {
+ tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX;
+ pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
+ }
+ else
+ {
+ tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
+ }
}
tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
@@ -1644,8 +2028,20 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize
if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
(pAsm->D.dst.rtype == DST_REG_OUT) )
{
- tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
- tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
+ if(8 == pAsm->unAsic) /* evergreen */
+ {
+ SETfield(tex_instruction_ptr->m_Word0.val, texture_coordinate_source->reg,
+ EG_TEX_WORD0__SRC_GPR_shift,
+ EG_TEX_WORD0__SRC_GPR_mask);
+ SETfield(tex_instruction_ptr->m_Word0.val, SQ_ABSOLUTE,
+ EG_TEX_WORD0__SRC_REL_shift,
+ EG_TEX_WORD0__SRC_REL_bit);
+ }
+ else
+ {
+ tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
+ tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
+ }
tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
@@ -1696,7 +2092,8 @@ void initialize(r700_AssemblerBase *pAsm)
GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
int source_index,
PVSSRC* pSource,
- BITS scalar_channel_index)
+ BITS scalar_channel_index,
+ r700_AssemblerBase *pAsm)
{
BITS src_sel;
BITS src_rel;
@@ -1745,14 +2142,23 @@ GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
else
{
if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
- (pSource->rtype == SRC_REG_INPUT)
+ (pSource->rtype == SRC_REG_GPR)
)
{
src_sel = pSource->reg;
}
else if (pSource->rtype == SRC_REG_CONSTANT)
{
- src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
+ /* TODO : 4 const buffers */
+ if(GL_TRUE == pAsm->bUseMemConstant)
+ {
+ src_sel = pSource->reg + SQ_ALU_SRC_KCACHE0_BASE;
+ pAsm->kcacheUsed = SQ_ALU_SRC_KCACHE0_BASE;
+ }
+ else
+ {
+ src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
+ }
}
else if (pSource->rtype == SRC_REC_LITERAL)
{
@@ -1902,6 +2308,17 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
}
+ /* TODO : handle 4 bufs */
+ if( (pAsm->kcacheUsed > 0) && (GL_TRUE == pAsm->bUseMemConstant) )
+ {
+ pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
+ pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
+ pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_LOCK_2;
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
+ pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
+ }
+
// If this clause constains any instruction that is forward dependent on a TEX instruction,
// set the whole_quad_mode for this clause
if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
@@ -1925,6 +2342,80 @@ GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
return GL_TRUE;
}
+GLboolean EG_add_ps_interp(r700_AssemblerBase* pAsm)
+{
+ R700ALUInstruction * alu_instruction_ptr = NULL;
+
+ int ui;
+ unsigned int uj;
+ unsigned int unWord0Temp = 0x380C00;
+ unsigned int unWord1Temp = 0x146B10; //SQ_SEL_X
+
+ if(pAsm->uIIns > 0)
+ {
+ for(ui=(pAsm->uIIns-1); ui>=0; ui--)
+ {
+ for(uj=0; uj<8; uj++)
+ {
+ alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
+ Init_R700ALUInstruction(alu_instruction_ptr);
+ alu_instruction_ptr->m_Word0.val = unWord0Temp;
+ alu_instruction_ptr->m_Word1.val = unWord1Temp;
+
+ if(uj < 4)
+ {
+ SETfield(alu_instruction_ptr->m_Word1.val, EG_OP2_INST_INTERP_ZW,
+ EG_ALU_WORD1_OP2__ALU_INST_shift, EG_ALU_WORD1_OP2__ALU_INST_mask);
+ }
+ else
+ {
+ SETfield(alu_instruction_ptr->m_Word1.val, EG_OP2_INST_INTERP_XY,
+ EG_ALU_WORD1_OP2__ALU_INST_shift, EG_ALU_WORD1_OP2__ALU_INST_mask);
+ }
+ if( (uj > 1) && (uj < 6) )
+ {
+ SETfield(alu_instruction_ptr->m_Word1.val, 1,
+ EG_ALU_WORD1_OP2__WRITE_MASK_shift, EG_ALU_WORD1_OP2__WRITE_MASK_bit);
+ }
+ else
+ {
+ SETfield(alu_instruction_ptr->m_Word1.val, 0,
+ EG_ALU_WORD1_OP2__WRITE_MASK_shift, EG_ALU_WORD1_OP2__WRITE_MASK_bit);
+ }
+ if( (uj > 1) && (uj < 6) )
+ {
+ SETfield(alu_instruction_ptr->m_Word1.val, ui,
+ EG_ALU_WORD1__DST_GPR_shift, EG_ALU_WORD1__DST_GPR_mask);
+ }
+ else
+ {
+ SETfield(alu_instruction_ptr->m_Word1.val, 111,
+ EG_ALU_WORD1__DST_GPR_shift, EG_ALU_WORD1__DST_GPR_mask);
+ }
+
+ SETfield(alu_instruction_ptr->m_Word1.val, (uj % 4),
+ EG_ALU_WORD1__DST_CHAN_shift, EG_ALU_WORD1__DST_CHAN_mask);
+ SETfield(alu_instruction_ptr->m_Word0.val, (1 - (uj % 2)),
+ EG_ALU_WORD0__SRC0_CHAN_shift, EG_ALU_WORD0__SRC0_CHAN_mask);
+ SETfield(alu_instruction_ptr->m_Word0.val, (EG_ALU_SRC_PARAM_BASE + ui),
+ EG_ALU_WORD0__SRC1_SEL_shift, EG_ALU_WORD0__SRC1_SEL_mask);
+ if(3 == (uj % 4))
+ {
+ SETfield(alu_instruction_ptr->m_Word0.val, 1,
+ EG_ALU_WORD0__LAST_shift, EG_ALU_WORD0__LAST_bit);
+ }
+
+ if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, 4) )
+ {
+ return GL_FALSE;
+ }
+ }
+ }
+ }
+
+ return GL_TRUE;
+}
+
void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
int source_index,
BITS* psrc_sel,
@@ -2175,8 +2666,16 @@ GLboolean check_scalar(r700_AssemblerBase* pAsm,
BITS src_neg [3] = {0,0,0};
GLuint swizzle_key;
+ GLuint number_of_operands;
- GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
+ if(8 == pAsm->unAsic)
+ {
+ number_of_operands = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
+ }
+ else
+ {
+ number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
+ }
for (src=0; src<number_of_operands; src++)
{
@@ -2264,8 +2763,16 @@ GLboolean check_vector(r700_AssemblerBase* pAsm,
BITS src_neg [3] = {0,0,0};
GLuint swizzle_key;
+ GLuint number_of_operands;
- GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
+ if(8 == pAsm->unAsic)
+ {
+ number_of_operands = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
+ }
+ else
+ {
+ number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
+ }
for (src=0; src<number_of_operands; src++)
{
@@ -2345,12 +2852,23 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
PVSSRC * pcurrent_source;
int current_source_index;
GLuint contiguous_slots_needed;
+ GLuint uNumSrc;
+ GLboolean bSplitInst;
+
+ if(8 == pAsm->unAsic)
+ {
+ uNumSrc = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
+ }
+ else
+ {
+ uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
+ }
- GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
//GLuint channel_swizzle, j;
//GLuint chan_counter[4] = {0, 0, 0, 0};
//PVSSRC * pSource[3];
- GLboolean bSplitInst = GL_FALSE;
+ bSplitInst = GL_FALSE;
+ pAsm->kcacheUsed = 0;
if (1 == pAsm->D.dst.math)
{
@@ -2384,7 +2902,7 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
default: channel_swizzle = SQ_SEL_MASK; break;
}
if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
- (pSource[j]->rtype == SRC_REG_INPUT))
+ (pSource[j]->rtype == SRC_REG_GPR))
&& (channel_swizzle <= SQ_SEL_W) )
{
chan_counter[channel_swizzle]++;
@@ -2449,7 +2967,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
current_source_index,
pcurrent_source,
- scalar_channel_index) )
+ scalar_channel_index,
+ pAsm) )
{
return GL_FALSE;
}
@@ -2463,7 +2982,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
current_source_index,
pcurrent_source,
- scalar_channel_index) )
+ scalar_channel_index,
+ pAsm) )
{
return GL_FALSE;
}
@@ -2546,7 +3066,8 @@ GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
current_source_index,
pcurrent_source,
- scalar_channel_index) )
+ scalar_channel_index,
+ pAsm) )
{
return GL_FALSE;
}
@@ -2987,7 +3508,14 @@ GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_DOT4;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
+ }
if( GL_FALSE == assemble_dst(pAsm) )
{
@@ -3004,7 +3532,14 @@ GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
+ if(OPCODE_DP2 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
+ {
+ zerocomp_PVSSRC(&(pAsm->S[0].src),2);
+ zerocomp_PVSSRC(&(pAsm->S[0].src),3);
+ zerocomp_PVSSRC(&(pAsm->S[1].src),2);
+ zerocomp_PVSSRC(&(pAsm->S[1].src),3);
+ }
+ else if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
{
zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
@@ -3062,6 +3597,11 @@ GLboolean assemble_DST(r700_AssemblerBase *pAsm)
GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
{
+ if(8 == pAsm->unAsic)
+ {
+ return assemble_math_function(pAsm, EG_OP2_INST_EXP_IEEE);
+ }
+
return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
}
@@ -3094,7 +3634,14 @@ GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ }
pAsm->D.dst.math = 1;
if( GL_FALSE == assemble_dst(pAsm) )
@@ -3143,7 +3690,14 @@ GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
// EX2 dst.z, a.x
if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
- pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ }
pAsm->D.dst.math = 1;
if( GL_FALSE == assemble_dst(pAsm) )
@@ -3218,6 +3772,11 @@ GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
{
+ if(8 == pAsm->unAsic)
+ {
+ return assemble_math_function(pAsm, EG_OP2_INST_FLT_TO_INT);
+ }
+
return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
}
@@ -3300,6 +3859,11 @@ GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
{
+ if(8 == pAsm->unAsic)
+ {
+ return assemble_math_function(pAsm, EG_OP2_INST_LOG_IEEE);
+ }
+
return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
}
@@ -3339,7 +3903,14 @@ GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
@@ -3437,7 +4008,14 @@ GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
// LG2 tmp2.x, tmp1.x
// FLOOR tmp3.x, tmp2.x
- pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_LOG_IEEE;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
+ }
pAsm->D.dst.math = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -3528,7 +4106,14 @@ GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ }
pAsm->D.dst.math = 1;
if( GL_FALSE == assemble_dst(pAsm) )
@@ -3610,7 +4195,14 @@ GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
tmp = (-1);
@@ -3778,7 +4370,14 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
/* dst.z = log(src.y) */
- pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_LOG_CLAMPED;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
+ }
pAsm->D.dst.math = 1;
pAsm->D.dst.rtype = dstType;
pAsm->D.dst.reg = dstReg;
@@ -3809,7 +4408,14 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
/* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
- pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MUL_LIT;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
+ }
pAsm->D.dst.math = 1;
pAsm->D.dst.op3 = 1;
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
@@ -3842,7 +4448,14 @@ GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
}
/* dst.z = exp(tmp.x) */
- pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ }
pAsm->D.dst.math = 1;
pAsm->D.dst.rtype = dstType;
pAsm->D.dst.reg = dstReg;
@@ -3997,7 +4610,14 @@ GLboolean assemble_POW(r700_AssemblerBase *pAsm)
tmp = gethelpr(pAsm);
// LG2 tmp.x, a.swizzle
- pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_LOG_IEEE;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
+ }
pAsm->D.dst.math = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -4041,7 +4661,14 @@ GLboolean assemble_POW(r700_AssemblerBase *pAsm)
// EX2 dst.mask, tmp.x
// EX2 tmp.x, tmp.x
- pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
+ }
pAsm->D.dst.math = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
@@ -4085,11 +4712,21 @@ GLboolean assemble_POW(r700_AssemblerBase *pAsm)
GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
{
+ if(8 == pAsm->unAsic)
+ {
+ return assemble_math_function(pAsm, EG_OP2_INST_RECIP_IEEE);
+ }
+
return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
}
GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
{
+ if(8 == pAsm->unAsic)
+ {
+ return assemble_math_function(pAsm, EG_OP2_INST_RECIPSQRT_IEEE);
+ }
+
return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
}
@@ -4175,7 +4812,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
}
// COS dst.x, a.x
- pAsm->D.dst.opcode = SQ_OP2_INST_COS;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_COS;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_COS;
+ }
pAsm->D.dst.math = 1;
assemble_dst(pAsm);
@@ -4194,7 +4838,14 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
}
// SIN dst.y, a.x
- pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_SIN;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
+ }
pAsm->D.dst.math = 1;
assemble_dst(pAsm);
@@ -4349,6 +5000,65 @@ GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
return GL_TRUE;
}
+GLboolean assemble_SSG(r700_AssemblerBase *pAsm)
+{
+ checkop1(pAsm);
+
+ GLuint tmp = gethelpr(pAsm);
+ /* tmp = (src > 0 ? 1 : src) */
+ pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ pAsm->D.dst.op3 = 1;
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ if( GL_FALSE == assemble_src(pAsm, 0, -1) )
+ {
+ return GL_FALSE;
+ }
+
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
+
+ if( GL_FALSE == assemble_src(pAsm, 0, 2) )
+ {
+ return GL_FALSE;
+ }
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ /* dst = (-tmp > 0 ? -1 : tmp) */
+ pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
+ pAsm->D.dst.op3 = 1;
+
+ if( GL_FALSE == assemble_dst(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ noswizzle_PVSSRC(&(pAsm->S[0].src));
+ neg_PVSSRC(&(pAsm->S[0].src));
+
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
+ neg_PVSSRC(&(pAsm->S[1].src));
+
+ setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
+ pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[2].src.reg = tmp;
+ noswizzle_PVSSRC(&(pAsm->S[2].src));
+
+ if( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ return GL_TRUE;
+}
+
GLboolean assemble_STP(r700_AssemblerBase *pAsm)
{
return GL_TRUE;
@@ -4387,7 +5097,14 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
{
GLuint tmp = gethelpr(pAsm);
- pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_RECIP_IEEE;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
+ }
pAsm->D.dst.math = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
@@ -4437,7 +5154,14 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
GLuint tmp2 = gethelpr(pAsm);
/* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
- pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_CUBE;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
+ }
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp1;
@@ -4462,7 +5186,14 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
}
/* tmp1.z = RCP_e(|tmp1.z|) */
- pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP2_INST_RECIP_IEEE;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
+ }
pAsm->D.dst.math = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
@@ -4481,7 +5212,14 @@ GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
* MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
* muladd has no writemask, have to use another temp
*/
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
@@ -4668,7 +5406,14 @@ GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ if(8 == pAsm->unAsic)
+ {
+ pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
+ }
+ else
+ {
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ }
pAsm->D.dst.op3 = 1;
if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
@@ -4825,16 +5570,49 @@ GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
return GL_FALSE;
}
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ EG_CF_INST_JUMP,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ pops,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
@@ -4848,17 +5626,50 @@ GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
return GL_FALSE;
}
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ EG_CF_INST_POP,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ pops,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
return GL_TRUE;
@@ -4876,23 +5687,66 @@ GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
return GL_FALSE;
}
- if(GL_TRUE != bHasElse)
- {
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ if(8 == pAsm->unAsic)
+ {
+ if(GL_TRUE != bHasElse)
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ }
+ else
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ }
+
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ EG_CF_INST_JUMP,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
}
else
{
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
- }
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ if(GL_TRUE != bHasElse)
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ }
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
pAsm->FCSP++;
pAsm->fc_stack[pAsm->FCSP].type = FC_IF;
@@ -4919,16 +5773,49 @@ GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ EG_CF_INST_ELSE,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
0,
@@ -4988,17 +5875,49 @@ GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
-
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ EG_CF_INST_LOOP_START_NO_AL,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
pAsm->FCSP++;
pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP;
@@ -5039,18 +5958,50 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
-
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ EG_CF_INST_LOOP_BREAK,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
(void *)pAsm->fc_stack[unFCSP].mid,
@@ -5064,18 +6015,52 @@ GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ EG_CF_INST_POP,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
- pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
+
+ pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
@@ -5109,18 +6094,50 @@ GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
-
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ EG_CF_INST_LOOP_CONTINUE,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
(void *)pAsm->fc_stack[unFCSP].mid,
@@ -5134,17 +6151,51 @@ GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ EG_CF_INST_POP,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
@@ -5163,17 +6214,49 @@ GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
return GL_FALSE;
}
-
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ EG_CF_INST_LOOP_END,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
@@ -5235,17 +6318,51 @@ void add_return_inst(r700_AssemblerBase *pAsm)
{
return;
}
- //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ EG_CF_INST_RETURN,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ }
+ else
+ {
+ //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
}
GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
@@ -5368,17 +6485,50 @@ GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
return GL_FALSE;
}
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ EG_CF_INST_CALL,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
/* Put in caller */
if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
@@ -5579,16 +6729,49 @@ GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
return GL_FALSE;
}
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ EG_CF_INST_LOOP_BREAK,
+ EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ SQ_CF_COND_ACTIVE,
+ EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
+ SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
+ }
+ else
+ {
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
(void *)pAsm->fc_stack[unFCSP].mid,
@@ -5677,10 +6860,19 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
return GL_FALSE;
break;
case OPCODE_COS:
- if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) )
- return GL_FALSE;
+ if(8 == pR700AsmCode->unAsic)
+ {
+ if ( GL_FALSE == assemble_TRIG(pR700AsmCode, EG_OP2_INST_COS) )
+ return GL_FALSE;
+ }
+ else
+ {
+ if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) )
+ return GL_FALSE;
+ }
break;
+ case OPCODE_DP2:
case OPCODE_DP3:
case OPCODE_DP4:
case OPCODE_DPH:
@@ -5794,8 +6986,16 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
return GL_FALSE;
break;
case OPCODE_SIN:
- if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) )
- return GL_FALSE;
+ if(8 == pR700AsmCode->unAsic)
+ {
+ if ( GL_FALSE == assemble_TRIG(pR700AsmCode, EG_OP2_INST_SIN) )
+ return GL_FALSE;
+ }
+ else
+ {
+ if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) )
+ return GL_FALSE;
+ }
break;
case OPCODE_SCS:
if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
@@ -5872,6 +7072,13 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
// return GL_FALSE;
// break;
+ case OPCODE_SSG:
+ if ( GL_FALSE == assemble_SSG(pR700AsmCode) )
+ {
+ return GL_FALSE;
+ }
+ break;
+
case OPCODE_SWZ:
if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
{
@@ -6006,7 +7213,7 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
return GL_TRUE;
default:
- radeon_error("internal: unknown instruction\n");
+ radeon_error("r600: unknown instruction %d\n", pILInst[i].Opcode);
return GL_FALSE;
}
}
@@ -6016,7 +7223,15 @@ GLboolean AssembleInstr(GLuint uiFirstInst,
GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
{
+#ifndef GENERATE_SHADER_FOR_2D
setRetInLoopFlag(pAsm, SQ_SEL_0);
+#endif
+
+ if((SPT_FP == pAsm->currentShaderType) && (8 == pAsm->unAsic))
+ {
+ EG_add_ps_interp(pAsm);
+ }
+
pAsm->alu_x_opcode = SQ_CF_INST_ALU;
return GL_TRUE;
}
@@ -6039,6 +7254,7 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
+#ifndef GENERATE_SHADER_FOR_2D
/* remove flags init if they are not used */
if((pAsm->unCFflags & HAS_LOOPRET) == 0)
{
@@ -6069,6 +7285,7 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
pInst = pInst->pNextInst;
};
}
+#endif /* GENERATE_SHADER_FOR_2D */
if(pAsm->CALLSTACK[0].max > 0)
{
@@ -6175,13 +7392,20 @@ GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
}
else
{
- if(pAsm->bR6xx)
+ if(8 == pAsm->unAsic)
{
- uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
+ uNumSrc = EG_GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
}
else
{
- uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
+ if(pAsm->bR6xx)
+ {
+ uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
+ }
+ else
+ {
+ uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
+ }
}
if(2 == uNumSrc)
{ /* 2 srcs */
@@ -6472,12 +7696,42 @@ GLboolean Process_Export(r700_AssemblerBase* pAsm,
pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
- pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
- pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
- pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
- pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
- pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
- pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
+ if(8 == pAsm->unAsic)
+ {
+ SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
+ (export_count - 1),
+ EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_shift,
+ EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_mask);
+ SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_ALLOC_EXPORT_WORD1__EOP_shift,
+ EG_CF_ALLOC_EXPORT_WORD1__EOP_bit);
+ SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_ALLOC_EXPORT_WORD1__VPM_shift,
+ EG_CF_ALLOC_EXPORT_WORD1__VPM_bit);
+ SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
+ EG_CF_INST_EXPORT,
+ EG_CF_WORD1__CF_INST_shift,
+ EG_CF_WORD1__CF_INST_mask);
+ SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
+ 0,
+ EG_CF_ALLOC_EXPORT_WORD1__MARK_shift,
+ EG_CF_ALLOC_EXPORT_WORD1__MARK_bit);
+ SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
+ 1,
+ EG_CF_ALLOC_EXPORT_WORD1__BARRIER_shift,
+ EG_CF_ALLOC_EXPORT_WORD1__BARRIER_bit);
+ }
+ else
+ {
+ pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
+ pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
+ pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
+ pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
+ pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
+ pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
+ }
if (export_count == 1)
{
@@ -6605,8 +7859,22 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
if(pR700AsmCode->cf_last_export_ptr != NULL)
{
- pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
- pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
+ if(8 == pR700AsmCode->unAsic)
+ {
+ SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
+ 1,
+ EG_CF_ALLOC_EXPORT_WORD1__EOP_shift,
+ EG_CF_ALLOC_EXPORT_WORD1__EOP_bit);
+ SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
+ EG_CF_INST_EXPORT_DONE,
+ EG_CF_WORD1__CF_INST_shift,
+ EG_CF_WORD1__CF_INST_mask);
+ }
+ else
+ {
+ pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+ pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
+ }
}
return GL_TRUE;
@@ -6652,7 +7920,17 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
export_count--;
}
- pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+ if(8 == pR700AsmCode->unAsic)
+ {
+ SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
+ EG_CF_INST_EXPORT_DONE,
+ EG_CF_WORD1__CF_INST_shift,
+ EG_CF_WORD1__CF_INST_mask);
+ }
+ else
+ {
+ pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+ }
pR700AsmCode->number_of_exports = export_count;
@@ -6747,7 +8025,17 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
// At least one param should be exported
if (export_count)
{
- pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+ if(8 == pR700AsmCode->unAsic)
+ {
+ SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
+ EG_CF_INST_EXPORT_DONE,
+ EG_CF_WORD1__CF_INST_shift,
+ EG_CF_WORD1__CF_INST_mask);
+ }
+ else
+ {
+ pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+ }
}
else
{
@@ -6765,7 +8053,17 @@ GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
- pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+ if(8 == pR700AsmCode->unAsic)
+ {
+ SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
+ EG_CF_INST_EXPORT_DONE,
+ EG_CF_WORD1__CF_INST_shift,
+ EG_CF_WORD1__CF_INST_mask);
+ }
+ else
+ {
+ pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
+ }
}
pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.h b/src/mesa/drivers/dri/r600/r700_assembler.h
index dbc6cdb190..d357b0e3ec 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.h
+++ b/src/mesa/drivers/dri/r600/r700_assembler.h
@@ -108,7 +108,7 @@ typedef enum AddressMode
typedef enum SrcRegisterType
{
SRC_REG_TEMPORARY = 0,
- SRC_REG_INPUT = 1,
+ SRC_REG_GPR = 1,
SRC_REG_CONSTANT = 2,
SRC_REG_ALT_TEMPORARY = 3,
SRC_REC_LITERAL = 4,
@@ -464,6 +464,10 @@ typedef struct r700_AssemblerBase
GLuint uiCurInst;
GLubyte SamplerUnits[MAX_SAMPLERS];
GLboolean bR6xx;
+
+ /* TODO : merge bR6xx */
+ GLuint unAsic;
+
/* helper to decide which type of instruction to assemble */
GLboolean is_tex;
/* we inserted helper intructions and need barrier on next TEX ins */
@@ -489,6 +493,9 @@ typedef struct r700_AssemblerBase
GLuint shadow_regs[R700_MAX_TEXTURE_UNITS];
+ GLboolean bUseMemConstant;
+ GLuint kcacheUsed;
+
} r700_AssemblerBase;
//Internal use
@@ -512,6 +519,8 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size);
unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3);
+unsigned int EG_GetNumOperands(GLuint opcode, GLuint nIsOp3);
+
GLboolean IsTex(gl_inst_opcode Opcode);
GLboolean IsAlu(gl_inst_opcode Opcode);
int check_current_clause(r700_AssemblerBase* pAsm,
@@ -535,6 +544,18 @@ GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
GLboolean normalize,
GLenum format,
VTX_FETCH_METHOD * pFetchMethod);
+
+GLboolean EG_assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
+ GLuint destination_register,
+ GLenum type,
+ GLint size,
+ GLubyte element,
+ GLuint _signed,
+ GLboolean normalize,
+ GLenum format,
+ VTX_FETCH_METHOD * pFetchMethod);
+//-----------------------
+
GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm);
GLuint gethelpr(r700_AssemblerBase* pAsm);
void resethelpr(r700_AssemblerBase* pAsm);
@@ -553,8 +574,10 @@ GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalize
void initialize(r700_AssemblerBase *pAsm);
GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
int source_index,
- PVSSRC* pSource,
- BITS scalar_channel_index);
+ PVSSRC* pSource,
+ BITS scalar_channel_index,
+ r700_AssemblerBase *pAsm);
+
GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
R700ALUInstruction* alu_instruction_ptr,
GLuint contiguous_slots_needed);
@@ -625,6 +648,7 @@ GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode);
GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode);
GLboolean assemble_SLT(r700_AssemblerBase *pAsm);
+GLboolean assemble_SSG(r700_AssemblerBase *pAsm);
GLboolean assemble_STP(r700_AssemblerBase *pAsm);
GLboolean assemble_TEX(r700_AssemblerBase *pAsm);
GLboolean assemble_XPD(r700_AssemblerBase *pAsm);
@@ -663,6 +687,7 @@ GLboolean callPreSub(r700_AssemblerBase* pAsm,
COMPILED_SUB * pCompiledSub,
GLshort uOutReg,
GLshort uNumValidSrc);
+GLboolean EG_add_ps_interp(r700_AssemblerBase* pAsm);
//Interface
GLboolean AssembleInstr(GLuint uiFirstInst,
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index bf8063391a..71f1af7562 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -173,7 +173,6 @@ static void r700SetupVTXConstants(GLcontext * ctx,
{
context_t *context = R700_CONTEXT(ctx);
struct radeon_aos * paos = (struct radeon_aos *)pAos;
- unsigned int nVBsize;
BATCH_LOCALS(&context->radeon);
unsigned int uSQ_VTX_CONSTANT_WORD0_0;
@@ -194,18 +193,8 @@ static void r700SetupVTXConstants(GLcontext * ctx,
else
r700SyncSurf(context, paos->bo, RADEON_GEM_DOMAIN_GTT, 0, VC_ACTION_ENA_bit);
- if(0 == pStreamDesc->stride)
- {
- nVBsize = paos->count * pStreamDesc->size * getTypeSize(pStreamDesc->type);
- }
- else
- {
- nVBsize = (paos->count - 1) * pStreamDesc->stride
- + pStreamDesc->size * getTypeSize(pStreamDesc->type);
- }
-
uSQ_VTX_CONSTANT_WORD0_0 = paos->offset;
- uSQ_VTX_CONSTANT_WORD1_0 = nVBsize - 1;
+ uSQ_VTX_CONSTANT_WORD1_0 = paos->bo->size - paos->offset - 1;
SETfield(uSQ_VTX_CONSTANT_WORD2_0, 0, BASE_ADDRESS_HI_shift, BASE_ADDRESS_HI_mask); /* TODO */
SETfield(uSQ_VTX_CONSTANT_WORD2_0, pStreamDesc->stride, SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift,
@@ -721,6 +710,7 @@ static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom)
context_t *context = R700_CONTEXT(ctx);
R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
struct radeon_bo * pbo;
+ struct radeon_bo * pbo_const;
BATCH_LOCALS(&context->radeon);
radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
@@ -750,6 +740,9 @@ static void r700SendPSState(GLcontext *ctx, struct radeon_state_atom *atom)
R600_OUT_BATCH_REGVAL(SQ_LOOP_CONST_0, 0x01000FFF);
END_BATCH();
+ pbo_const = (struct radeon_bo *)r700GetActiveFpShaderConstBo(GL_CONTEXT(context));
+ //TODO : set up shader const
+
COMMIT_BATCH();
}
@@ -759,13 +752,14 @@ static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom)
context_t *context = R700_CONTEXT(ctx);
R700_CHIP_CONTEXT *r700 = R700_CONTEXT_STATES(context);
struct radeon_bo * pbo;
+ struct radeon_bo * pbo_const;
BATCH_LOCALS(&context->radeon);
radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
pbo = (struct radeon_bo *)r700GetActiveVpShaderBo(GL_CONTEXT(context));
if (!pbo)
- return;
+ return;
r700SyncSurf(context, pbo, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit);
@@ -788,6 +782,29 @@ static void r700SendVSState(GLcontext *ctx, struct radeon_state_atom *atom)
//R600_OUT_BATCH_REGVAL((SQ_LOOP_CONST_0 + (SQ_LOOP_CONST_vs<2)), 0x0100000F);
END_BATCH();
+ /* TODO : handle 4 bufs */
+ if(GL_TRUE == r700->bShaderUseMemConstant)
+ {
+ pbo_const = (struct radeon_bo *)r700GetActiveVpShaderConstBo(GL_CONTEXT(context));
+ if(NULL != pbo_const)
+ {
+ r700SyncSurf(context, pbo_const, RADEON_GEM_DOMAIN_GTT, 0, SH_ACTION_ENA_bit); /* TODO : Check kc bit. */
+
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ R600_OUT_BATCH_REGVAL(SQ_ALU_CONST_BUFFER_SIZE_VS_0, (r700->vs.num_consts * 4)/16 );
+ END_BATCH();
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(SQ_ALU_CONST_CACHE_VS_0, 1);
+ R600_OUT_BATCH(r700->vs.SQ_ALU_CONST_CACHE_VS_0.u32All);
+ R600_OUT_BATCH_RELOC(r700->vs.SQ_ALU_CONST_CACHE_VS_0.u32All,
+ pbo_const,
+ r700->vs.SQ_ALU_CONST_CACHE_VS_0.u32All,
+ RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+ }
+ }
+
COMMIT_BATCH();
}
@@ -1558,45 +1575,55 @@ static void r600_init_query_stateobj(radeonContextPtr radeon, int SZ)
void r600InitAtoms(context_t *context)
{
- radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context);
- context->radeon.hw.max_state_size = 10 + 5 + 14; /* start 3d, idle, cb/db flush */
-
- /* Setup the atom linked list */
- make_empty_list(&context->radeon.hw.atomlist);
- context->radeon.hw.atomlist.name = "atom-list";
-
- ALLOC_STATE(sq, always, 34, r700SendSQConfig);
- ALLOC_STATE(db, always, 17, r700SendDBState);
- ALLOC_STATE(stencil, always, 4, r700SendStencilState);
- ALLOC_STATE(db_target, always, 16, r700SendDepthTargetState);
- ALLOC_STATE(sc, always, 15, r700SendSCState);
- ALLOC_STATE(scissor, always, 22, r700SendScissorState);
- ALLOC_STATE(aa, always, 12, r700SendAAState);
- ALLOC_STATE(cl, always, 12, r700SendCLState);
- ALLOC_STATE(gb, always, 6, r700SendGBState);
- ALLOC_STATE(ucp, ucp, (R700_MAX_UCP * 6), r700SendUCPState);
- ALLOC_STATE(su, always, 9, r700SendSUState);
- ALLOC_STATE(poly, always, 10, r700SendPolyState);
- ALLOC_STATE(cb, cb, 18, r700SendCBState);
- ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
- ALLOC_STATE(cb_target, always, 31, r700SendRenderTargetState);
- ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
- ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
- ALLOC_STATE(sx, always, 9, r700SendSXState);
- ALLOC_STATE(vgt, always, 41, r700SendVGTState);
- ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState);
- ALLOC_STATE(vpt, always, 16, r700SendViewportState);
- ALLOC_STATE(fs, always, 18, r700SendFSState);
- ALLOC_STATE(vs, always, 21, r700SendVSState);
- ALLOC_STATE(ps, always, 24, r700SendPSState);
- ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts);
- ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts);
- ALLOC_STATE(vtx, vtx, (VERT_ATTRIB_MAX * 18), r700SendVTXState);
- ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
- ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
- ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
- r600_init_query_stateobj(&context->radeon, 6 * 2);
-
- context->radeon.hw.is_dirty = GL_TRUE;
- context->radeon.hw.all_dirty = GL_TRUE;
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+ radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context);
+ context->radeon.hw.max_state_size = 10 + 5 + 14; /* start 3d, idle, cb/db flush */
+
+ /* Setup the atom linked list */
+ make_empty_list(&context->radeon.hw.atomlist);
+ context->radeon.hw.atomlist.name = "atom-list";
+
+ ALLOC_STATE(sq, always, 34, r700SendSQConfig);
+ ALLOC_STATE(db, always, 17, r700SendDBState);
+ ALLOC_STATE(stencil, always, 4, r700SendStencilState);
+ ALLOC_STATE(db_target, always, 16, r700SendDepthTargetState);
+ ALLOC_STATE(sc, always, 15, r700SendSCState);
+ ALLOC_STATE(scissor, always, 22, r700SendScissorState);
+ ALLOC_STATE(aa, always, 12, r700SendAAState);
+ ALLOC_STATE(cl, always, 12, r700SendCLState);
+ ALLOC_STATE(gb, always, 6, r700SendGBState);
+ ALLOC_STATE(ucp, ucp, (R700_MAX_UCP * 6), r700SendUCPState);
+ ALLOC_STATE(su, always, 9, r700SendSUState);
+ ALLOC_STATE(poly, always, 10, r700SendPolyState);
+ ALLOC_STATE(cb, cb, 18, r700SendCBState);
+ ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
+ ALLOC_STATE(cb_target, always, 31, r700SendRenderTargetState);
+ ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
+ ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
+ ALLOC_STATE(sx, always, 9, r700SendSXState);
+ ALLOC_STATE(vgt, always, 41, r700SendVGTState);
+ ALLOC_STATE(spi, always, (59 + R700_MAX_SHADER_EXPORTS), r700SendSPIState);
+ ALLOC_STATE(vpt, always, 16, r700SendViewportState);
+ ALLOC_STATE(fs, always, 18, r700SendFSState);
+ if(GL_TRUE == r700->bShaderUseMemConstant)
+ {
+ ALLOC_STATE(vs, always, 36, r700SendVSState);
+ ALLOC_STATE(ps, always, 24, r700SendPSState); /* TODO : not imp yet, fix later. */
+ }
+ else
+ {
+ ALLOC_STATE(vs, always, 21, r700SendVSState);
+ ALLOC_STATE(ps, always, 24, r700SendPSState);
+ ALLOC_STATE(vs_consts, vs_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendVSConsts);
+ ALLOC_STATE(ps_consts, ps_consts, (2 + (R700_MAX_DX9_CONSTS * 4)), r700SendPSConsts);
+ }
+
+ ALLOC_STATE(vtx, vtx, (VERT_ATTRIB_MAX * 18), r700SendVTXState);
+ ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
+ ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
+ ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
+ r600_init_query_stateobj(&context->radeon, 6 * 2);
+
+ context->radeon.hw.is_dirty = GL_TRUE;
+ context->radeon.hw.all_dirty = GL_TRUE;
}
diff --git a/src/mesa/drivers/dri/r600/r700_chip.h b/src/mesa/drivers/dri/r600/r700_chip.h
index 0b6b72f850..ebf1840a79 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.h
+++ b/src/mesa/drivers/dri/r600/r700_chip.h
@@ -43,6 +43,8 @@
#define SETbit(x, bit) ( (x) |= (bit) )
#define CLEARbit(x, bit) ( (x) &= ~(bit) )
+#define GETbits(x, shift, mask) ( ((x) & (mask)) >> (shift) )
+
#define R700_TEXTURE_NUMBERUNITS 16
#define R700_MAX_RENDER_TARGETS 8
#define R700_MAX_VIEWPORTS 16
@@ -238,6 +240,9 @@ typedef struct _VS_STATE_STRUCT
union UINT_FLOAT SQ_PGM_CF_OFFSET_VS ; /* 0xA234 */
GLboolean dirty;
int num_consts;
+
+ union UINT_FLOAT SQ_ALU_CONST_CACHE_VS_0;
+
union UINT_FLOAT consts[R700_MAX_DX9_CONSTS][4];
} VS_STATE_STRUCT;
@@ -499,6 +504,8 @@ typedef struct _R700_CHIP_CONTEXT
GLboolean bEnablePerspective;
+ GLboolean bShaderUseMemConstant;
+
} R700_CHIP_CONTEXT;
#endif /* _R700_CHIP_H_ */
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.c b/src/mesa/drivers/dri/r600/r700_fragprog.c
index f9d84b6ed6..6fdd93a330 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.c
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.c
@@ -362,6 +362,9 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
struct gl_fragment_program *mesa_fp,
GLcontext *ctx)
{
+ context_t *context = R700_CONTEXT(ctx);
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
GLuint number_of_colors_exported;
GLboolean z_enabled = GL_FALSE;
GLuint unBit, shadow_unit;
@@ -373,6 +376,17 @@ GLboolean r700TranslateFragmentShader(struct r700_fragment_program *fp,
//Init_Program
Init_r700_AssemblerBase( SPT_FP, &(fp->r700AsmCode), &(fp->r700Shader) );
+ if(GL_TRUE == r700->bShaderUseMemConstant)
+ {
+ fp->r700AsmCode.bUseMemConstant = GL_TRUE;
+ }
+ else
+ {
+ fp->r700AsmCode.bUseMemConstant = GL_FALSE;
+ }
+
+ fp->r700AsmCode.unAsic = 7;
+
if(mesa_fp->Base.InputsRead & FRAG_BIT_WPOS)
{
insert_wpos_code(ctx, mesa_fp);
@@ -481,6 +495,14 @@ void * r700GetActiveFpShaderBo(GLcontext * ctx)
return fp->shaderbo;
}
+void * r700GetActiveFpShaderConstBo(GLcontext * ctx)
+{
+ struct r700_fragment_program *fp = (struct r700_fragment_program *)
+ (ctx->FragmentProgram._Current);
+
+ return fp->constbo0;
+}
+
GLboolean r700SetupFragmentProgram(GLcontext * ctx)
{
context_t *context = R700_CONTEXT(ctx);
@@ -768,6 +790,17 @@ GLboolean r700SetupFragmentProgram(GLcontext * ctx)
r700->ps.consts[ui][2].f32All = paramList->ParameterValues[ui][2];
r700->ps.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
}
+
+ /* Load fp constants to gpu */
+ if( (GL_TRUE == r700->bShaderUseMemConstant) && (unNumParamData > 0) )
+ {
+ r600EmitShader(ctx,
+ &(fp->constbo0),
+ (GLvoid *)&(paramList->ParameterValues[0][0]),
+ unNumParamData * 4,
+ "FS Const");
+ }
+
} else
r700->ps.num_consts = 0;
diff --git a/src/mesa/drivers/dri/r600/r700_fragprog.h b/src/mesa/drivers/dri/r600/r700_fragprog.h
index 39c59c9201..aaa6043d5d 100644
--- a/src/mesa/drivers/dri/r600/r700_fragprog.h
+++ b/src/mesa/drivers/dri/r600/r700_fragprog.h
@@ -43,6 +43,9 @@ struct r700_fragment_program
void * shaderbo;
+ GLuint k0used;
+ void * constbo0;
+
GLboolean WritesDepth;
GLuint optimization;
};
@@ -67,4 +70,6 @@ extern GLboolean r700SetupFragmentProgram(GLcontext * ctx);
extern void * r700GetActiveFpShaderBo(GLcontext * ctx);
+extern void * r700GetActiveFpShaderConstBo(GLcontext * ctx);
+
#endif /*_R700_FRAGPROG_H_*/
diff --git a/src/mesa/drivers/dri/r600/r700_oglprog.c b/src/mesa/drivers/dri/r600/r700_oglprog.c
index 8351792511..e0c9179004 100644
--- a/src/mesa/drivers/dri/r600/r700_oglprog.c
+++ b/src/mesa/drivers/dri/r600/r700_oglprog.c
@@ -48,6 +48,12 @@ static void freeVertProgCache(GLcontext *ctx, struct r700_vertex_program_cont *c
tmp = vp->next;
/* Release DMA region */
r600DeleteShader(ctx, vp->shaderbo);
+
+ if(NULL != vp->constbo0)
+ {
+ r600DeleteShader(ctx, vp->constbo0);
+ }
+
/* Clean up */
Clean_Up_Assembler(&(vp->r700AsmCode));
Clean_Up_Shader(&(vp->r700Shader));
@@ -79,6 +85,7 @@ static struct gl_program *r700NewProgram(GLcontext * ctx,
&vpc->mesa_program,
target,
id);
+
break;
case GL_FRAGMENT_PROGRAM_NV:
case GL_FRAGMENT_PROGRAM_ARB:
@@ -92,6 +99,8 @@ static struct gl_program *r700NewProgram(GLcontext * ctx,
fp->shaderbo = NULL;
+ fp->constbo0 = NULL;
+
break;
default:
_mesa_problem(ctx, "Bad target in r700NewProgram");
@@ -121,6 +130,11 @@ static void r700DeleteProgram(GLcontext * ctx, struct gl_program *prog)
r600DeleteShader(ctx, fp->shaderbo);
+ if(NULL != fp->constbo0)
+ {
+ r600DeleteShader(ctx, fp->constbo0);
+ }
+
/* Clean up */
Clean_Up_Assembler(&(fp->r700AsmCode));
Clean_Up_Shader(&(fp->r700Shader));
@@ -145,6 +159,13 @@ r700ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
break;
case GL_FRAGMENT_PROGRAM_ARB:
r600DeleteShader(ctx, fp->shaderbo);
+
+ if(NULL != fp->constbo0)
+ {
+ r600DeleteShader(ctx, fp->constbo0);
+ fp->constbo0 = NULL;
+ }
+
Clean_Up_Assembler(&(fp->r700AsmCode));
Clean_Up_Shader(&(fp->r700Shader));
fp->translated = GL_FALSE;
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index c5771f9fd0..f90c69c416 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -644,6 +644,7 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input
#endif
)
{
+ assert(count);
r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
}
else
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 5ea8918611..925b4ffe6d 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -1580,7 +1580,16 @@ static void r700InitSQConfig(GLcontext * ctx)
CLEARbit(r700->sq_config.SQ_CONFIG.u32All, VC_ENABLE_bit);
else
SETbit(r700->sq_config.SQ_CONFIG.u32All, VC_ENABLE_bit);
- SETbit(r700->sq_config.SQ_CONFIG.u32All, DX9_CONSTS_bit);
+
+ if(GL_TRUE == r700->bShaderUseMemConstant)
+ {
+ CLEARbit(r700->sq_config.SQ_CONFIG.u32All, DX9_CONSTS_bit);
+ }
+ else
+ {
+ SETbit(r700->sq_config.SQ_CONFIG.u32All, DX9_CONSTS_bit);
+ }
+
SETbit(r700->sq_config.SQ_CONFIG.u32All, ALU_INST_PREFER_VECTOR_bit);
SETfield(r700->sq_config.SQ_CONFIG.u32All, ps_prio, PS_PRIO_shift, PS_PRIO_mask);
SETfield(r700->sq_config.SQ_CONFIG.u32All, vs_prio, VS_PRIO_shift, VS_PRIO_mask);
@@ -1689,8 +1698,9 @@ void r700InitState(GLcontext * ctx) //-------------------
SETbit(r700->PA_SC_MODE_CNTL.u32All, FORCE_EOV_CNTDWN_ENABLE_bit);
}
- /* Do scale XY and Z by 1/W0. */
- r700->bEnablePerspective = GL_TRUE;
+ /* Do scale XY and Z by 1/W0. */
+ r700->bEnablePerspective = GL_TRUE;
+
CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_XY_FMT_bit);
CLEARbit(r700->PA_CL_VTE_CNTL.u32All, VTX_Z_FMT_bit);
SETbit(r700->PA_CL_VTE_CNTL.u32All, VTX_W0_FMT_bit);
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 6a2a09eaf1..7ed4b7d238 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -305,12 +305,17 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
struct gl_vertex_program *mesa_vp)
{
context_t *context = R700_CONTEXT(ctx);
+
+ R700_CHIP_CONTEXT *r700 = (R700_CHIP_CONTEXT*)(&context->hw);
+
struct r700_vertex_program *vp;
unsigned int i;
vp = calloc(1, sizeof(*vp));
vp->mesa_program = _mesa_clone_vertex_program(ctx, mesa_vp);
+ vp->constbo0 = NULL;
+
if (mesa_vp->IsPositionInvariant)
{
_mesa_insert_mvp_code(ctx, vp->mesa_program);
@@ -331,6 +336,18 @@ struct r700_vertex_program* r700TranslateVertexShader(GLcontext *ctx,
//Init_Program
Init_r700_AssemblerBase(SPT_VP, &(vp->r700AsmCode), &(vp->r700Shader) );
+
+ if(GL_TRUE == r700->bShaderUseMemConstant)
+ {
+ vp->r700AsmCode.bUseMemConstant = GL_TRUE;
+ }
+ else
+ {
+ vp->r700AsmCode.bUseMemConstant = GL_FALSE;
+ }
+
+ vp->r700AsmCode.unAsic = 7;
+
Map_Vertex_Program(ctx, vp, vp->mesa_program );
if(GL_FALSE == Find_Instruction_Dependencies_vp(vp, vp->mesa_program))
@@ -576,6 +593,17 @@ void * r700GetActiveVpShaderBo(GLcontext * ctx)
return NULL;
}
+void * r700GetActiveVpShaderConstBo(GLcontext * ctx)
+{
+ context_t *context = R700_CONTEXT(ctx);
+ struct r700_vertex_program *vp = context->selected_vp;;
+
+ if (vp)
+ return vp->constbo0;
+ else
+ return NULL;
+}
+
GLboolean r700SetupVertexProgram(GLcontext * ctx)
{
context_t *context = R700_CONTEXT(ctx);
@@ -600,6 +628,19 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx)
vp->r700Shader.uShaderBinaryDWORDSize,
"VS");
+ if(GL_TRUE == r700->bShaderUseMemConstant)
+ {
+ paramList = vp->mesa_program->Base.Parameters;
+ if(NULL != paramList)
+ {
+ unNumParamData = paramList->NumParameters;
+ r600AllocShaderConsts(ctx,
+ &(vp->constbo0),
+ unNumParamData *4*4,
+ "VSCON");
+ }
+ }
+
vp->loaded = GL_TRUE;
}
@@ -616,7 +657,9 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx)
r700->vs.SQ_PGM_RESOURCES_VS.u32All = 0;
SETbit(r700->vs.SQ_PGM_RESOURCES_VS.u32All, PGM_RESOURCES__PRIME_CACHE_ON_DRAW_bit);
- r700->vs.SQ_PGM_START_VS.u32All = 0; /* set from buffer object. */
+ r700->vs.SQ_ALU_CONST_CACHE_VS_0.u32All = 0; /* set from buffer object. */
+
+ r700->vs.SQ_PGM_START_VS.u32All = 0;
SETfield(r700->vs.SQ_PGM_RESOURCES_VS.u32All, vp->r700Shader.nRegs + 1,
NUM_GPRS_shift, NUM_GPRS_mask);
@@ -687,6 +730,16 @@ GLboolean r700SetupVertexProgram(GLcontext * ctx)
r700->vs.consts[ui][3].f32All = paramList->ParameterValues[ui][3];
}
}
+
+ /* Load vp constants to gpu */
+ if(GL_TRUE == r700->bShaderUseMemConstant)
+ {
+ r600EmitShaderConsts(ctx,
+ vp->constbo0,
+ 0,
+ (GLvoid *)&(r700->vs.consts[0][0]),
+ unNumParamData * 4 * 4);
+ }
} else
r700->vs.num_consts = 0;
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.h b/src/mesa/drivers/dri/r600/r700_vertprog.h
index 645c9ac84a..9acdc8e350 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.h
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.h
@@ -56,6 +56,9 @@ struct r700_vertex_program
void * shaderbo;
+ GLuint K0used;
+ void * constbo0;
+
ArrayDesc aos_desc[VERT_ATTRIB_MAX];
};
@@ -97,6 +100,8 @@ extern GLboolean r700SetupVertexProgram(GLcontext * ctx);
extern void * r700GetActiveVpShaderBo(GLcontext * ctx);
+extern void * r700GetActiveVpShaderConstBo(GLcontext * ctx);
+
extern int getTypeSize(GLenum type);
#endif /* _R700_VERTPROG_H_ */
diff --git a/src/mesa/drivers/dri/radeon/radeon_chipset.h b/src/mesa/drivers/dri/radeon/radeon_chipset.h
index 7d54fabebb..61106fbc43 100644
--- a/src/mesa/drivers/dri/radeon/radeon_chipset.h
+++ b/src/mesa/drivers/dri/radeon/radeon_chipset.h
@@ -400,6 +400,46 @@
#define PCI_CHIP_RV740_94B5 0x94B5
#define PCI_CHIP_RV740_94B9 0x94B9
+#define PCI_CHIP_CEDAR_68E0 0x68E0
+#define PCI_CHIP_CEDAR_68E1 0x68E1
+#define PCI_CHIP_CEDAR_68E4 0x68E4
+#define PCI_CHIP_CEDAR_68E5 0x68E5
+#define PCI_CHIP_CEDAR_68E8 0x68E8
+#define PCI_CHIP_CEDAR_68E9 0x68E9
+#define PCI_CHIP_CEDAR_68F1 0x68F1
+#define PCI_CHIP_CEDAR_68F8 0x68F8
+#define PCI_CHIP_CEDAR_68F9 0x68F9
+#define PCI_CHIP_CEDAR_68FE 0x68FE
+
+#define PCI_CHIP_REDWOOD_68C0 0x68C0
+#define PCI_CHIP_REDWOOD_68C1 0x68C1
+#define PCI_CHIP_REDWOOD_68C8 0x68C8
+#define PCI_CHIP_REDWOOD_68C9 0x68C9
+#define PCI_CHIP_REDWOOD_68D8 0x68D8
+#define PCI_CHIP_REDWOOD_68D9 0x68D9
+#define PCI_CHIP_REDWOOD_68DA 0x68DA
+#define PCI_CHIP_REDWOOD_68DE 0x68DE
+
+#define PCI_CHIP_JUNIPER_68A0 0x68A0
+#define PCI_CHIP_JUNIPER_68A1 0x68A1
+#define PCI_CHIP_JUNIPER_68A8 0x68A8
+#define PCI_CHIP_JUNIPER_68A9 0x68A9
+#define PCI_CHIP_JUNIPER_68B0 0x68B0
+#define PCI_CHIP_JUNIPER_68B8 0x68B8
+#define PCI_CHIP_JUNIPER_68B9 0x68B9
+#define PCI_CHIP_JUNIPER_68BE 0x68BE
+
+#define PCI_CHIP_CYPRESS_6880 0x6880
+#define PCI_CHIP_CYPRESS_6888 0x6888
+#define PCI_CHIP_CYPRESS_6889 0x6889
+#define PCI_CHIP_CYPRESS_688A 0x688A
+#define PCI_CHIP_CYPRESS_6898 0x6898
+#define PCI_CHIP_CYPRESS_6899 0x6899
+#define PCI_CHIP_CYPRESS_689E 0x689E
+
+#define PCI_CHIP_HEMLOCK_689C 0x689C
+#define PCI_CHIP_HEMLOCK_689D 0x689D
+
enum {
CHIP_FAMILY_R100,
CHIP_FAMILY_RV100,
@@ -438,6 +478,11 @@ enum {
CHIP_FAMILY_RV730,
CHIP_FAMILY_RV710,
CHIP_FAMILY_RV740,
+ CHIP_FAMILY_CEDAR,
+ CHIP_FAMILY_REDWOOD,
+ CHIP_FAMILY_JUNIPER,
+ CHIP_FAMILY_CYPRESS,
+ CHIP_FAMILY_HEMLOCK,
CHIP_FAMILY_LAST
};
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index 92663bf66d..07f7cba354 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -93,6 +93,11 @@ static const char* get_chip_family_name(int chip_family)
case CHIP_FAMILY_RV730: return "RV730";
case CHIP_FAMILY_RV710: return "RV710";
case CHIP_FAMILY_RV740: return "RV740";
+ case CHIP_FAMILY_CEDAR: return "CEDAR";
+ case CHIP_FAMILY_REDWOOD: return "REDWOOD";
+ case CHIP_FAMILY_JUNIPER: return "JUNIPER";
+ case CHIP_FAMILY_CYPRESS: return "CYPRESS";
+ case CHIP_FAMILY_HEMLOCK: return "HEMLOCK";
default: return "unknown";
}
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index f06e5fdf24..024e31f8ec 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -244,6 +244,8 @@ struct radeon_tex_obj {
GLuint SQ_TEX_RESOURCE5;
GLuint SQ_TEX_RESOURCE6;
+ GLuint SQ_TEX_RESOURCE7;
+
GLuint SQ_TEX_SAMPLER0;
GLuint SQ_TEX_SAMPLER1;
GLuint SQ_TEX_SAMPLER2;
diff --git a/src/mesa/drivers/dri/radeon/radeon_fbo.c b/src/mesa/drivers/dri/radeon/radeon_fbo.c
index 517485091a..0597d4250d 100644
--- a/src/mesa/drivers/dri/radeon/radeon_fbo.c
+++ b/src/mesa/drivers/dri/radeon/radeon_fbo.c
@@ -609,6 +609,7 @@ radeon_validate_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb)
void radeon_fbo_init(struct radeon_context *radeon)
{
+#if FEATURE_EXT_framebuffer_object
radeon->glCtx->Driver.NewFramebuffer = radeon_new_framebuffer;
radeon->glCtx->Driver.NewRenderbuffer = radeon_new_renderbuffer;
radeon->glCtx->Driver.BindFramebuffer = radeon_bind_framebuffer;
@@ -617,7 +618,10 @@ void radeon_fbo_init(struct radeon_context *radeon)
radeon->glCtx->Driver.FinishRenderTexture = radeon_finish_render_texture;
radeon->glCtx->Driver.ResizeBuffers = radeon_resize_buffers;
radeon->glCtx->Driver.ValidateFramebuffer = radeon_validate_framebuffer;
+#endif
+#if FEATURE_EXT_framebuffer_blit
radeon->glCtx->Driver.BlitFramebuffer = _mesa_meta_BlitFramebuffer;
+#endif
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
index c6e5f110ea..ddfde3edaf 100644
--- a/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
+++ b/src/mesa/drivers/dri/radeon/radeon_mipmap_tree.c
@@ -199,10 +199,10 @@ static void calculate_miptree_layout_r300(radeonContextPtr rmesa, radeon_mipmap_
for(face = 0; face < mt->faces; face++)
compute_tex_image_offset(rmesa, mt, face, level, &curOffset);
- /* r600 cube levels seems to be aligned to 8 faces but
- * we have separate register for 1'st level offset so add
+ /* from r700? cube levels seems to be aligned to 8 faces,
+ * as we have separate register for 1'st level offset add
* 2 image alignment after 1'st mip level */
- if(rmesa->radeonScreen->chip_family >= CHIP_FAMILY_R600 &&
+ if(rmesa->radeonScreen->chip_family >= CHIP_FAMILY_RV770 &&
mt->target == GL_TEXTURE_CUBE_MAP && level >= 1)
curOffset += 2 * mt->levels[level].size;
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index fa97a19302..2ea77e56c7 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -916,6 +916,61 @@ static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
screen->chip_flags = RADEON_CHIPSET_TCL;
break;
+ case PCI_CHIP_CEDAR_68E0:
+ case PCI_CHIP_CEDAR_68E1:
+ case PCI_CHIP_CEDAR_68E4:
+ case PCI_CHIP_CEDAR_68E5:
+ case PCI_CHIP_CEDAR_68E8:
+ case PCI_CHIP_CEDAR_68E9:
+ case PCI_CHIP_CEDAR_68F1:
+ case PCI_CHIP_CEDAR_68F8:
+ case PCI_CHIP_CEDAR_68F9:
+ case PCI_CHIP_CEDAR_68FE:
+ screen->chip_family = CHIP_FAMILY_CEDAR;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_REDWOOD_68C0:
+ case PCI_CHIP_REDWOOD_68C1:
+ case PCI_CHIP_REDWOOD_68C8:
+ case PCI_CHIP_REDWOOD_68C9:
+ case PCI_CHIP_REDWOOD_68D8:
+ case PCI_CHIP_REDWOOD_68D9:
+ case PCI_CHIP_REDWOOD_68DA:
+ case PCI_CHIP_REDWOOD_68DE:
+ screen->chip_family = CHIP_FAMILY_REDWOOD;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_JUNIPER_68A0:
+ case PCI_CHIP_JUNIPER_68A1:
+ case PCI_CHIP_JUNIPER_68A8:
+ case PCI_CHIP_JUNIPER_68A9:
+ case PCI_CHIP_JUNIPER_68B0:
+ case PCI_CHIP_JUNIPER_68B8:
+ case PCI_CHIP_JUNIPER_68B9:
+ case PCI_CHIP_JUNIPER_68BE:
+ screen->chip_family = CHIP_FAMILY_JUNIPER;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_CYPRESS_6880:
+ case PCI_CHIP_CYPRESS_6888:
+ case PCI_CHIP_CYPRESS_6889:
+ case PCI_CHIP_CYPRESS_688A:
+ case PCI_CHIP_CYPRESS_6898:
+ case PCI_CHIP_CYPRESS_6899:
+ case PCI_CHIP_CYPRESS_689E:
+ screen->chip_family = CHIP_FAMILY_CYPRESS;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
+ case PCI_CHIP_HEMLOCK_689C:
+ case PCI_CHIP_HEMLOCK_689D:
+ screen->chip_family = CHIP_FAMILY_HEMLOCK;
+ screen->chip_flags = RADEON_CHIPSET_TCL;
+ break;
+
default:
fprintf(stderr, "unknown chip id 0x%x, can't guess.\n",
device_id);
@@ -1116,7 +1171,7 @@ radeonCreateScreen( __DRIscreen *sPriv )
}
}
else
- {
+ {
screen->fbLocation = (temp & 0xffff) << 16;
}
}
diff --git a/src/mesa/drivers/glslcompiler/Makefile b/src/mesa/drivers/glslcompiler/Makefile
deleted file mode 100644
index 6da9f93f59..0000000000
--- a/src/mesa/drivers/glslcompiler/Makefile
+++ /dev/null
@@ -1,43 +0,0 @@
-# Makefile for stand-alone GL-SL compiler
-
-TOP = ../../../..
-
-include $(TOP)/configs/current
-
-
-PROGRAM = glslcompiler
-
-OBJECTS = \
- glslcompiler.o \
- ../common/driverfuncs.o \
- ../../libmesa.a \
- $(TOP)/src/mapi/glapi/libglapi.a
-
-INCLUDES = \
- -I$(TOP)/include \
- -I$(TOP)/include/GL/internal \
- -I$(TOP)/src/mapi \
- -I$(TOP)/src/mesa \
- -I$(TOP)/src/mesa/main \
- -I$(TOP)/src/mesa/glapi \
- -I$(TOP)/src/mesa/math \
- -I$(TOP)/src/mesa/transform \
- -I$(TOP)/src/mesa/shader \
- -I$(TOP)/src/mesa/swrast \
- -I$(TOP)/src/mesa/swrast_setup \
-
-
-default: $(PROGRAM)
- $(INSTALL) $(PROGRAM) $(TOP)/bin
-
-
-glslcompiler: $(OBJECTS)
- $(CC) $(OBJECTS) $(GL_LIB_DEPS) -o $@
-
-
-glslcompiler.o: glslcompiler.c
- $(CC) -c $(INCLUDES) $(CFLAGS) glslcompiler.c -o $@
-
-
-clean:
- -rm -f *.o *~ $(PROGRAM)
diff --git a/src/mesa/drivers/glslcompiler/glslcompiler.c b/src/mesa/drivers/glslcompiler/glslcompiler.c
deleted file mode 100644
index 7259bf4c56..0000000000
--- a/src/mesa/drivers/glslcompiler/glslcompiler.c
+++ /dev/null
@@ -1,436 +0,0 @@
-/*
- * Mesa 3-D graphics library
- * Version: 6.5.3
- *
- * Copyright (C) 1999-2007 Brian Paul, Tungsten Graphics, Inc.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
- * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \mainpage
- *
- * Stand-alone Shading Language compiler.
- * Basically, a command-line program which accepts GLSL shaders and emits
- * vertex/fragment programs (GPU instructions).
- *
- * This file is basically just a Mesa device driver but instead of building
- * a shared library we build an executable.
- *
- * We can emit programs in three different formats:
- * 1. ARB-style (GL_ARB_vertex/fragment_program)
- * 2. NV-style (GL_NV_vertex/fragment_program)
- * 3. debug-style (a slightly more sophisticated, internal format)
- *
- * Note that the ARB and NV program languages can't express all the
- * features that might be used by a fragment program (examples being
- * uniform and varying vars). So, the ARB/NV programs that are
- * emitted aren't always legal programs in those languages.
- */
-
-
-#include "main/imports.h"
-#include "main/context.h"
-#include "main/extensions.h"
-#include "main/framebuffer.h"
-#include "main/shaderapi.h"
-#include "main/shaderobj.h"
-#include "program/prog_print.h"
-#include "drivers/common/driverfuncs.h"
-#include "tnl/tnl.h"
-#include "tnl/t_context.h"
-#include "tnl/t_pipeline.h"
-#include "swrast/swrast.h"
-#include "swrast_setup/swrast_setup.h"
-#include "vbo/vbo.h"
-
-
-static const char *Prog = "glslcompiler";
-
-
-struct options {
- GLboolean LineNumbers;
- GLboolean Link;
- gl_prog_print_mode Mode;
- const char *VertFile;
- const char *FragFile;
- const char *GeoFile;
- const char *OutputFile;
- GLboolean Params;
- struct gl_sl_pragmas Pragmas;
-};
-
-static struct options Options;
-
-
-/**
- * GLSL compiler driver context. (kind of an artificial thing for now)
- */
-struct compiler_context
-{
- GLcontext MesaContext;
- int foo;
-};
-
-typedef struct compiler_context CompilerContext;
-
-
-
-static void
-UpdateState(GLcontext *ctx, GLuint new_state)
-{
- /* easy - just propogate */
- _swrast_InvalidateState( ctx, new_state );
- _swsetup_InvalidateState( ctx, new_state );
- _tnl_InvalidateState( ctx, new_state );
- _vbo_InvalidateState( ctx, new_state );
-}
-
-
-
-static GLboolean
-CreateContext(void)
-{
- struct dd_function_table ddFuncs;
- GLvisual *vis;
- GLframebuffer *buf;
- GLcontext *ctx;
- CompilerContext *cc;
-
- vis = _mesa_create_visual(GL_FALSE, GL_FALSE, /* RGB */
- 8, 8, 8, 8, /* color */
- 0, 0, /* z, stencil */
- 0, 0, 0, 0, 1); /* accum */
- buf = _mesa_create_framebuffer(vis);
-
- cc = calloc(1, sizeof(*cc));
- if (!vis || !buf || !cc) {
- if (vis)
- _mesa_destroy_visual(vis);
- if (buf)
- _mesa_destroy_framebuffer(buf);
- free(cc);
- return GL_FALSE;
- }
-
- _mesa_init_driver_functions(&ddFuncs);
- ddFuncs.GetString = NULL;/*get_string;*/
- ddFuncs.UpdateState = UpdateState;
- ddFuncs.GetBufferSize = NULL;
-
- ctx = &cc->MesaContext;
- _mesa_initialize_context(ctx, vis, NULL, &ddFuncs, cc);
- _mesa_enable_sw_extensions(ctx);
-
- if (!_swrast_CreateContext( ctx ) ||
- !_vbo_CreateContext( ctx ) ||
- !_tnl_CreateContext( ctx ) ||
- !_swsetup_CreateContext( ctx )) {
- _mesa_destroy_visual(vis);
- _mesa_destroy_framebuffer(buf);
- _mesa_free_context_data(ctx);
- free(cc);
- return GL_FALSE;
- }
- TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
- _swsetup_Wakeup( ctx );
-
- /* Override the context's default pragma settings */
- ctx->Shader.DefaultPragmas = Options.Pragmas;
-
- _mesa_make_current(ctx, buf, buf);
-
- return GL_TRUE;
-}
-
-
-static void
-LoadAndCompileShader(GLuint shader, const char *text)
-{
- GLint stat;
- _mesa_ShaderSourceARB(shader, 1, (const GLchar **) &text, NULL);
- _mesa_CompileShaderARB(shader);
- _mesa_GetShaderiv(shader, GL_COMPILE_STATUS, &stat);
- if (!stat) {
- GLchar log[1000];
- GLsizei len;
- _mesa_GetShaderInfoLog(shader, 1000, &len, log);
- fprintf(stderr, "%s: problem compiling shader: %s\n", Prog, log);
- exit(1);
- }
- else {
- printf("Shader compiled OK\n");
- }
-}
-
-
-/**
- * Read a shader from a file.
- */
-static void
-ReadShader(GLuint shader, const char *filename)
-{
- const int max = 100*1000;
- int n;
- char *buffer = (char*) malloc(max);
- FILE *f = fopen(filename, "r");
- if (!f) {
- fprintf(stderr, "%s: Unable to open shader file %s\n", Prog, filename);
- exit(1);
- }
-
- n = fread(buffer, 1, max, f);
- /*
- printf("%s: read %d bytes from shader file %s\n", Prog, n, filename);
- */
- if (n > 0) {
- buffer[n] = 0;
- LoadAndCompileShader(shader, buffer);
- }
-
- fclose(f);
- free(buffer);
-}
-
-
-static void
-CheckLink(GLuint v_shader, GLuint f_shader)
-{
- GLuint prog;
- GLint stat;
-
- prog = _mesa_CreateProgram();
-
- _mesa_AttachShader(prog, v_shader);
- _mesa_AttachShader(prog, f_shader);
-
- _mesa_LinkProgramARB(prog);
- _mesa_GetProgramiv(prog, GL_LINK_STATUS, &stat);
- if (!stat) {
- GLchar log[1000];
- GLsizei len;
- _mesa_GetProgramInfoLog(prog, 1000, &len, log);
- fprintf(stderr, "Linker error:\n%s\n", log);
- }
- else {
- fprintf(stderr, "Link success!\n");
- }
-}
-
-
-static void
-PrintShaderInstructions(GLuint shader, FILE *f)
-{
- GET_CURRENT_CONTEXT(ctx);
- struct gl_shader *sh = _mesa_lookup_shader(ctx, shader);
- struct gl_program *prog = sh->Program;
- _mesa_fprint_program_opt(stdout, prog, Options.Mode, Options.LineNumbers);
- if (Options.Params)
- _mesa_print_program_parameters(ctx, prog);
-}
-
-
-static GLuint
-CompileShader(const char *filename, GLenum type)
-{
- GLuint shader;
-
- assert(type == GL_FRAGMENT_SHADER ||
- type == GL_VERTEX_SHADER ||
- type == GL_GEOMETRY_SHADER_ARB);
-
- shader = _mesa_CreateShader(type);
- ReadShader(shader, filename);
-
- return shader;
-}
-
-
-static void
-Usage(void)
-{
- printf("Mesa GLSL stand-alone compiler\n");
- printf("Usage:\n");
- printf(" --vs FILE vertex shader input filename\n");
- printf(" --fs FILE fragment shader input filename\n");
- printf(" --gs FILE geometry shader input filename\n");
- printf(" --arb emit ARB-style instructions\n");
- printf(" --nv emit NV-style instructions\n");
- printf(" --link run linker\n");
- printf(" --debug force #pragma debug(on)\n");
- printf(" --nodebug force #pragma debug(off)\n");
- printf(" --opt force #pragma optimize(on)\n");
- printf(" --noopt force #pragma optimize(off)\n");
- printf(" --number, -n emit line numbers (if --arb or --nv)\n");
- printf(" --output, -o FILE output filename\n");
- printf(" --params also emit program parameter info\n");
- printf(" --help display this information\n");
-}
-
-
-static void
-ParseOptions(int argc, char *argv[])
-{
- int i;
-
- Options.LineNumbers = GL_FALSE;
- Options.Mode = PROG_PRINT_DEBUG;
- Options.VertFile = NULL;
- Options.FragFile = NULL;
- Options.GeoFile = NULL;
- Options.OutputFile = NULL;
- Options.Params = GL_FALSE;
- Options.Pragmas.IgnoreOptimize = GL_FALSE;
- Options.Pragmas.IgnoreDebug = GL_FALSE;
- Options.Pragmas.Debug = GL_FALSE;
- Options.Pragmas.Optimize = GL_TRUE;
-
- if (argc == 1) {
- Usage();
- exit(0);
- }
-
- for (i = 1; i < argc; i++) {
- if (strcmp(argv[i], "--vs") == 0) {
- Options.VertFile = argv[i + 1];
- i++;
- }
- else if (strcmp(argv[i], "--fs") == 0) {
- Options.FragFile = argv[i + 1];
- i++;
- }
- else if (strcmp(argv[i], "--gs") == 0) {
- Options.GeoFile = argv[i + 1];
- i++;
- }
- else if (strcmp(argv[i], "--arb") == 0) {
- Options.Mode = PROG_PRINT_ARB;
- }
- else if (strcmp(argv[i], "--nv") == 0) {
- Options.Mode = PROG_PRINT_NV;
- }
- else if (strcmp(argv[i], "--link") == 0) {
- Options.Link = GL_TRUE;
- }
- else if (strcmp(argv[i], "--debug") == 0) {
- Options.Pragmas.IgnoreDebug = GL_TRUE;
- Options.Pragmas.Debug = GL_TRUE;
- }
- else if (strcmp(argv[i], "--nodebug") == 0) {
- Options.Pragmas.IgnoreDebug = GL_TRUE;
- Options.Pragmas.Debug = GL_FALSE;
- }
- else if (strcmp(argv[i], "--opt") == 0) {
- Options.Pragmas.IgnoreOptimize = GL_TRUE;
- Options.Pragmas.Optimize = GL_TRUE;
- }
- else if (strcmp(argv[i], "--noopt") == 0) {
- Options.Pragmas.IgnoreOptimize = GL_TRUE;
- Options.Pragmas.Optimize = GL_FALSE;
- }
- else if (strcmp(argv[i], "--number") == 0 ||
- strcmp(argv[i], "-n") == 0) {
- Options.LineNumbers = GL_TRUE;
- }
- else if (strcmp(argv[i], "--output") == 0 ||
- strcmp(argv[i], "-o") == 0) {
- Options.OutputFile = argv[i + 1];
- i++;
- }
- else if (strcmp(argv[i], "--params") == 0) {
- Options.Params = GL_TRUE;
- }
- else if (strcmp(argv[i], "--help") == 0) {
- Usage();
- exit(0);
- }
- else {
- printf("Unknown option: %s\n", argv[i]);
- Usage();
- exit(1);
- }
- }
-
- if (Options.Mode == PROG_PRINT_DEBUG) {
- /* always print line numbers when emitting debug-style output */
- Options.LineNumbers = GL_TRUE;
- }
-}
-
-
-int
-main(int argc, char *argv[])
-{
- GLuint v_shader = 0, f_shader = 0, g_shader = 0;
-
- ParseOptions(argc, argv);
-
- if (!CreateContext()) {
- fprintf(stderr, "%s: Failed to create compiler context\n", Prog);
- exit(1);
- }
-
- if (Options.VertFile) {
- v_shader = CompileShader(Options.VertFile, GL_VERTEX_SHADER);
- }
-
- if (Options.FragFile) {
- f_shader = CompileShader(Options.FragFile, GL_FRAGMENT_SHADER);
- }
-
- if (Options.GeoFile) {
- g_shader = CompileShader(Options.GeoFile, GL_GEOMETRY_SHADER_ARB);
- }
-
-
- if (v_shader || f_shader || g_shader) {
- if (Options.OutputFile) {
- FILE *f;
- fclose(stdout);
- /*stdout =*/ f = freopen(Options.OutputFile, "w", stdout);
- if (!f) {
- fprintf(stderr, "freopen error\n");
- }
- }
- if (stdout && v_shader) {
- PrintShaderInstructions(v_shader, stdout);
- }
- if (stdout && f_shader) {
- PrintShaderInstructions(f_shader, stdout);
- }
- if (stdout && g_shader) {
- PrintShaderInstructions(g_shader, stdout);
- }
- if (Options.OutputFile) {
- fclose(stdout);
- }
- }
-
- if (Options.Link) {
- if (!v_shader || !f_shader) {
- fprintf(stderr,
- "--link option requires both a vertex and fragment shader.\n");
- exit(1);
- }
-
- CheckLink(v_shader, f_shader);
- }
-
- return 0;
-}
diff --git a/src/mesa/drivers/osmesa/Makefile b/src/mesa/drivers/osmesa/Makefile
index c6b4a04085..39ab09af80 100644
--- a/src/mesa/drivers/osmesa/Makefile
+++ b/src/mesa/drivers/osmesa/Makefile
@@ -23,8 +23,7 @@ INCLUDE_DIRS = \
CORE_MESA = \
$(TOP)/src/mesa/libmesa.a \
$(TOP)/src/mapi/glapi/libglapi.a \
- $(TOP)/src/glsl/cl/libglslcl.a \
- $(TOP)/src/glsl/pp/libglslpp.a
+ $(TOP)/src/glsl/libglsl.a
.c.o:
$(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@
@@ -37,9 +36,9 @@ default: $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME)
# sources. We can also build libOSMesa16/libOSMesa32 by setting
# -DCHAN_BITS=16/32.
$(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME): $(OBJECTS) $(CORE_MESA)
- $(MKLIB) -o $(OSMESA_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+ $(MKLIB) -o $(OSMESA_LIB) -linker '$(CXX)' -ldflags '$(LDFLAGS)' \
-major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \
- -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
+ -install $(TOP)/$(LIB_DIR) -cplusplus $(MKLIB_OPTIONS) \
-id $(INSTALL_LIB_DIR)/lib$(OSMESA_LIB).$(MESA_MAJOR).dylib \
$(OSMESA_LIB_DEPS) $(OBJECTS) $(CORE_MESA)
diff --git a/src/mesa/drivers/x11/Makefile b/src/mesa/drivers/x11/Makefile
index b5b0c1f11a..f759da0a97 100644
--- a/src/mesa/drivers/x11/Makefile
+++ b/src/mesa/drivers/x11/Makefile
@@ -57,9 +57,10 @@ default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME)
$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(OBJECTS) $(CORE_MESA)
- @ $(MKLIB) -o $(GL_LIB) -linker '$(CC)' -ldflags '$(LDFLAGS)' \
+ @ $(MKLIB) -o $(GL_LIB) -linker '$(CXX)' -ldflags '$(LDFLAGS)' \
-major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \
- -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \
+ -install $(TOP)/$(LIB_DIR) \
+ -cplusplus $(MKLIB_OPTIONS) \
-id $(INSTALL_LIB_DIR)/lib$(GL_LIB).$(GL_MAJOR).dylib \
$(GL_LIB_DEPS) $(OBJECTS) $(CORE_MESA)