summaryrefslogtreecommitdiff
path: root/src/mesa/drivers/dri/r300
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers/dri/r300')
-rwxr-xr-xsrc/mesa/drivers/dri/r300/Lindent2
-rw-r--r--src/mesa/drivers/dri/r300/Makefile77
-rw-r--r--src/mesa/drivers/dri/r300/compiler/Makefile86
-rwxr-xr-xsrc/mesa/drivers/dri/r300/compiler/SConscript41
-rw-r--r--src/mesa/drivers/dri/r300/compiler/memory_pool.c97
-rw-r--r--src/mesa/drivers/dri/r300/compiler/memory_pool.h80
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog.c256
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog.h44
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c380
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c228
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h38
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c209
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c651
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c180
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c344
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.h50
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c495
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.c173
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h265
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.c309
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_compiler.h117
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c281
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h70
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c284
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c102
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c331
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h30
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c474
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h12
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c472
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h246
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c446
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c280
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c501
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c256
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.c183
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program.h233
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c975
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h63
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h150
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c84
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h126
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_print.c300
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c389
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h39
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h57
-rw-r--r--src/mesa/drivers/dri/r300/r300_blit.c663
-rw-r--r--src/mesa/drivers/dri/r300/r300_blit.h56
-rw-r--r--src/mesa/drivers/dri/r300/r300_cmdbuf.c907
-rw-r--r--src/mesa/drivers/dri/r300/r300_cmdbuf.h69
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.c570
-rw-r--r--src/mesa/drivers/dri/r300/r300_context.h560
-rw-r--r--src/mesa/drivers/dri/r300/r300_draw.c750
-rw-r--r--src/mesa/drivers/dri/r300/r300_emit.c135
-rw-r--r--src/mesa/drivers/dri/r300/r300_emit.h228
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.c305
-rw-r--r--src/mesa/drivers/dri/r300/r300_fragprog_common.h37
-rw-r--r--src/mesa/drivers/dri/r300/r300_reg.h3313
-rw-r--r--src/mesa/drivers/dri/r300/r300_render.c505
-rw-r--r--src/mesa/drivers/dri/r300/r300_render.h70
-rw-r--r--src/mesa/drivers/dri/r300/r300_shader.c145
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.c2416
-rw-r--r--src/mesa/drivers/dri/r300/r300_state.h62
-rw-r--r--src/mesa/drivers/dri/r300/r300_swtcl.c683
-rw-r--r--src/mesa/drivers/dri/r300/r300_swtcl.h65
-rw-r--r--src/mesa/drivers/dri/r300/r300_tex.c386
-rw-r--r--src/mesa/drivers/dri/r300/r300_tex.h59
-rw-r--r--src/mesa/drivers/dri/r300/r300_texstate.c524
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.c414
-rw-r--r--src/mesa/drivers/dri/r300/r300_vertprog.h11
l---------src/mesa/drivers/dri/r300/radeon_bo.c1
l---------src/mesa/drivers/dri/r300/radeon_bo_int_drm.h1
l---------src/mesa/drivers/dri/r300/radeon_bo_legacy.c1
l---------src/mesa/drivers/dri/r300/radeon_bo_legacy.h1
l---------src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h1
l---------src/mesa/drivers/dri/r300/radeon_buffer_objects.c1
l---------src/mesa/drivers/dri/r300/radeon_buffer_objects.h1
l---------src/mesa/drivers/dri/r300/radeon_chipset.h1
l---------src/mesa/drivers/dri/r300/radeon_cmdbuf.h1
l---------src/mesa/drivers/dri/r300/radeon_common.c1
l---------src/mesa/drivers/dri/r300/radeon_common.h1
l---------src/mesa/drivers/dri/r300/radeon_common_context.c1
l---------src/mesa/drivers/dri/r300/radeon_common_context.h1
-rw-r--r--src/mesa/drivers/dri/r300/radeon_context.h62
l---------src/mesa/drivers/dri/r300/radeon_cs.c1
l---------src/mesa/drivers/dri/r300/radeon_cs_int_drm.h1
l---------src/mesa/drivers/dri/r300/radeon_cs_legacy.c1
l---------src/mesa/drivers/dri/r300/radeon_cs_legacy.h1
l---------src/mesa/drivers/dri/r300/radeon_cs_space_drm.c1
l---------src/mesa/drivers/dri/r300/radeon_debug.c1
l---------src/mesa/drivers/dri/r300/radeon_debug.h1
l---------src/mesa/drivers/dri/r300/radeon_dma.c1
l---------src/mesa/drivers/dri/r300/radeon_dma.h1
l---------src/mesa/drivers/dri/r300/radeon_fbo.c1
l---------src/mesa/drivers/dri/r300/radeon_lock.c1
l---------src/mesa/drivers/dri/r300/radeon_lock.h1
-rw-r--r--src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c223
-rw-r--r--src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h36
l---------src/mesa/drivers/dri/r300/radeon_mipmap_tree.c1
l---------src/mesa/drivers/dri/r300/radeon_mipmap_tree.h1
l---------src/mesa/drivers/dri/r300/radeon_pixel_read.c1
l---------src/mesa/drivers/dri/r300/radeon_queryobj.c1
l---------src/mesa/drivers/dri/r300/radeon_queryobj.h1
l---------src/mesa/drivers/dri/r300/radeon_screen.c1
l---------src/mesa/drivers/dri/r300/radeon_screen.h1
l---------src/mesa/drivers/dri/r300/radeon_span.c1
l---------src/mesa/drivers/dri/r300/radeon_span.h1
l---------src/mesa/drivers/dri/r300/radeon_tex_copy.c1
l---------src/mesa/drivers/dri/r300/radeon_tex_getimage.c1
l---------src/mesa/drivers/dri/r300/radeon_texture.c1
l---------src/mesa/drivers/dri/r300/radeon_texture.h1
l---------src/mesa/drivers/dri/r300/radeon_tile.c1
l---------src/mesa/drivers/dri/r300/radeon_tile.h1
l---------src/mesa/drivers/dri/r300/server/radeon.h1
l---------src/mesa/drivers/dri/r300/server/radeon_dri.h1
l---------src/mesa/drivers/dri/r300/server/radeon_macros.h1
l---------src/mesa/drivers/dri/r300/server/radeon_reg.h1
117 files changed, 23804 insertions, 0 deletions
diff --git a/src/mesa/drivers/dri/r300/Lindent b/src/mesa/drivers/dri/r300/Lindent
new file mode 100755
index 0000000000..7d8d8896e3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/Lindent
@@ -0,0 +1,2 @@
+#!/bin/sh
+indent -npro -kr -i8 -ts8 -sob -l80 -ss -ncs "$@"
diff --git a/src/mesa/drivers/dri/r300/Makefile b/src/mesa/drivers/dri/r300/Makefile
new file mode 100644
index 0000000000..2245998c95
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/Makefile
@@ -0,0 +1,77 @@
+# src/mesa/drivers/dri/r300/Makefile
+
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+CFLAGS += $(RADEON_CFLAGS)
+
+LIBNAME = r300_dri.so
+
+ifeq ($(RADEON_LDFLAGS),)
+CS_SOURCES = radeon_cs_space_drm.c radeon_bo.c radeon_cs.c
+endif
+
+COMMON_SOURCES = \
+ ../../common/driverfuncs.c \
+ ../common/mm.c \
+ ../common/utils.c \
+ ../common/texmem.c \
+ ../common/vblank.c \
+ ../common/xmlconfig.c \
+ ../common/dri_util.c
+
+RADEON_COMMON_SOURCES = \
+ radeon_bo_legacy.c \
+ radeon_buffer_objects.c \
+ radeon_common_context.c \
+ radeon_common.c \
+ radeon_cs_legacy.c \
+ radeon_dma.c \
+ radeon_debug.c \
+ radeon_fbo.c \
+ radeon_lock.c \
+ radeon_mipmap_tree.c \
+ radeon_pixel_read.c \
+ radeon_queryobj.c \
+ radeon_span.c \
+ radeon_texture.c \
+ radeon_tex_copy.c \
+ radeon_tex_getimage.c \
+ radeon_tile.c
+
+DRIVER_SOURCES = \
+ radeon_screen.c \
+ r300_blit.c \
+ r300_context.c \
+ r300_draw.c \
+ r300_cmdbuf.c \
+ r300_state.c \
+ r300_render.c \
+ r300_tex.c \
+ r300_texstate.c \
+ r300_vertprog.c \
+ r300_fragprog_common.c \
+ r300_shader.c \
+ radeon_mesa_to_rc.c \
+ r300_emit.c \
+ r300_swtcl.c \
+ $(RADEON_COMMON_SOURCES) \
+ $(EGL_SOURCES) \
+ $(CS_SOURCES)
+
+C_SOURCES = $(COMMON_SOURCES) $(DRIVER_SOURCES)
+
+DRIVER_DEFINES = -DRADEON_R300
+# -DRADEON_BO_TRACK \
+
+DRI_LIB_DEPS += $(RADEON_LDFLAGS)
+
+SUBDIRS = compiler
+
+EXTRA_MODULES = compiler/libr300compiler.a
+
+
+##### TARGETS #####
+
+include ../Makefile.template
+
diff --git a/src/mesa/drivers/dri/r300/compiler/Makefile b/src/mesa/drivers/dri/r300/compiler/Makefile
new file mode 100644
index 0000000000..ff3801dc67
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/Makefile
@@ -0,0 +1,86 @@
+# src/mesa/drivers/dri/r300/compiler/Makefile
+
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = r300compiler
+
+C_SOURCES = \
+ radeon_code.c \
+ radeon_compiler.c \
+ radeon_emulate_branches.c \
+ radeon_emulate_loops.c \
+ radeon_program.c \
+ radeon_program_print.c \
+ radeon_opcodes.c \
+ radeon_program_alu.c \
+ radeon_program_pair.c \
+ radeon_program_tex.c \
+ radeon_pair_translate.c \
+ radeon_pair_schedule.c \
+ radeon_pair_regalloc.c \
+ radeon_dataflow.c \
+ radeon_dataflow_deadcode.c \
+ radeon_dataflow_swizzles.c \
+ radeon_optimize.c \
+ r3xx_fragprog.c \
+ r300_fragprog.c \
+ r300_fragprog_swizzle.c \
+ r300_fragprog_emit.c \
+ r500_fragprog.c \
+ r500_fragprog_emit.c \
+ r3xx_vertprog.c \
+ r3xx_vertprog_dump.c \
+ \
+ memory_pool.c
+
+
+### Basic defines ###
+
+OBJECTS = $(C_SOURCES:.c=.o) \
+ $(CPP_SOURCES:.cpp=.o) \
+ $(ASM_SOURCES:.S=.o)
+
+INCLUDES = \
+ -I. \
+ -I$(TOP)/include \
+ -I$(TOP)/src/mesa \
+
+
+##### TARGETS #####
+
+default: depend lib$(LIBNAME).a
+
+lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/configs/current
+ $(MKLIB) -o $(LIBNAME) -static $(OBJECTS)
+
+depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS)
+ rm -f depend
+ touch depend
+ $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null
+
+# Emacs tags
+tags:
+ etags `find . -name \*.[ch]` `find ../include`
+
+# Remove .o and backup files
+clean:
+ rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak
+
+# Dummy target
+install:
+ @echo -n ""
+
+##### RULES #####
+
+.c.o:
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+.cpp.o:
+ $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+.S.o:
+ $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@
+
+
+sinclude depend
diff --git a/src/mesa/drivers/dri/r300/compiler/SConscript b/src/mesa/drivers/dri/r300/compiler/SConscript
new file mode 100755
index 0000000000..50d9cdb7f2
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/SConscript
@@ -0,0 +1,41 @@
+Import('*')
+
+env = env.Clone()
+env.Append(CPPPATH = '#/include')
+env.Append(CPPPATH = '#/src/mesa')
+
+# temporary fix
+env['CFLAGS'] = str(env['CFLAGS']).replace('-Werror=declaration-after-statement', '')
+
+r300compiler = env.ConvenienceLibrary(
+ target = 'r300compiler',
+ source = [
+ 'radeon_code.c',
+ 'radeon_compiler.c',
+ 'radeon_program.c',
+ 'radeon_program_print.c',
+ 'radeon_opcodes.c',
+ 'radeon_program_alu.c',
+ 'radeon_program_pair.c',
+ 'radeon_program_tex.c',
+ 'radeon_pair_translate.c',
+ 'radeon_pair_schedule.c',
+ 'radeon_pair_regalloc.c',
+ 'radeon_optimize.c',
+ 'radeon_emulate_branches.c',
+ 'radeon_emulate_loops.c',
+ 'radeon_dataflow.c',
+ 'radeon_dataflow_deadcode.c',
+ 'radeon_dataflow_swizzles.c',
+ 'r3xx_fragprog.c',
+ 'r300_fragprog.c',
+ 'r300_fragprog_swizzle.c',
+ 'r300_fragprog_emit.c',
+ 'r500_fragprog.c',
+ 'r500_fragprog_emit.c',
+ 'r3xx_vertprog.c',
+ 'r3xx_vertprog_dump.c',
+ 'memory_pool.c',
+ ])
+
+Return('r300compiler')
diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.c b/src/mesa/drivers/dri/r300/compiler/memory_pool.c
new file mode 100644
index 0000000000..76c7c60d8f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "memory_pool.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+#define POOL_LARGE_ALLOC 4096
+#define POOL_ALIGN 4
+
+
+struct memory_block {
+ struct memory_block * next;
+};
+
+void memory_pool_init(struct memory_pool * pool)
+{
+ memset(pool, 0, sizeof(struct memory_pool));
+}
+
+
+void memory_pool_destroy(struct memory_pool * pool)
+{
+ while(pool->blocks) {
+ struct memory_block * block = pool->blocks;
+ pool->blocks = block->next;
+ free(block);
+ }
+}
+
+static void refill_pool(struct memory_pool * pool)
+{
+ unsigned int blocksize = pool->total_allocated;
+ struct memory_block * newblock;
+
+ if (!blocksize)
+ blocksize = 2*POOL_LARGE_ALLOC;
+
+ newblock = (struct memory_block*)malloc(blocksize);
+ newblock->next = pool->blocks;
+ pool->blocks = newblock;
+
+ pool->head = (unsigned char*)(newblock + 1);
+ pool->end = ((unsigned char*)newblock) + blocksize;
+ pool->total_allocated += blocksize;
+}
+
+
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes)
+{
+ if (bytes < POOL_LARGE_ALLOC) {
+ void * ptr;
+
+ if (pool->head + bytes > pool->end)
+ refill_pool(pool);
+
+ assert(pool->head + bytes <= pool->end);
+
+ ptr = pool->head;
+
+ pool->head += bytes;
+ pool->head = (unsigned char*)(((unsigned long)pool->head + POOL_ALIGN - 1) & ~(POOL_ALIGN - 1));
+
+ return ptr;
+ } else {
+ struct memory_block * block = (struct memory_block*)malloc(bytes + sizeof(struct memory_block));
+
+ block->next = pool->blocks;
+ pool->blocks = block;
+
+ return (block + 1);
+ }
+}
+
+
diff --git a/src/mesa/drivers/dri/r300/compiler/memory_pool.h b/src/mesa/drivers/dri/r300/compiler/memory_pool.h
new file mode 100644
index 0000000000..42344d0e3b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/memory_pool.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef MEMORY_POOL_H
+#define MEMORY_POOL_H
+
+struct memory_block;
+
+/**
+ * Provides a pool of memory that can quickly be allocated from, at the
+ * cost of being unable to explicitly free one of the allocated blocks.
+ * Instead, the entire pool can be freed at once.
+ *
+ * The idea is to allow one to quickly allocate a flexible amount of
+ * memory during operations like shader compilation while avoiding
+ * reference counting headaches.
+ */
+struct memory_pool {
+ unsigned char * head;
+ unsigned char * end;
+ unsigned int total_allocated;
+ struct memory_block * blocks;
+};
+
+
+void memory_pool_init(struct memory_pool * pool);
+void memory_pool_destroy(struct memory_pool * pool);
+void * memory_pool_malloc(struct memory_pool * pool, unsigned int bytes);
+
+
+/**
+ * Generic helper for growing an array that has separate size/count
+ * and reserved counters to accomodate up to num new element.
+ *
+ * type * Array;
+ * unsigned int Size;
+ * unsigned int Reserved;
+ *
+ * memory_pool_array_reserve(pool, type, Array, Size, Reserved, k);
+ * assert(Size + k < Reserved);
+ *
+ * \note Size is not changed by this macro.
+ *
+ * \warning Array, Size, Reserved have to be lvalues and may be evaluated
+ * several times.
+ */
+#define memory_pool_array_reserve(pool, type, array, size, reserved, num) do { \
+ unsigned int _num = (num); \
+ if ((size) + _num > (reserved)) { \
+ unsigned int newreserve = (reserved) * 2; \
+ type * newarray; \
+ if (newreserve < _num) \
+ newreserve = 4 * _num; /* arbitrary heuristic */ \
+ newarray = memory_pool_malloc((pool), newreserve * sizeof(type)); \
+ memcpy(newarray, (array), (size) * sizeof(type)); \
+ (array) = newarray; \
+ (reserved) = newreserve; \
+ } \
+} while(0)
+
+#endif /* MEMORY_POOL_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
new file mode 100644
index 0000000000..794db8335a
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r300_fragprog.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+/* just some random things... */
+void r300FragmentProgramDump(struct rX00_fragment_program_code *c)
+{
+ struct r300_fragment_program_code *code = &c->code.r300;
+ int n, i, j;
+ static int pc = 0;
+
+ fprintf(stderr, "pc=%d*************************************\n", pc++);
+
+ fprintf(stderr, "Hardware program\n");
+ fprintf(stderr, "----------------\n");
+
+ for (n = 0; n <= (code->config & 3); n++) {
+ uint32_t code_addr = code->code_addr[3 - (code->config & 3) + n];
+ int alu_offset = (code_addr & R300_ALU_START_MASK) >> R300_ALU_START_SHIFT;
+ int alu_end = (code_addr & R300_ALU_SIZE_MASK) >> R300_ALU_SIZE_SHIFT;
+ int tex_offset = (code_addr & R300_TEX_START_MASK) >> R300_TEX_START_SHIFT;
+ int tex_end = (code_addr & R300_TEX_SIZE_MASK) >> R300_TEX_SIZE_SHIFT;
+
+ fprintf(stderr, "NODE %d: alu_offset: %d, tex_offset: %d, "
+ "alu_end: %d, tex_end: %d (code_addr: %08x)\n", n,
+ alu_offset, tex_offset, alu_end, tex_end, code_addr);
+
+ if (n > 0 || (code->config & R300_PFS_CNTL_FIRST_NODE_HAS_TEX)) {
+ fprintf(stderr, " TEX:\n");
+ for (i = tex_offset;
+ i <= tex_offset + tex_end;
+ ++i) {
+ const char *instr;
+
+ switch ((code->tex.
+ inst[i] >> R300_TEX_INST_SHIFT) &
+ 15) {
+ case R300_TEX_OP_LD:
+ instr = "TEX";
+ break;
+ case R300_TEX_OP_KIL:
+ instr = "KIL";
+ break;
+ case R300_TEX_OP_TXP:
+ instr = "TXP";
+ break;
+ case R300_TEX_OP_TXB:
+ instr = "TXB";
+ break;
+ default:
+ instr = "UNKNOWN";
+ }
+
+ fprintf(stderr,
+ " %s t%i, %c%i, texture[%i] (%08x)\n",
+ instr,
+ (code->tex.
+ inst[i] >> R300_DST_ADDR_SHIFT) & 31,
+ 't',
+ (code->tex.
+ inst[i] >> R300_SRC_ADDR_SHIFT) & 31,
+ (code->tex.
+ inst[i] & R300_TEX_ID_MASK) >>
+ R300_TEX_ID_SHIFT,
+ code->tex.inst[i]);
+ }
+ }
+
+ for (i = alu_offset;
+ i <= alu_offset + alu_end; ++i) {
+ char srcc[3][10], dstc[20];
+ char srca[3][10], dsta[20];
+ char argc[3][20];
+ char arga[3][20];
+ char flags[5], tmp[10];
+
+ for (j = 0; j < 3; ++j) {
+ int regc = code->alu.inst[i].rgb_addr >> (j * 6);
+ int rega = code->alu.inst[i].alpha_addr >> (j * 6);
+
+ sprintf(srcc[j], "%c%i",
+ (regc & 32) ? 'c' : 't', regc & 31);
+ sprintf(srca[j], "%c%i",
+ (rega & 32) ? 'c' : 't', rega & 31);
+ }
+
+ dstc[0] = 0;
+ sprintf(flags, "%s%s%s",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_REG_X) ? "x" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_REG_Y) ? "y" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_REG_Z) ? "z" : "");
+ if (flags[0] != 0) {
+ sprintf(dstc, "t%i.%s ",
+ (code->alu.inst[i].
+ rgb_addr >> R300_ALU_DSTC_SHIFT) & 31,
+ flags);
+ }
+ sprintf(flags, "%s%s%s",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_OUTPUT_X) ? "x" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_OUTPUT_Y) ? "y" : "",
+ (code->alu.inst[i].
+ rgb_addr & R300_ALU_DSTC_OUTPUT_Z) ? "z" : "");
+ if (flags[0] != 0) {
+ sprintf(tmp, "o%i.%s",
+ (code->alu.inst[i].
+ rgb_addr >> 29) & 3,
+ flags);
+ strcat(dstc, tmp);
+ }
+
+ dsta[0] = 0;
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_REG) {
+ sprintf(dsta, "t%i.w ",
+ (code->alu.inst[i].
+ alpha_addr >> R300_ALU_DSTA_SHIFT) & 31);
+ }
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_OUTPUT) {
+ sprintf(tmp, "o%i.w ",
+ (code->alu.inst[i].
+ alpha_addr >> 25) & 3);
+ strcat(dsta, tmp);
+ }
+ if (code->alu.inst[i].alpha_addr & R300_ALU_DSTA_DEPTH) {
+ strcat(dsta, "Z");
+ }
+
+ fprintf(stderr,
+ "%3i: xyz: %3s %3s %3s -> %-20s (%08x)\n"
+ " w: %3s %3s %3s -> %-20s (%08x)\n", i,
+ srcc[0], srcc[1], srcc[2], dstc,
+ code->alu.inst[i].rgb_addr, srca[0], srca[1],
+ srca[2], dsta, code->alu.inst[i].alpha_addr);
+
+ for (j = 0; j < 3; ++j) {
+ int regc = code->alu.inst[i].rgb_inst >> (j * 7);
+ int rega = code->alu.inst[i].alpha_inst >> (j * 7);
+ int d;
+ char buf[20];
+
+ d = regc & 31;
+ if (d < 12) {
+ switch (d % 4) {
+ case R300_ALU_ARGC_SRC0C_XYZ:
+ sprintf(buf, "%s.xyz",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_XXX:
+ sprintf(buf, "%s.xxx",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_YYY:
+ sprintf(buf, "%s.yyy",
+ srcc[d / 4]);
+ break;
+ case R300_ALU_ARGC_SRC0C_ZZZ:
+ sprintf(buf, "%s.zzz",
+ srcc[d / 4]);
+ break;
+ }
+ } else if (d < 15) {
+ sprintf(buf, "%s.www", srca[d - 12]);
+ } else if (d == 20) {
+ sprintf(buf, "0.0");
+ } else if (d == 21) {
+ sprintf(buf, "1.0");
+ } else if (d == 22) {
+ sprintf(buf, "0.5");
+ } else if (d >= 23 && d < 32) {
+ d -= 23;
+ switch (d / 3) {
+ case 0:
+ sprintf(buf, "%s.yzx",
+ srcc[d % 3]);
+ break;
+ case 1:
+ sprintf(buf, "%s.zxy",
+ srcc[d % 3]);
+ break;
+ case 2:
+ sprintf(buf, "%s.Wzy",
+ srcc[d % 3]);
+ break;
+ }
+ } else {
+ sprintf(buf, "%i", d);
+ }
+
+ sprintf(argc[j], "%s%s%s%s",
+ (regc & 32) ? "-" : "",
+ (regc & 64) ? "|" : "",
+ buf, (regc & 64) ? "|" : "");
+
+ d = rega & 31;
+ if (d < 9) {
+ sprintf(buf, "%s.%c", srcc[d / 3],
+ 'x' + (char)(d % 3));
+ } else if (d < 12) {
+ sprintf(buf, "%s.w", srca[d - 9]);
+ } else if (d == 16) {
+ sprintf(buf, "0.0");
+ } else if (d == 17) {
+ sprintf(buf, "1.0");
+ } else if (d == 18) {
+ sprintf(buf, "0.5");
+ } else {
+ sprintf(buf, "%i", d);
+ }
+
+ sprintf(arga[j], "%s%s%s%s",
+ (rega & 32) ? "-" : "",
+ (rega & 64) ? "|" : "",
+ buf, (rega & 64) ? "|" : "");
+ }
+
+ fprintf(stderr, " xyz: %8s %8s %8s op: %08x\n"
+ " w: %8s %8s %8s op: %08x\n",
+ argc[0], argc[1], argc[2],
+ code->alu.inst[i].rgb_inst, arga[0], arga[1],
+ arga[2], code->alu.inst[i].alpha_inst);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
new file mode 100644
index 0000000000..8b755703be
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs <darktama@iinet.net.au>
+ * Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R300_FRAGPROG_H_
+#define __R300_FRAGPROG_H_
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+
+extern void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
+
+extern void r300FragmentProgramDump(struct rX00_fragment_program_code *c);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
new file mode 100644
index 0000000000..b27a683c39
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_emit.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * Emit the r300_fragment_program_code that can be understood by the hardware.
+ * Input is a pre-transformed radeon_program.
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ *
+ * \todo FogOption
+ */
+
+#include "r300_fragprog.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_program_pair.h"
+#include "r300_fragprog_swizzle.h"
+
+
+struct r300_emit_state {
+ struct r300_fragment_program_compiler * compiler;
+
+ unsigned current_node : 2;
+ unsigned node_first_tex : 8;
+ unsigned node_first_alu : 8;
+ uint32_t node_flags;
+};
+
+#define PROG_CODE \
+ struct r300_fragment_program_compiler *c = emit->compiler; \
+ struct r300_fragment_program_code *code = &c->code->code.r300
+
+#define error(fmt, args...) do { \
+ rc_error(&c->Base, "%s::%s(): " fmt "\n", \
+ __FILE__, __FUNCTION__, ##args); \
+ } while(0)
+
+
+/**
+ * Mark a temporary register as used.
+ */
+static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
+{
+ if (index > code->pixsize)
+ code->pixsize = index;
+}
+
+static unsigned int use_source(struct r300_fragment_program_code* code, struct radeon_pair_instruction_source src)
+{
+ if (src.File == RC_FILE_CONSTANT) {
+ return src.Index | (1 << 5);
+ } else if (src.File == RC_FILE_TEMPORARY) {
+ use_temporary(code, src.Index);
+ return src.Index;
+ }
+
+ return 0;
+}
+
+
+static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
+{
+ switch(opcode) {
+ case RC_OPCODE_CMP: return R300_ALU_OUTC_CMP;
+ case RC_OPCODE_DP3: return R300_ALU_OUTC_DP3;
+ case RC_OPCODE_DP4: return R300_ALU_OUTC_DP4;
+ case RC_OPCODE_FRC: return R300_ALU_OUTC_FRC;
+ default:
+ error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
+ /* fall through */
+ case RC_OPCODE_NOP:
+ /* fall through */
+ case RC_OPCODE_MAD: return R300_ALU_OUTC_MAD;
+ case RC_OPCODE_MAX: return R300_ALU_OUTC_MAX;
+ case RC_OPCODE_MIN: return R300_ALU_OUTC_MIN;
+ case RC_OPCODE_REPL_ALPHA: return R300_ALU_OUTC_REPL_ALPHA;
+ }
+}
+
+static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
+{
+ switch(opcode) {
+ case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
+ case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
+ case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
+ case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
+ case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
+ case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
+ default:
+ error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
+ /* fall through */
+ case RC_OPCODE_NOP:
+ /* fall through */
+ case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
+ case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
+ case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
+ case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
+ case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
+ }
+}
+
+/**
+ * Emit one paired ALU instruction.
+ */
+static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
+{
+ PROG_CODE;
+
+ if (code->alu.length >= R300_PFS_MAX_ALU_INST) {
+ error("Too many ALU instructions");
+ return 0;
+ }
+
+ int ip = code->alu.length++;
+ int j;
+
+ code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
+ code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
+
+ for(j = 0; j < 3; ++j) {
+ unsigned int src = use_source(code, inst->RGB.Src[j]);
+ code->alu.inst[ip].rgb_addr |= src << (6*j);
+
+ src = use_source(code, inst->Alpha.Src[j]);
+ code->alu.inst[ip].alpha_addr |= src << (6*j);
+
+ unsigned int arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
+ arg |= inst->RGB.Arg[j].Abs << 6;
+ arg |= inst->RGB.Arg[j].Negate << 5;
+ code->alu.inst[ip].rgb_inst |= arg << (7*j);
+
+ arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
+ arg |= inst->Alpha.Arg[j].Abs << 6;
+ arg |= inst->Alpha.Arg[j].Negate << 5;
+ code->alu.inst[ip].alpha_inst |= arg << (7*j);
+ }
+
+ if (inst->RGB.Saturate)
+ code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
+ if (inst->Alpha.Saturate)
+ code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
+
+ if (inst->RGB.WriteMask) {
+ use_temporary(code, inst->RGB.DestIndex);
+ code->alu.inst[ip].rgb_addr |=
+ (inst->RGB.DestIndex << R300_ALU_DSTC_SHIFT) |
+ (inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
+ }
+ if (inst->RGB.OutputWriteMask) {
+ code->alu.inst[ip].rgb_addr |=
+ (inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
+ R300_RGB_TARGET(inst->RGB.Target);
+ emit->node_flags |= R300_RGBA_OUT;
+ }
+
+ if (inst->Alpha.WriteMask) {
+ use_temporary(code, inst->Alpha.DestIndex);
+ code->alu.inst[ip].alpha_addr |=
+ (inst->Alpha.DestIndex << R300_ALU_DSTA_SHIFT) |
+ R300_ALU_DSTA_REG;
+ }
+ if (inst->Alpha.OutputWriteMask) {
+ code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
+ R300_ALPHA_TARGET(inst->Alpha.Target);
+ emit->node_flags |= R300_RGBA_OUT;
+ }
+ if (inst->Alpha.DepthWriteMask) {
+ code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
+ emit->node_flags |= R300_W_OUT;
+ c->code->writes_depth = 1;
+ }
+
+ return 1;
+}
+
+
+/**
+ * Finish the current node without advancing to the next one.
+ */
+static int finish_node(struct r300_emit_state * emit)
+{
+ struct r300_fragment_program_compiler * c = emit->compiler;
+ struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
+
+ if (code->alu.length == emit->node_first_alu) {
+ /* Generate a single NOP for this node */
+ struct rc_pair_instruction inst;
+ memset(&inst, 0, sizeof(inst));
+ if (!emit_alu(emit, &inst))
+ return 0;
+ }
+
+ unsigned alu_offset = emit->node_first_alu;
+ unsigned alu_end = code->alu.length - alu_offset - 1;
+ unsigned tex_offset = emit->node_first_tex;
+ unsigned tex_end = code->tex.length - tex_offset - 1;
+
+ if (code->tex.length == emit->node_first_tex) {
+ if (emit->current_node > 0) {
+ error("Node %i has no TEX instructions", emit->current_node);
+ return 0;
+ }
+
+ tex_end = 0;
+ } else {
+ if (emit->current_node == 0)
+ code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
+ }
+
+ /* Write the config register.
+ * Note: The order in which the words for each node are written
+ * is not correct here and needs to be fixed up once we're entirely
+ * done
+ *
+ * Also note that the register specification from AMD is slightly
+ * incorrect in its description of this register. */
+ code->code_addr[emit->current_node] =
+ (alu_offset << R300_ALU_START_SHIFT) |
+ (alu_end << R300_ALU_SIZE_SHIFT) |
+ (tex_offset << R300_TEX_START_SHIFT) |
+ (tex_end << R300_TEX_SIZE_SHIFT) |
+ emit->node_flags;
+
+ return 1;
+}
+
+
+/**
+ * Begin a block of texture instructions.
+ * Create the necessary indirection.
+ */
+static int begin_tex(struct r300_emit_state * emit)
+{
+ PROG_CODE;
+
+ if (code->alu.length == emit->node_first_alu &&
+ code->tex.length == emit->node_first_tex) {
+ return 1;
+ }
+
+ if (emit->current_node == 3) {
+ error("Too many texture indirections");
+ return 0;
+ }
+
+ if (!finish_node(emit))
+ return 0;
+
+ emit->current_node++;
+ emit->node_first_tex = code->tex.length;
+ emit->node_first_alu = code->alu.length;
+ emit->node_flags = 0;
+ return 1;
+}
+
+
+static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
+{
+ PROG_CODE;
+
+ if (code->tex.length >= R300_PFS_MAX_TEX_INST) {
+ error("Too many TEX instructions");
+ return 0;
+ }
+
+ unsigned int unit = inst->U.I.TexSrcUnit;
+ unsigned int dest = inst->U.I.DstReg.Index;
+ unsigned int opcode;
+
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
+ case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
+ case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
+ case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
+ default:
+ error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
+ return 0;
+ }
+
+ if (inst->U.I.Opcode == RC_OPCODE_KIL) {
+ unit = 0;
+ dest = 0;
+ } else {
+ use_temporary(code, dest);
+ }
+
+ use_temporary(code, inst->U.I.SrcReg[0].Index);
+
+ code->tex.inst[code->tex.length++] =
+ (inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT) |
+ (dest << R300_DST_ADDR_SHIFT) |
+ (unit << R300_TEX_ID_SHIFT) |
+ (opcode << R300_TEX_INST_SHIFT);
+ return 1;
+}
+
+
+/**
+ * Final compilation step: Turn the intermediate radeon_program into
+ * machine-readable instructions.
+ */
+void r300BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
+{
+ struct r300_emit_state emit;
+ struct r300_fragment_program_code *code = &compiler->code->code.r300;
+
+ memset(&emit, 0, sizeof(emit));
+ emit.compiler = compiler;
+
+ memset(code, 0, sizeof(struct r300_fragment_program_code));
+
+ for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+ inst = inst->Next) {
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+ begin_tex(&emit);
+ continue;
+ }
+
+ emit_tex(&emit, inst);
+ } else {
+ emit_alu(&emit, &inst->U.P);
+ }
+ }
+
+ if (code->pixsize >= compiler->Base.max_temp_regs)
+ rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
+
+ if (compiler->Base.Error)
+ return;
+
+ /* Finish the program */
+ finish_node(&emit);
+
+ code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
+ code->code_offset =
+ (0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT) |
+ ((code->alu.length-1) << R300_PFS_CNTL_ALU_END_SHIFT) |
+ (0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT) |
+ ((code->tex.length ? code->tex.length-1 : 0) << R300_PFS_CNTL_TEX_END_SHIFT);
+
+ if (emit.current_node < 3) {
+ int shift = 3 - emit.current_node;
+ int i;
+ for(i = emit.current_node; i >= 0; --i)
+ code->code_addr[shift + i] = code->code_addr[i];
+ for(i = 0; i < shift; ++i)
+ code->code_addr[i] = 0;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
new file mode 100644
index 0000000000..5d5de2f1b2
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.c
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ * Utilities to deal with the somewhat odd restriction on R300 fragment
+ * program swizzles.
+ */
+
+#include "r300_fragprog_swizzle.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+#include "radeon_compiler.h"
+
+#define MAKE_SWZ3(x, y, z) (RC_MAKE_SWIZZLE(RC_SWIZZLE_##x, RC_SWIZZLE_##y, RC_SWIZZLE_##z, RC_SWIZZLE_ZERO))
+
+struct swizzle_data {
+ unsigned int hash; /**< swizzle value this matches */
+ unsigned int base; /**< base value for hw swizzle */
+ unsigned int stride; /**< difference in base between arg0/1/2 */
+};
+
+static const struct swizzle_data native_swizzles[] = {
+ {MAKE_SWZ3(X, Y, Z), R300_ALU_ARGC_SRC0C_XYZ, 4},
+ {MAKE_SWZ3(X, X, X), R300_ALU_ARGC_SRC0C_XXX, 4},
+ {MAKE_SWZ3(Y, Y, Y), R300_ALU_ARGC_SRC0C_YYY, 4},
+ {MAKE_SWZ3(Z, Z, Z), R300_ALU_ARGC_SRC0C_ZZZ, 4},
+ {MAKE_SWZ3(W, W, W), R300_ALU_ARGC_SRC0A, 1},
+ {MAKE_SWZ3(Y, Z, X), R300_ALU_ARGC_SRC0C_YZX, 1},
+ {MAKE_SWZ3(Z, X, Y), R300_ALU_ARGC_SRC0C_ZXY, 1},
+ {MAKE_SWZ3(W, Z, Y), R300_ALU_ARGC_SRC0CA_WZY, 1},
+ {MAKE_SWZ3(ONE, ONE, ONE), R300_ALU_ARGC_ONE, 0},
+ {MAKE_SWZ3(ZERO, ZERO, ZERO), R300_ALU_ARGC_ZERO, 0},
+ {MAKE_SWZ3(HALF, HALF, HALF), R300_ALU_ARGC_HALF, 0}
+};
+
+static const int num_native_swizzles = sizeof(native_swizzles)/sizeof(native_swizzles[0]);
+
+
+/**
+ * Find a native RGB swizzle that matches the given swizzle.
+ * Returns 0 if none found.
+ */
+static const struct swizzle_data* lookup_native_swizzle(unsigned int swizzle)
+{
+ int i, comp;
+
+ for(i = 0; i < num_native_swizzles; ++i) {
+ const struct swizzle_data* sd = &native_swizzles[i];
+ for(comp = 0; comp < 3; ++comp) {
+ unsigned int swz = GET_SWZ(swizzle, comp);
+ if (swz == RC_SWIZZLE_UNUSED)
+ continue;
+ if (swz != GET_SWZ(sd->hash, comp))
+ break;
+ }
+ if (comp == 3)
+ return sd;
+ }
+
+ return 0;
+}
+
+
+/**
+ * Check whether the given instruction supports the swizzle and negate
+ * combinations in the given source register.
+ */
+static int r300_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+ if (reg.Abs)
+ reg.Negate = RC_MASK_NONE;
+
+ if (opcode == RC_OPCODE_KIL ||
+ opcode == RC_OPCODE_TEX ||
+ opcode == RC_OPCODE_TXB ||
+ opcode == RC_OPCODE_TXP) {
+ int j;
+
+ if (reg.Abs || reg.Negate)
+ return 0;
+
+ for(j = 0; j < 4; ++j) {
+ unsigned int swz = GET_SWZ(reg.Swizzle, j);
+ if (swz == RC_SWIZZLE_UNUSED)
+ continue;
+ if (swz != j)
+ return 0;
+ }
+
+ return 1;
+ }
+
+ unsigned int relevant = 0;
+ int j;
+
+ for(j = 0; j < 3; ++j)
+ if (GET_SWZ(reg.Swizzle, j) != RC_SWIZZLE_UNUSED)
+ relevant |= 1 << j;
+
+ if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
+ return 0;
+
+ if (!lookup_native_swizzle(reg.Swizzle))
+ return 0;
+
+ return 1;
+}
+
+
+static void r300_swizzle_split(
+ struct rc_src_register src, unsigned int mask,
+ struct rc_swizzle_split * split)
+{
+ if (src.Abs)
+ src.Negate = RC_MASK_NONE;
+
+ split->NumPhases = 0;
+
+ while(mask) {
+ const struct swizzle_data *best_swizzle = 0;
+ unsigned int best_matchcount = 0;
+ unsigned int best_matchmask = 0;
+ int i, comp;
+
+ for(i = 0; i < num_native_swizzles; ++i) {
+ const struct swizzle_data *sd = &native_swizzles[i];
+ unsigned int matchcount = 0;
+ unsigned int matchmask = 0;
+ for(comp = 0; comp < 3; ++comp) {
+ if (!GET_BIT(mask, comp))
+ continue;
+ unsigned int swz = GET_SWZ(src.Swizzle, comp);
+ if (swz == RC_SWIZZLE_UNUSED)
+ continue;
+ if (swz == GET_SWZ(sd->hash, comp)) {
+ /* check if the negate bit of current component
+ * is the same for already matched components */
+ if (matchmask && (!!(src.Negate & matchmask) != !!(src.Negate & (1 << comp))))
+ continue;
+
+ matchcount++;
+ matchmask |= 1 << comp;
+ }
+ }
+ if (matchcount > best_matchcount) {
+ best_swizzle = sd;
+ best_matchcount = matchcount;
+ best_matchmask = matchmask;
+ if (matchmask == (mask & RC_MASK_XYZ))
+ break;
+ }
+ }
+
+ if (mask & RC_MASK_W)
+ best_matchmask |= RC_MASK_W;
+
+ split->Phase[split->NumPhases++] = best_matchmask;
+ mask &= ~best_matchmask;
+ }
+}
+
+struct rc_swizzle_caps r300_swizzle_caps = {
+ .IsNative = r300_swizzle_is_native,
+ .Split = r300_swizzle_split
+};
+
+
+/**
+ * Translate an RGB (XYZ) swizzle into the hardware code for the given
+ * instruction source.
+ */
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle)
+{
+ const struct swizzle_data* sd = lookup_native_swizzle(swizzle);
+
+ if (!sd) {
+ fprintf(stderr, "Not a native swizzle: %08x\n", swizzle);
+ return 0;
+ }
+
+ return sd->base + src*sd->stride;
+}
+
+
+/**
+ * Translate an Alpha (W) swizzle into the hardware code for the given
+ * instruction source.
+ */
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle)
+{
+ if (swizzle < 3)
+ return swizzle + 3*src;
+
+ switch(swizzle) {
+ case RC_SWIZZLE_W: return R300_ALU_ARGA_SRC0A + src;
+ case RC_SWIZZLE_ONE: return R300_ALU_ARGA_ONE;
+ case RC_SWIZZLE_ZERO: return R300_ALU_ARGA_ZERO;
+ case RC_SWIZZLE_HALF: return R300_ALU_ARGA_HALF;
+ default: return R300_ALU_ARGA_ONE;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
new file mode 100644
index 0000000000..118476af13
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r300_fragprog_swizzle.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_FRAGPROG_SWIZZLE_H_
+#define __R300_FRAGPROG_SWIZZLE_H_
+
+#include "radeon_swizzle.h"
+
+extern struct rc_swizzle_caps r300_swizzle_caps;
+
+unsigned int r300FPTranslateRGBSwizzle(unsigned int src, unsigned int swizzle);
+unsigned int r300FPTranslateAlphaSwizzle(unsigned int src, unsigned int swizzle);
+
+#endif /* __R300_FRAGPROG_SWIZZLE_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
new file mode 100644
index 0000000000..38312658d6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdio.h>
+
+#include "radeon_dataflow.h"
+#include "radeon_emulate_branches.h"
+#include "radeon_emulate_loops.h"
+#include "radeon_program_alu.h"
+#include "radeon_program_tex.h"
+#include "r300_fragprog.h"
+#include "r300_fragprog_swizzle.h"
+#include "r500_fragprog.h"
+
+
+static void dataflow_outputs_mark_use(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
+{
+ struct r300_fragment_program_compiler * c = userdata;
+ callback(data, c->OutputColor[0], RC_MASK_XYZW);
+ callback(data, c->OutputColor[1], RC_MASK_XYZW);
+ callback(data, c->OutputColor[2], RC_MASK_XYZW);
+ callback(data, c->OutputColor[3], RC_MASK_XYZW);
+ callback(data, c->OutputDepth, RC_MASK_W);
+}
+
+static void rewrite_depth_out(struct r300_fragment_program_compiler * c)
+{
+ struct rc_instruction *rci;
+
+ for (rci = c->Base.Program.Instructions.Next; rci != &c->Base.Program.Instructions; rci = rci->Next) {
+ struct rc_sub_instruction * inst = &rci->U.I;
+
+ if (inst->DstReg.File != RC_FILE_OUTPUT || inst->DstReg.Index != c->OutputDepth)
+ continue;
+
+ if (inst->DstReg.WriteMask & RC_MASK_Z) {
+ inst->DstReg.WriteMask = RC_MASK_W;
+ } else {
+ inst->DstReg.WriteMask = 0;
+ continue;
+ }
+
+ switch (inst->Opcode) {
+ case RC_OPCODE_FRC:
+ case RC_OPCODE_MOV:
+ inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ break;
+ case RC_OPCODE_ADD:
+ case RC_OPCODE_MAX:
+ case RC_OPCODE_MIN:
+ case RC_OPCODE_MUL:
+ inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ break;
+ case RC_OPCODE_CMP:
+ case RC_OPCODE_MAD:
+ inst->SrcReg[0] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[0]);
+ inst->SrcReg[1] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[1]);
+ inst->SrcReg[2] = lmul_swizzle(RC_SWIZZLE_ZZZZ, inst->SrcReg[2]);
+ break;
+ default:
+ // Scalar instructions needn't be reswizzled
+ break;
+ }
+ }
+}
+
+static void debug_program_log(struct r300_fragment_program_compiler* c, const char * where)
+{
+ if (c->Base.Debug) {
+ fprintf(stderr, "Fragment Program: %s\n", where);
+ rc_print_program(&c->Base.Program);
+ }
+}
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
+{
+ rewrite_depth_out(c);
+
+ debug_program_log(c, "before compilation");
+
+ /* XXX Ideally this should be done only for r3xx, but since
+ * we don't have branching support for r5xx, we use the emulation
+ * on all chipsets. */
+
+ if(c->Base.is_r500){
+ rc_emulate_loops(&c->Base, R500_PFS_MAX_INST);
+ }
+ else{
+ rc_emulate_loops(&c->Base, R300_PFS_MAX_ALU_INST);
+ }
+ debug_program_log(c, "after emulate loops");
+
+ rc_emulate_branches(&c->Base);
+
+ debug_program_log(c, "after emulate branches");
+
+ if (c->Base.is_r500) {
+ struct radeon_program_transformation transformations[] = {
+ { &r500_transform_IF, 0 },
+ { &radeonTransformALU, 0 },
+ { &radeonTransformDeriv, 0 },
+ { &radeonTransformTrigScale, 0 }
+ };
+ radeonLocalTransform(&c->Base, 4, transformations);
+
+ debug_program_log(c, "after native rewrite part 1");
+
+ c->Base.SwizzleCaps = &r500_swizzle_caps;
+ } else {
+ struct radeon_program_transformation transformations[] = {
+ { &radeonTransformALU, 0 },
+ { &radeonTransformTrigSimple, 0 }
+ };
+ radeonLocalTransform(&c->Base, 2, transformations);
+
+ debug_program_log(c, "after native rewrite part 1");
+
+ c->Base.SwizzleCaps = &r300_swizzle_caps;
+ }
+
+ /* Run the common transformations too.
+ * Remember, lowering comes last! */
+ struct radeon_program_transformation common_transformations[] = {
+ { &radeonTransformTEX, c },
+ };
+ radeonLocalTransform(&c->Base, 1, common_transformations);
+
+ common_transformations[0].function = &radeonTransformALU;
+ radeonLocalTransform(&c->Base, 1, common_transformations);
+
+ if (c->Base.Error)
+ return;
+
+ debug_program_log(c, "after native rewrite part 2");
+
+ rc_dataflow_deadcode(&c->Base, &dataflow_outputs_mark_use, c);
+ if (c->Base.Error)
+ return;
+
+ debug_program_log(c, "after deadcode");
+
+ rc_optimize(&c->Base);
+
+ debug_program_log(c, "after dataflow optimize");
+
+ rc_dataflow_swizzles(&c->Base);
+ if (c->Base.Error)
+ return;
+
+ debug_program_log(c, "after dataflow passes");
+
+ rc_pair_translate(c);
+ if (c->Base.Error)
+ return;
+
+ debug_program_log(c, "after pair translate");
+
+ rc_pair_schedule(c);
+ if (c->Base.Error)
+ return;
+
+ debug_program_log(c, "after pair scheduling");
+
+ rc_pair_regalloc(c, c->Base.max_temp_regs);
+
+ if (c->Base.Error)
+ return;
+
+ debug_program_log(c, "after register allocation");
+
+ if (c->Base.is_r500) {
+ r500BuildFragmentProgramHwCode(c);
+ } else {
+ r300BuildFragmentProgramHwCode(c);
+ }
+
+ rc_constants_copy(&c->code->constants, &c->Base.Program.Constants);
+
+ if (c->Base.Debug) {
+ if (c->Base.is_r500) {
+ r500FragmentProgramDump(c->code);
+ } else {
+ r300FragmentProgramDump(c->code);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
new file mode 100644
index 0000000000..507b2e532f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -0,0 +1,651 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+#include "radeon_dataflow.h"
+#include "radeon_program_alu.h"
+#include "radeon_swizzle.h"
+#include "radeon_emulate_branches.h"
+
+/*
+ * Take an already-setup and valid source then swizzle it appropriately to
+ * obtain a constant ZERO or ONE source.
+ */
+#define __CONST(x, y) \
+ (PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[x]), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_swizzle(y), \
+ t_src_class(vpi->SrcReg[x].File), \
+ RC_MASK_NONE) | (vpi->SrcReg[x].RelAddr << 4))
+
+
+static unsigned long t_dst_mask(unsigned int mask)
+{
+ /* RC_MASK_* is equivalent to VSF_FLAG_* */
+ return mask & RC_MASK_XYZW;
+}
+
+static unsigned long t_dst_class(rc_register_file file)
+{
+ switch (file) {
+ default:
+ fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+ /* fall-through */
+ case RC_FILE_TEMPORARY:
+ return PVS_DST_REG_TEMPORARY;
+ case RC_FILE_OUTPUT:
+ return PVS_DST_REG_OUT;
+ case RC_FILE_ADDRESS:
+ return PVS_DST_REG_A0;
+ }
+}
+
+static unsigned long t_dst_index(struct r300_vertex_program_code *vp,
+ struct rc_dst_register *dst)
+{
+ if (dst->File == RC_FILE_OUTPUT)
+ return vp->outputs[dst->Index];
+
+ return dst->Index;
+}
+
+static unsigned long t_src_class(rc_register_file file)
+{
+ switch (file) {
+ default:
+ fprintf(stderr, "%s: Bad register file %i\n", __FUNCTION__, file);
+ /* fall-through */
+ case RC_FILE_NONE:
+ case RC_FILE_TEMPORARY:
+ return PVS_SRC_REG_TEMPORARY;
+ case RC_FILE_INPUT:
+ return PVS_SRC_REG_INPUT;
+ case RC_FILE_CONSTANT:
+ return PVS_SRC_REG_CONSTANT;
+ }
+}
+
+static int t_src_conflict(struct rc_src_register a, struct rc_src_register b)
+{
+ unsigned long aclass = t_src_class(a.File);
+ unsigned long bclass = t_src_class(b.File);
+
+ if (aclass != bclass)
+ return 0;
+ if (aclass == PVS_SRC_REG_TEMPORARY)
+ return 0;
+
+ if (a.RelAddr || b.RelAddr)
+ return 1;
+ if (a.Index != b.Index)
+ return 1;
+
+ return 0;
+}
+
+static inline unsigned long t_swizzle(unsigned int swizzle)
+{
+ /* this is in fact a NOP as the Mesa RC_SWIZZLE_* are all identical to VSF_IN_COMPONENT_* */
+ return swizzle;
+}
+
+static unsigned long t_src_index(struct r300_vertex_program_code *vp,
+ struct rc_src_register *src)
+{
+ if (src->File == RC_FILE_INPUT) {
+ assert(vp->inputs[src->Index] != -1);
+ return vp->inputs[src->Index];
+ } else {
+ if (src->Index < 0) {
+ fprintf(stderr,
+ "negative offsets for indirect addressing do not work.\n");
+ return 0;
+ }
+ return src->Index;
+ }
+}
+
+/* these two functions should probably be merged... */
+
+static unsigned long t_src(struct r300_vertex_program_code *vp,
+ struct rc_src_register *src)
+{
+ /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
+ * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+ */
+ return PVS_SRC_OPERAND(t_src_index(vp, src),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 1)),
+ t_swizzle(GET_SWZ(src->Swizzle, 2)),
+ t_swizzle(GET_SWZ(src->Swizzle, 3)),
+ t_src_class(src->File),
+ src->Negate) | (src->RelAddr << 4);
+}
+
+static unsigned long t_src_scalar(struct r300_vertex_program_code *vp,
+ struct rc_src_register *src)
+{
+ /* src->Negate uses the RC_MASK_ flags from program_instruction.h,
+ * which equal our VSF_FLAGS_ values, so it's safe to just pass it here.
+ */
+ return PVS_SRC_OPERAND(t_src_index(vp, src),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_swizzle(GET_SWZ(src->Swizzle, 0)),
+ t_src_class(src->File),
+ src->Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+ (src->RelAddr << 4);
+}
+
+static int valid_dst(struct r300_vertex_program_code *vp,
+ struct rc_dst_register *dst)
+{
+ if (dst->File == RC_FILE_OUTPUT && vp->outputs[dst->Index] == -1) {
+ return 0;
+ } else if (dst->File == RC_FILE_ADDRESS) {
+ assert(dst->Index == 0);
+ }
+
+ return 1;
+}
+
+static void ei_vector1(struct r300_vertex_program_code *vp,
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+ 0,
+ 0,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
+}
+
+static void ei_vector2(struct r300_vertex_program_code *vp,
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+ 0,
+ 0,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = t_src(vp, &vpi->SrcReg[1]);
+ inst[3] = __CONST(1, RC_SWIZZLE_ZERO);
+}
+
+static void ei_math1(struct r300_vertex_program_code *vp,
+ unsigned int hw_opcode,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(hw_opcode,
+ 1,
+ 0,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = __CONST(0, RC_SWIZZLE_ZERO);
+}
+
+static void ei_lit(struct r300_vertex_program_code *vp,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
+{
+ //LIT TMP 1.Y Z TMP 1{} {X W Z Y} TMP 1{} {Y W Z X} TMP 1{} {Y X Z W}
+
+ inst[0] = PVS_OP_DST_OPERAND(ME_LIGHT_COEFF_DX,
+ 1,
+ 0,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ /* NOTE: Users swizzling might not work. */
+ inst[1] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+ inst[2] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+ inst[3] = PVS_SRC_OPERAND(t_src_index(vp, &vpi->SrcReg[0]), t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 1)), // Y
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 0)), // X
+ PVS_SRC_SELECT_FORCE_0, // Z
+ t_swizzle(GET_SWZ(vpi->SrcReg[0].Swizzle, 3)), // W
+ t_src_class(vpi->SrcReg[0].File),
+ vpi->SrcReg[0].Negate ? RC_MASK_XYZW : RC_MASK_NONE) |
+ (vpi->SrcReg[0].RelAddr << 4);
+}
+
+static void ei_mad(struct r300_vertex_program_code *vp,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
+{
+ /* Remarks about hardware limitations of MAD
+ * (please preserve this comment, as this information is _NOT_
+ * in the documentation provided by AMD).
+ *
+ * As described in the documentation, MAD with three unique temporary
+ * source registers requires the use of the macro version.
+ *
+ * However (and this is not mentioned in the documentation), apparently
+ * the macro version is _NOT_ a full superset of the normal version.
+ * In particular, the macro version does not always work when relative
+ * addressing is used in the source operands.
+ *
+ * This limitation caused incorrect rendering in Sauerbraten's OpenGL
+ * assembly shader path when using medium quality animations
+ * (i.e. animations with matrix blending instead of quaternion blending).
+ *
+ * Unfortunately, I (nha) have been unable to extract a Piglit regression
+ * test for this issue - for some reason, it is possible to have vertex
+ * programs whose prefix is *exactly* the same as the prefix of the
+ * offending program in Sauerbraten up to the offending instruction
+ * without causing any trouble.
+ *
+ * Bottom line: Only use the macro version only when really necessary;
+ * according to AMD docs, this should improve performance by one clock
+ * as a nice side bonus.
+ */
+ if (vpi->SrcReg[0].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[1].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[2].File == RC_FILE_TEMPORARY &&
+ vpi->SrcReg[0].Index != vpi->SrcReg[1].Index &&
+ vpi->SrcReg[0].Index != vpi->SrcReg[2].Index &&
+ vpi->SrcReg[1].Index != vpi->SrcReg[2].Index) {
+ inst[0] = PVS_OP_DST_OPERAND(PVS_MACRO_OP_2CLK_MADD,
+ 0,
+ 1,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ } else {
+ inst[0] = PVS_OP_DST_OPERAND(VE_MULTIPLY_ADD,
+ 0,
+ 0,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ }
+ inst[1] = t_src(vp, &vpi->SrcReg[0]);
+ inst[2] = t_src(vp, &vpi->SrcReg[1]);
+ inst[3] = t_src(vp, &vpi->SrcReg[2]);
+}
+
+static void ei_pow(struct r300_vertex_program_code *vp,
+ struct rc_sub_instruction *vpi,
+ unsigned int * inst)
+{
+ inst[0] = PVS_OP_DST_OPERAND(ME_POWER_FUNC_FF,
+ 1,
+ 0,
+ t_dst_index(vp, &vpi->DstReg),
+ t_dst_mask(vpi->DstReg.WriteMask),
+ t_dst_class(vpi->DstReg.File));
+ inst[1] = t_src_scalar(vp, &vpi->SrcReg[0]);
+ inst[2] = __CONST(0, RC_SWIZZLE_ZERO);
+ inst[3] = t_src_scalar(vp, &vpi->SrcReg[1]);
+}
+
+
+static void translate_vertex_program(struct r300_vertex_program_compiler * compiler)
+{
+ struct rc_instruction *rci;
+
+ compiler->code->pos_end = 0; /* Not supported yet */
+ compiler->code->length = 0;
+
+ compiler->SetHwInputOutput(compiler);
+
+ for(rci = compiler->Base.Program.Instructions.Next; rci != &compiler->Base.Program.Instructions; rci = rci->Next) {
+ struct rc_sub_instruction *vpi = &rci->U.I;
+ unsigned int *inst = compiler->code->body.d + compiler->code->length;
+
+ /* Skip instructions writing to non-existing destination */
+ if (!valid_dst(compiler->code, &vpi->DstReg))
+ continue;
+
+ if (compiler->code->length >= VSF_MAX_FRAGMENT_LENGTH) {
+ rc_error(&compiler->Base, "Vertex program has too many instructions\n");
+ return;
+ }
+
+ assert(compiler->Base.is_r500 ||
+ (vpi->Opcode != RC_OPCODE_SEQ &&
+ vpi->Opcode != RC_OPCODE_SNE));
+
+ switch (vpi->Opcode) {
+ case RC_OPCODE_ADD: ei_vector2(compiler->code, VE_ADD, vpi, inst); break;
+ case RC_OPCODE_ARL: ei_vector1(compiler->code, VE_FLT2FIX_DX, vpi, inst); break;
+ case RC_OPCODE_COS: ei_math1(compiler->code, ME_COS, vpi, inst); break;
+ case RC_OPCODE_DP4: ei_vector2(compiler->code, VE_DOT_PRODUCT, vpi, inst); break;
+ case RC_OPCODE_DST: ei_vector2(compiler->code, VE_DISTANCE_VECTOR, vpi, inst); break;
+ case RC_OPCODE_EX2: ei_math1(compiler->code, ME_EXP_BASE2_FULL_DX, vpi, inst); break;
+ case RC_OPCODE_EXP: ei_math1(compiler->code, ME_EXP_BASE2_DX, vpi, inst); break;
+ case RC_OPCODE_FRC: ei_vector1(compiler->code, VE_FRACTION, vpi, inst); break;
+ case RC_OPCODE_LG2: ei_math1(compiler->code, ME_LOG_BASE2_FULL_DX, vpi, inst); break;
+ case RC_OPCODE_LIT: ei_lit(compiler->code, vpi, inst); break;
+ case RC_OPCODE_LOG: ei_math1(compiler->code, ME_LOG_BASE2_DX, vpi, inst); break;
+ case RC_OPCODE_MAD: ei_mad(compiler->code, vpi, inst); break;
+ case RC_OPCODE_MAX: ei_vector2(compiler->code, VE_MAXIMUM, vpi, inst); break;
+ case RC_OPCODE_MIN: ei_vector2(compiler->code, VE_MINIMUM, vpi, inst); break;
+ case RC_OPCODE_MOV: ei_vector1(compiler->code, VE_ADD, vpi, inst); break;
+ case RC_OPCODE_MUL: ei_vector2(compiler->code, VE_MULTIPLY, vpi, inst); break;
+ case RC_OPCODE_POW: ei_pow(compiler->code, vpi, inst); break;
+ case RC_OPCODE_RCP: ei_math1(compiler->code, ME_RECIP_DX, vpi, inst); break;
+ case RC_OPCODE_RSQ: ei_math1(compiler->code, ME_RECIP_SQRT_DX, vpi, inst); break;
+ case RC_OPCODE_SEQ: ei_vector2(compiler->code, VE_SET_EQUAL, vpi, inst); break;
+ case RC_OPCODE_SGE: ei_vector2(compiler->code, VE_SET_GREATER_THAN_EQUAL, vpi, inst); break;
+ case RC_OPCODE_SIN: ei_math1(compiler->code, ME_SIN, vpi, inst); break;
+ case RC_OPCODE_SLT: ei_vector2(compiler->code, VE_SET_LESS_THAN, vpi, inst); break;
+ case RC_OPCODE_SNE: ei_vector2(compiler->code, VE_SET_NOT_EQUAL, vpi, inst); break;
+ default:
+ rc_error(&compiler->Base, "Unknown opcode %s\n", rc_get_opcode_info(vpi->Opcode)->Name);
+ return;
+ }
+
+ compiler->code->length += 4;
+
+ if (compiler->Base.Error)
+ return;
+ }
+}
+
+struct temporary_allocation {
+ unsigned int Allocated:1;
+ unsigned int HwTemp:15;
+ struct rc_instruction * LastRead;
+};
+
+static void allocate_temporary_registers(struct r300_vertex_program_compiler * compiler)
+{
+ struct rc_instruction *inst;
+ unsigned int num_orig_temps = 0;
+ char hwtemps[VSF_MAX_FRAGMENT_TEMPS];
+ struct temporary_allocation * ta;
+ unsigned int i, j;
+
+ compiler->code->num_temporaries = 0;
+ memset(hwtemps, 0, sizeof(hwtemps));
+
+ /* Pass 1: Count original temporaries and allocate structures */
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ if (inst->U.I.SrcReg[i].Index >= num_orig_temps)
+ num_orig_temps = inst->U.I.SrcReg[i].Index + 1;
+ }
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+ if (inst->U.I.DstReg.Index >= num_orig_temps)
+ num_orig_temps = inst->U.I.DstReg.Index + 1;
+ }
+ }
+ }
+
+ ta = (struct temporary_allocation*)memory_pool_malloc(&compiler->Base.Pool,
+ sizeof(struct temporary_allocation) * num_orig_temps);
+ memset(ta, 0, sizeof(struct temporary_allocation) * num_orig_temps);
+
+ /* Pass 2: Determine original temporary lifetimes */
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY)
+ ta[inst->U.I.SrcReg[i].Index].LastRead = inst;
+ }
+ }
+
+ /* Pass 3: Register allocation */
+ for(inst = compiler->Base.Program.Instructions.Next; inst != &compiler->Base.Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.SrcReg[i].Index;
+ inst->U.I.SrcReg[i].Index = ta[orig].HwTemp;
+
+ if (ta[orig].Allocated && inst == ta[orig].LastRead)
+ hwtemps[ta[orig].HwTemp] = 0;
+ }
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY) {
+ unsigned int orig = inst->U.I.DstReg.Index;
+
+ if (!ta[orig].Allocated) {
+ for(j = 0; j < VSF_MAX_FRAGMENT_TEMPS; ++j) {
+ if (!hwtemps[j])
+ break;
+ }
+ if (j >= VSF_MAX_FRAGMENT_TEMPS) {
+ fprintf(stderr, "Out of hw temporaries\n");
+ } else {
+ ta[orig].Allocated = 1;
+ ta[orig].HwTemp = j;
+ hwtemps[j] = 1;
+
+ if (j >= compiler->code->num_temporaries)
+ compiler->code->num_temporaries = j + 1;
+ }
+ }
+
+ inst->U.I.DstReg.Index = ta[orig].HwTemp;
+ }
+ }
+ }
+}
+
+
+/**
+ * Vertex engine cannot read two inputs or two constants at the same time.
+ * Introduce intermediate MOVs to temporary registers to account for this.
+ */
+static int transform_source_conflicts(
+ struct radeon_compiler *c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->NumSrcRegs == 3) {
+ if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])
+ || t_src_conflict(inst->U.I.SrcReg[0], inst->U.I.SrcReg[2])) {
+ int tmpreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = tmpreg;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+
+ reset_srcreg(&inst->U.I.SrcReg[2]);
+ inst->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[2].Index = tmpreg;
+ }
+ }
+
+ if (opcode->NumSrcRegs >= 2) {
+ if (t_src_conflict(inst->U.I.SrcReg[1], inst->U.I.SrcReg[0])) {
+ int tmpreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = tmpreg;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+
+ reset_srcreg(&inst->U.I.SrcReg[1]);
+ inst->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[1].Index = tmpreg;
+ }
+ }
+
+ return 1;
+}
+
+static void addArtificialOutputs(struct r300_vertex_program_compiler * compiler)
+{
+ int i;
+
+ for(i = 0; i < 32; ++i) {
+ if ((compiler->RequiredOutputs & (1 << i)) &&
+ !(compiler->Base.Program.OutputsWritten & (1 << i))) {
+ struct rc_instruction * inst = rc_insert_new_instruction(&compiler->Base, compiler->Base.Program.Instructions.Prev);
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = i;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+ inst->U.I.SrcReg[0].File = RC_FILE_CONSTANT;
+ inst->U.I.SrcReg[0].Index = 0;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+ compiler->Base.Program.OutputsWritten |= 1 << i;
+ }
+ }
+}
+
+static void dataflow_outputs_mark_used(void * userdata, void * data,
+ void (*callback)(void *, unsigned int, unsigned int))
+{
+ struct r300_vertex_program_compiler * c = userdata;
+ int i;
+
+ for(i = 0; i < 32; ++i) {
+ if (c->RequiredOutputs & (1 << i))
+ callback(data, i, RC_MASK_XYZW);
+ }
+}
+
+static int swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+ (void) opcode;
+ (void) reg;
+
+ return 1;
+}
+
+static void debug_program_log(struct r300_vertex_program_compiler* c, const char * where)
+{
+ if (c->Base.Debug) {
+ fprintf(stderr, "Vertex Program: %s\n", where);
+ rc_print_program(&c->Base.Program);
+ }
+}
+
+
+static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
+ .IsNative = &swizzle_is_native,
+ .Split = 0 /* should never be called */
+};
+
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
+{
+ compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
+
+ addArtificialOutputs(compiler);
+
+ debug_program_log(compiler, "before compilation");
+
+ /* XXX Ideally this should be done only for r3xx, but since
+ * we don't have branching support for r5xx, we use the emulation
+ * on all chipsets. */
+ rc_emulate_branches(&compiler->Base);
+
+ debug_program_log(compiler, "after emulate branches");
+
+ {
+ struct radeon_program_transformation transformations[] = {
+ { &r300_transform_vertex_alu, 0 },
+ { &r300_transform_trig_scale_vertex, 0 }
+ };
+ radeonLocalTransform(&compiler->Base, 2, transformations);
+ }
+
+ debug_program_log(compiler, "after native rewrite");
+
+ {
+ /* Note: This pass has to be done seperately from ALU rewrite,
+ * otherwise non-native ALU instructions with source conflits
+ * will not be treated properly.
+ */
+ struct radeon_program_transformation transformations[] = {
+ { &transform_source_conflicts, 0 },
+ };
+ radeonLocalTransform(&compiler->Base, 1, transformations);
+ }
+
+ debug_program_log(compiler, "after source conflict resolve");
+
+ rc_dataflow_deadcode(&compiler->Base, &dataflow_outputs_mark_used, compiler);
+
+ debug_program_log(compiler, "after deadcode");
+
+ rc_dataflow_swizzles(&compiler->Base);
+
+ allocate_temporary_registers(compiler);
+
+ debug_program_log(compiler, "after dataflow");
+
+ translate_vertex_program(compiler);
+
+ rc_constants_copy(&compiler->code->constants, &compiler->Base.Program.Constants);
+
+ compiler->code->InputsRead = compiler->Base.Program.InputsRead;
+ compiler->code->OutputsWritten = compiler->Base.Program.OutputsWritten;
+
+ if (compiler->Base.Debug) {
+ fprintf(stderr, "Final vertex program code:\n");
+ r300_vertex_program_dump(compiler->code);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
new file mode 100644
index 0000000000..5800f1a78e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog_dump.c
@@ -0,0 +1,180 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_code.h"
+
+#include <stdio.h>
+
+static char* r300_vs_ve_ops[] = {
+ /* R300 vector ops */
+ " VE_NO_OP",
+ " VE_DOT_PRODUCT",
+ " VE_MULTIPLY",
+ " VE_ADD",
+ " VE_MULTIPLY_ADD",
+ " VE_DISTANCE_FACTOR",
+ " VE_FRACTION",
+ " VE_MAXIMUM",
+ " VE_MINIMUM",
+ "VE_SET_GREATER_THAN_EQUAL",
+ " VE_SET_LESS_THAN",
+ " VE_MULTIPLYX2_ADD",
+ " VE_MULTIPLY_CLAMP",
+ " VE_FLT2FIX_DX",
+ " VE_FLT2FIX_DX_RND",
+ /* R500 vector ops */
+ " VE_PRED_SET_EQ_PUSH",
+ " VE_PRED_SET_GT_PUSH",
+ " VE_PRED_SET_GTE_PUSH",
+ " VE_PRED_SET_NEQ_PUSH",
+ " VE_COND_WRITE_EQ",
+ " VE_COND_WRITE_GT",
+ " VE_COND_WRITE_GTE",
+ " VE_COND_WRITE_NEQ",
+ " VE_COND_MUX_EQ",
+ " VE_COND_MUX_GT",
+ " VE_COND_MUX_GTE",
+ " VE_SET_GREATER_THAN",
+ " VE_SET_EQUAL",
+ " VE_SET_NOT_EQUAL",
+ " (reserved)",
+ " (reserved)",
+ " (reserved)",
+};
+
+static char* r300_vs_me_ops[] = {
+ /* R300 math ops */
+ " ME_NO_OP",
+ " ME_EXP_BASE2_DX",
+ " ME_LOG_BASE2_DX",
+ " ME_EXP_BASEE_FF",
+ " ME_LIGHT_COEFF_DX",
+ " ME_POWER_FUNC_FF",
+ " ME_RECIP_DX",
+ " ME_RECIP_FF",
+ " ME_RECIP_SQRT_DX",
+ " ME_RECIP_SQRT_FF",
+ " ME_MULTIPLY",
+ " ME_EXP_BASE2_FULL_DX",
+ " ME_LOG_BASE2_FULL_DX",
+ " ME_POWER_FUNC_FF_CLAMP_B",
+ "ME_POWER_FUNC_FF_CLAMP_B1",
+ "ME_POWER_FUNC_FF_CLAMP_01",
+ " ME_SIN",
+ " ME_COS",
+ /* R500 math ops */
+ " ME_LOG_BASE2_IEEE",
+ " ME_RECIP_IEEE",
+ " ME_RECIP_SQRT_IEEE",
+ " ME_PRED_SET_EQ",
+ " ME_PRED_SET_GT",
+ " ME_PRED_SET_GTE",
+ " ME_PRED_SET_NEQ",
+ " ME_PRED_SET_CLR",
+ " ME_PRED_SET_INV",
+ " ME_PRED_SET_POP",
+ " ME_PRED_SET_RESTORE",
+ " (reserved)",
+ " (reserved)",
+ " (reserved)",
+};
+
+/* XXX refactor to avoid clashing symbols */
+static char* r300_vs_src_debug[] = {
+ "t",
+ "i",
+ "c",
+ "a",
+};
+
+static char* r300_vs_dst_debug[] = {
+ "t",
+ "a0",
+ "o",
+ "ox",
+ "a",
+ "i",
+ "u",
+ "u",
+};
+
+static char* r300_vs_swiz_debug[] = {
+ "X",
+ "Y",
+ "Z",
+ "W",
+ "0",
+ "1",
+ "U",
+ "U",
+};
+
+
+static void r300_vs_op_dump(uint32_t op)
+{
+ fprintf(stderr, " dst: %d%s op: ",
+ (op >> 13) & 0x7f, r300_vs_dst_debug[(op >> 8) & 0x7]);
+ if (op & 0x80) {
+ if (op & 0x1) {
+ fprintf(stderr, "PVS_MACRO_OP_2CLK_M2X_ADD\n");
+ } else {
+ fprintf(stderr, " PVS_MACRO_OP_2CLK_MADD\n");
+ }
+ } else if (op & 0x40) {
+ fprintf(stderr, "%s\n", r300_vs_me_ops[op & 0x1f]);
+ } else {
+ fprintf(stderr, "%s\n", r300_vs_ve_ops[op & 0x1f]);
+ }
+}
+
+static void r300_vs_src_dump(uint32_t src)
+{
+ fprintf(stderr, " reg: %d%s swiz: %s%s/%s%s/%s%s/%s%s\n",
+ (src >> 5) & 0xff, r300_vs_src_debug[src & 0x3],
+ src & (1 << 25) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 13) & 0x7],
+ src & (1 << 26) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 16) & 0x7],
+ src & (1 << 27) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 19) & 0x7],
+ src & (1 << 28) ? "-" : " ",
+ r300_vs_swiz_debug[(src >> 22) & 0x7]);
+}
+
+void r300_vertex_program_dump(struct r300_vertex_program_code * vs)
+{
+ unsigned instrcount = vs->length / 4;
+ unsigned i;
+
+ for(i = 0; i < instrcount; i++) {
+ unsigned offset = i*4;
+ unsigned src;
+
+ fprintf(stderr, "%d: op: 0x%08x", i, vs->body.d[offset]);
+ r300_vs_op_dump(vs->body.d[offset]);
+
+ for(src = 0; src < 3; ++src) {
+ fprintf(stderr, " src%i: 0x%08x", src, vs->body.d[offset+1+src]);
+ r300_vs_src_dump(vs->body.d[offset+1+src]);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
new file mode 100644
index 0000000000..632f0bcf4f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include <stdio.h>
+
+#include "../r300_reg.h"
+
+/**
+ * Rewrite IF instructions to use the ALU result special register.
+ */
+int r500_transform_IF(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data)
+{
+ if (inst->U.I.Opcode != RC_OPCODE_IF)
+ return 0;
+
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.WriteMask = 0;
+ inst_mov->U.I.WriteALUResult = RC_ALURESULT_W;
+ inst_mov->U.I.ALUResultCompare = RC_COMPARE_FUNC_NOTEQUAL;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mov->U.I.SrcReg[0].Swizzle = combine_swizzles4(inst_mov->U.I.SrcReg[0].Swizzle,
+ RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_UNUSED, RC_SWIZZLE_X);
+
+ inst->U.I.SrcReg[0].File = RC_FILE_SPECIAL;
+ inst->U.I.SrcReg[0].Index = RC_SPECIAL_ALU_RESULT;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+ inst->U.I.SrcReg[0].Negate = 0;
+
+ return 1;
+}
+
+static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
+{
+ unsigned int relevant;
+ int i;
+
+ if (opcode == RC_OPCODE_TEX ||
+ opcode == RC_OPCODE_TXB ||
+ opcode == RC_OPCODE_TXP ||
+ opcode == RC_OPCODE_KIL) {
+ if (reg.Abs)
+ return 0;
+
+ if (opcode == RC_OPCODE_KIL && (reg.Swizzle != RC_SWIZZLE_XYZW || reg.Negate != RC_MASK_NONE))
+ return 0;
+
+ if (reg.Negate)
+ reg.Negate ^= RC_MASK_XYZW;
+
+ for(i = 0; i < 4; ++i) {
+ unsigned int swz = GET_SWZ(reg.Swizzle, i);
+ if (swz == RC_SWIZZLE_UNUSED) {
+ reg.Negate &= ~(1 << i);
+ continue;
+ }
+ if (swz >= 4)
+ return 0;
+ }
+
+ if (reg.Negate)
+ return 0;
+
+ return 1;
+ } else if (opcode == RC_OPCODE_DDX || opcode == RC_OPCODE_DDY) {
+ /* DDX/MDH and DDY/MDV explicitly ignore incoming swizzles;
+ * if it doesn't fit perfectly into a .xyzw case... */
+ if (reg.Swizzle == RC_SWIZZLE_XYZW && !reg.Abs && !reg.Negate)
+ return 1;
+
+ return 0;
+ } else {
+ /* ALU instructions support almost everything */
+ if (reg.Abs)
+ return 1;
+
+ relevant = 0;
+ for(i = 0; i < 3; ++i) {
+ unsigned int swz = GET_SWZ(reg.Swizzle, i);
+ if (swz != RC_SWIZZLE_UNUSED && swz != RC_SWIZZLE_ZERO)
+ relevant |= 1 << i;
+ }
+ if ((reg.Negate & relevant) && ((reg.Negate & relevant) != relevant))
+ return 0;
+
+ return 1;
+ }
+}
+
+/**
+ * Split source register access.
+ *
+ * The only thing we *cannot* do in an ALU instruction is per-component
+ * negation.
+ */
+static void r500_swizzle_split(struct rc_src_register src, unsigned int usemask,
+ struct rc_swizzle_split * split)
+{
+ unsigned int negatebase[2] = { 0, 0 };
+ int i;
+
+ for(i = 0; i < 4; ++i) {
+ unsigned int swz = GET_SWZ(src.Swizzle, i);
+ if (swz == RC_SWIZZLE_UNUSED || !GET_BIT(usemask, i))
+ continue;
+ negatebase[GET_BIT(src.Negate, i)] |= 1 << i;
+ }
+
+ split->NumPhases = 0;
+
+ for(i = 0; i <= 1; ++i) {
+ if (!negatebase[i])
+ continue;
+
+ split->Phase[split->NumPhases++] = negatebase[i];
+ }
+}
+
+struct rc_swizzle_caps r500_swizzle_caps = {
+ .IsNative = r500_swizzle_is_native,
+ .Split = r500_swizzle_split
+};
+
+static char *toswiz(int swiz_val) {
+ switch(swiz_val) {
+ case 0: return "R";
+ case 1: return "G";
+ case 2: return "B";
+ case 3: return "A";
+ case 4: return "0";
+ case 5: return "H";
+ case 6: return "1";
+ case 7: return "U";
+ }
+ return NULL;
+}
+
+static char *toop(int op_val)
+{
+ char *str = NULL;
+ switch (op_val) {
+ case 0: str = "MAD"; break;
+ case 1: str = "DP3"; break;
+ case 2: str = "DP4"; break;
+ case 3: str = "D2A"; break;
+ case 4: str = "MIN"; break;
+ case 5: str = "MAX"; break;
+ case 6: str = "Reserved"; break;
+ case 7: str = "CND"; break;
+ case 8: str = "CMP"; break;
+ case 9: str = "FRC"; break;
+ case 10: str = "SOP"; break;
+ case 11: str = "MDH"; break;
+ case 12: str = "MDV"; break;
+ }
+ return str;
+}
+
+static char *to_alpha_op(int op_val)
+{
+ char *str = NULL;
+ switch (op_val) {
+ case 0: str = "MAD"; break;
+ case 1: str = "DP"; break;
+ case 2: str = "MIN"; break;
+ case 3: str = "MAX"; break;
+ case 4: str = "Reserved"; break;
+ case 5: str = "CND"; break;
+ case 6: str = "CMP"; break;
+ case 7: str = "FRC"; break;
+ case 8: str = "EX2"; break;
+ case 9: str = "LN2"; break;
+ case 10: str = "RCP"; break;
+ case 11: str = "RSQ"; break;
+ case 12: str = "SIN"; break;
+ case 13: str = "COS"; break;
+ case 14: str = "MDH"; break;
+ case 15: str = "MDV"; break;
+ }
+ return str;
+}
+
+static char *to_mask(int val)
+{
+ char *str = NULL;
+ switch(val) {
+ case 0: str = "NONE"; break;
+ case 1: str = "R"; break;
+ case 2: str = "G"; break;
+ case 3: str = "RG"; break;
+ case 4: str = "B"; break;
+ case 5: str = "RB"; break;
+ case 6: str = "GB"; break;
+ case 7: str = "RGB"; break;
+ case 8: str = "A"; break;
+ case 9: str = "AR"; break;
+ case 10: str = "AG"; break;
+ case 11: str = "ARG"; break;
+ case 12: str = "AB"; break;
+ case 13: str = "ARB"; break;
+ case 14: str = "AGB"; break;
+ case 15: str = "ARGB"; break;
+ }
+ return str;
+}
+
+static char *to_texop(int val)
+{
+ switch(val) {
+ case 0: return "NOP";
+ case 1: return "LD";
+ case 2: return "TEXKILL";
+ case 3: return "PROJ";
+ case 4: return "LODBIAS";
+ case 5: return "LOD";
+ case 6: return "DXDY";
+ }
+ return NULL;
+}
+
+void r500FragmentProgramDump(struct rX00_fragment_program_code *c)
+{
+ struct r500_fragment_program_code *code = &c->code.r500;
+ fprintf(stderr, "R500 Fragment Program:\n--------\n");
+
+ int n;
+ uint32_t inst;
+ uint32_t inst0;
+ char *str = NULL;
+
+ for (n = 0; n < code->inst_end+1; n++) {
+ inst0 = inst = code->inst[n].inst0;
+ fprintf(stderr,"%d\t0:CMN_INST 0x%08x:", n, inst);
+ switch(inst & 0x3) {
+ case R500_INST_TYPE_ALU: str = "ALU"; break;
+ case R500_INST_TYPE_OUT: str = "OUT"; break;
+ case R500_INST_TYPE_FC: str = "FC"; break;
+ case R500_INST_TYPE_TEX: str = "TEX"; break;
+ };
+ fprintf(stderr,"%s %s %s %s %s ", str,
+ inst & R500_INST_TEX_SEM_WAIT ? "TEX_WAIT" : "",
+ inst & R500_INST_LAST ? "LAST" : "",
+ inst & R500_INST_NOP ? "NOP" : "",
+ inst & R500_INST_ALU_WAIT ? "ALU WAIT" : "");
+ fprintf(stderr,"wmask: %s omask: %s\n", to_mask((inst >> 11) & 0xf),
+ to_mask((inst >> 15) & 0xf));
+
+ switch(inst0 & 0x3) {
+ case 0:
+ case 1:
+ fprintf(stderr,"\t1:RGB_ADDR 0x%08x:", code->inst[n].inst1);
+ inst = code->inst[n].inst1;
+
+ fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+ inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+ (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+ (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+ (inst >> 30));
+
+ fprintf(stderr,"\t2:ALPHA_ADDR 0x%08x:", code->inst[n].inst2);
+ inst = code->inst[n].inst2;
+ fprintf(stderr,"Addr0: %d%c, Addr1: %d%c, Addr2: %d%c, srcp:%d\n",
+ inst & 0xff, (inst & (1<<8)) ? 'c' : 't',
+ (inst >> 10) & 0xff, (inst & (1<<18)) ? 'c' : 't',
+ (inst >> 20) & 0xff, (inst & (1<<28)) ? 'c' : 't',
+ (inst >> 30));
+ fprintf(stderr,"\t3 RGB_INST: 0x%08x:", code->inst[n].inst3);
+ inst = code->inst[n].inst3;
+ fprintf(stderr,"rgb_A_src:%d %s/%s/%s %d rgb_B_src:%d %s/%s/%s %d targ: %d\n",
+ (inst) & 0x3, toswiz((inst >> 2) & 0x7), toswiz((inst >> 5) & 0x7), toswiz((inst >> 8) & 0x7),
+ (inst >> 11) & 0x3,
+ (inst >> 13) & 0x3, toswiz((inst >> 15) & 0x7), toswiz((inst >> 18) & 0x7), toswiz((inst >> 21) & 0x7),
+ (inst >> 24) & 0x3, (inst >> 29) & 0x3);
+
+
+ fprintf(stderr,"\t4 ALPHA_INST:0x%08x:", code->inst[n].inst4);
+ inst = code->inst[n].inst4;
+ fprintf(stderr,"%s dest:%d%s alp_A_src:%d %s %d alp_B_src:%d %s %d targ %d w:%d\n", to_alpha_op(inst & 0xf),
+ (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+ (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), (inst >> 17) & 0x3,
+ (inst >> 19) & 0x3, toswiz((inst >> 21) & 0x7), (inst >> 24) & 0x3,
+ (inst >> 29) & 0x3,
+ (inst >> 31) & 0x1);
+
+ fprintf(stderr,"\t5 RGBA_INST: 0x%08x:", code->inst[n].inst5);
+ inst = code->inst[n].inst5;
+ fprintf(stderr,"%s dest:%d%s rgb_C_src:%d %s/%s/%s %d alp_C_src:%d %s %d\n", toop(inst & 0xf),
+ (inst >> 4) & 0x7f, inst & (1<<11) ? "(rel)":"",
+ (inst >> 12) & 0x3, toswiz((inst >> 14) & 0x7), toswiz((inst >> 17) & 0x7), toswiz((inst >> 20) & 0x7),
+ (inst >> 23) & 0x3,
+ (inst >> 25) & 0x3, toswiz((inst >> 27) & 0x7), (inst >> 30) & 0x3);
+ break;
+ case 2:
+ break;
+ case 3:
+ inst = code->inst[n].inst1;
+ fprintf(stderr,"\t1:TEX_INST: 0x%08x: id: %d op:%s, %s, %s %s\n", inst, (inst >> 16) & 0xf,
+ to_texop((inst >> 22) & 0x7), (inst & (1<<25)) ? "ACQ" : "",
+ (inst & (1<<26)) ? "IGNUNC" : "", (inst & (1<<27)) ? "UNSCALED" : "SCALED");
+ inst = code->inst[n].inst2;
+ fprintf(stderr,"\t2:TEX_ADDR: 0x%08x: src: %d%s %s/%s/%s/%s dst: %d%s %s/%s/%s/%s\n", inst,
+ inst & 127, inst & (1<<7) ? "(rel)" : "",
+ toswiz((inst >> 8) & 0x3), toswiz((inst >> 10) & 0x3),
+ toswiz((inst >> 12) & 0x3), toswiz((inst >> 14) & 0x3),
+ (inst >> 16) & 127, inst & (1<<23) ? "(rel)" : "",
+ toswiz((inst >> 24) & 0x3), toswiz((inst >> 26) & 0x3),
+ toswiz((inst >> 28) & 0x3), toswiz((inst >> 30) & 0x3));
+
+ fprintf(stderr,"\t3:TEX_DXDY: 0x%08x\n", code->inst[n].inst3);
+ break;
+ }
+ fprintf(stderr,"\n");
+ }
+
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
new file mode 100644
index 0000000000..4efbae7ba6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Ben Skeggs <darktama@iinet.net.au>
+ * Jerome Glisse <j.glisse@gmail.com>
+ */
+#ifndef __R500_FRAGPROG_H_
+#define __R500_FRAGPROG_H_
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+extern void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler);
+
+extern void r500FragmentProgramDump(struct rX00_fragment_program_code *c);
+
+extern struct rc_swizzle_caps r500_swizzle_caps;
+
+extern int r500_transform_IF(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
new file mode 100644
index 0000000000..fb2d8b5a9c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (C) 2005 Ben Skeggs.
+ *
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ *
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ *
+ * \author Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ */
+
+#include "r500_fragprog.h"
+
+#include "../r300_reg.h"
+
+#include "radeon_program_pair.h"
+
+
+#define PROG_CODE \
+ struct r500_fragment_program_code *code = &c->code->code.r500
+
+#define error(fmt, args...) do { \
+ rc_error(&c->Base, "%s::%s(): " fmt "\n", \
+ __FILE__, __FUNCTION__, ##args); \
+ } while(0)
+
+
+struct branch_info {
+ int If;
+ int Else;
+ int Endif;
+};
+
+struct emit_state {
+ struct radeon_compiler * C;
+ struct r500_fragment_program_code * Code;
+
+ struct branch_info * Branches;
+ unsigned int CurrentBranchDepth;
+ unsigned int BranchesReserved;
+
+ unsigned int MaxBranchDepth;
+};
+
+static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
+{
+ switch(opcode) {
+ case RC_OPCODE_CMP: return R500_ALU_RGBA_OP_CMP;
+ case RC_OPCODE_DDX: return R500_ALU_RGBA_OP_MDH;
+ case RC_OPCODE_DDY: return R500_ALU_RGBA_OP_MDV;
+ case RC_OPCODE_DP3: return R500_ALU_RGBA_OP_DP3;
+ case RC_OPCODE_DP4: return R500_ALU_RGBA_OP_DP4;
+ case RC_OPCODE_FRC: return R500_ALU_RGBA_OP_FRC;
+ default:
+ error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
+ /* fall through */
+ case RC_OPCODE_NOP:
+ /* fall through */
+ case RC_OPCODE_MAD: return R500_ALU_RGBA_OP_MAD;
+ case RC_OPCODE_MAX: return R500_ALU_RGBA_OP_MAX;
+ case RC_OPCODE_MIN: return R500_ALU_RGBA_OP_MIN;
+ case RC_OPCODE_REPL_ALPHA: return R500_ALU_RGBA_OP_SOP;
+ }
+}
+
+static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
+{
+ switch(opcode) {
+ case RC_OPCODE_CMP: return R500_ALPHA_OP_CMP;
+ case RC_OPCODE_COS: return R500_ALPHA_OP_COS;
+ case RC_OPCODE_DDX: return R500_ALPHA_OP_MDH;
+ case RC_OPCODE_DDY: return R500_ALPHA_OP_MDV;
+ case RC_OPCODE_DP3: return R500_ALPHA_OP_DP;
+ case RC_OPCODE_DP4: return R500_ALPHA_OP_DP;
+ case RC_OPCODE_EX2: return R500_ALPHA_OP_EX2;
+ case RC_OPCODE_FRC: return R500_ALPHA_OP_FRC;
+ case RC_OPCODE_LG2: return R500_ALPHA_OP_LN2;
+ default:
+ error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
+ /* fall through */
+ case RC_OPCODE_NOP:
+ /* fall through */
+ case RC_OPCODE_MAD: return R500_ALPHA_OP_MAD;
+ case RC_OPCODE_MAX: return R500_ALPHA_OP_MAX;
+ case RC_OPCODE_MIN: return R500_ALPHA_OP_MIN;
+ case RC_OPCODE_RCP: return R500_ALPHA_OP_RCP;
+ case RC_OPCODE_RSQ: return R500_ALPHA_OP_RSQ;
+ case RC_OPCODE_SIN: return R500_ALPHA_OP_SIN;
+ }
+}
+
+static unsigned int fix_hw_swizzle(unsigned int swz)
+{
+ switch (swz) {
+ case RC_SWIZZLE_ZERO:
+ case RC_SWIZZLE_UNUSED:
+ swz = 4;
+ break;
+ case RC_SWIZZLE_HALF:
+ swz = 5;
+ break;
+ case RC_SWIZZLE_ONE:
+ swz = 6;
+ break;
+ }
+
+ return swz;
+}
+
+static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
+{
+ unsigned int t = inst->RGB.Arg[arg].Source;
+ int comp;
+ t |= inst->RGB.Arg[arg].Negate << 11;
+ t |= inst->RGB.Arg[arg].Abs << 12;
+
+ for(comp = 0; comp < 3; ++comp)
+ t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
+
+ return t;
+}
+
+static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
+{
+ unsigned int t = inst->Alpha.Arg[i].Source;
+ t |= fix_hw_swizzle(inst->Alpha.Arg[i].Swizzle) << 2;
+ t |= inst->Alpha.Arg[i].Negate << 5;
+ t |= inst->Alpha.Arg[i].Abs << 6;
+ return t;
+}
+
+static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
+{
+ switch(func) {
+ case RC_COMPARE_FUNC_EQUAL: return R500_INST_ALU_RESULT_OP_EQ;
+ case RC_COMPARE_FUNC_LESS: return R500_INST_ALU_RESULT_OP_LT;
+ case RC_COMPARE_FUNC_GEQUAL: return R500_INST_ALU_RESULT_OP_GE;
+ case RC_COMPARE_FUNC_NOTEQUAL: return R500_INST_ALU_RESULT_OP_NE;
+ default:
+ rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
+ return 0;
+ }
+}
+
+static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
+{
+ if (index > code->max_temp_idx)
+ code->max_temp_idx = index;
+}
+
+static unsigned int use_source(struct r500_fragment_program_code* code, struct radeon_pair_instruction_source src)
+{
+ if (src.File == RC_FILE_CONSTANT) {
+ return src.Index | 0x100;
+ } else if (src.File == RC_FILE_TEMPORARY) {
+ use_temporary(code, src.Index);
+ return src.Index;
+ }
+
+ return 0;
+}
+
+/**
+ * NOP the specified instruction if it is not a texture lookup.
+ */
+static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
+{
+ PROG_CODE;
+
+ if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
+ code->inst[ip].inst0 |= R500_INST_NOP;
+ }
+}
+
+/**
+ * Emit a paired ALU instruction.
+ */
+static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
+{
+ PROG_CODE;
+
+ if (code->inst_end >= 511) {
+ error("emit_alu: Too many instructions");
+ return;
+ }
+
+ int ip = ++code->inst_end;
+
+ /* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
+ if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
+ inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
+ if (ip > 0) {
+ alu_nop(c, ip - 1);
+ }
+ }
+
+ code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
+ code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
+
+ if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
+ code->inst[ip].inst0 = R500_INST_TYPE_OUT;
+ if (inst->WriteALUResult) {
+ error("%s: cannot write output and ALU result at the same time");
+ return;
+ }
+ } else {
+ code->inst[ip].inst0 = R500_INST_TYPE_ALU;
+ }
+ code->inst[ip].inst0 |= R500_INST_TEX_SEM_WAIT;
+
+ code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11) | (inst->Alpha.WriteMask << 14);
+ code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
+ if (inst->Alpha.DepthWriteMask) {
+ code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
+ c->code->writes_depth = 1;
+ }
+
+ code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
+ code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
+ use_temporary(code, inst->Alpha.DestIndex);
+ use_temporary(code, inst->RGB.DestIndex);
+
+ if (inst->RGB.Saturate)
+ code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
+ if (inst->Alpha.Saturate)
+ code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
+
+ code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
+ code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
+ code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
+
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
+ code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
+
+ code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
+ code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
+ code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
+
+ code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
+ code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
+ code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
+
+ code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
+ code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
+
+ if (inst->WriteALUResult) {
+ code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
+
+ if (inst->WriteALUResult == RC_ALURESULT_X)
+ code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
+ else
+ code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
+
+ code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
+ }
+}
+
+static unsigned int translate_strq_swizzle(unsigned int swizzle)
+{
+ unsigned int swiz = 0;
+ int i;
+ for (i = 0; i < 4; i++)
+ swiz |= (GET_SWZ(swizzle, i) & 0x3) << i*2;
+ return swiz;
+}
+
+/**
+ * Emit a single TEX instruction
+ */
+static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
+{
+ PROG_CODE;
+
+ if (code->inst_end >= 511) {
+ error("emit_tex: Too many instructions");
+ return 0;
+ }
+
+ int ip = ++code->inst_end;
+
+ code->inst[ip].inst0 = R500_INST_TYPE_TEX
+ | (inst->DstReg.WriteMask << 11)
+ | R500_INST_TEX_SEM_WAIT;
+ code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
+ | R500_TEX_SEM_ACQUIRE | R500_TEX_IGNORE_UNCOVERED;
+
+ if (inst->TexSrcTarget == RC_TEXTURE_RECT)
+ code->inst[ip].inst1 |= R500_TEX_UNSCALED;
+
+ switch (inst->Opcode) {
+ case RC_OPCODE_KIL:
+ code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
+ break;
+ case RC_OPCODE_TEX:
+ code->inst[ip].inst1 |= R500_TEX_INST_LD;
+ break;
+ case RC_OPCODE_TXB:
+ code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
+ break;
+ case RC_OPCODE_TXP:
+ code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
+ break;
+ default:
+ error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
+ }
+
+ use_temporary(code, inst->SrcReg[0].Index);
+ if (inst->Opcode != RC_OPCODE_KIL)
+ use_temporary(code, inst->DstReg.Index);
+
+ code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
+ | (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
+ | R500_TEX_DST_ADDR(inst->DstReg.Index)
+ | R500_TEX_DST_R_SWIZ_R | R500_TEX_DST_G_SWIZ_G
+ | R500_TEX_DST_B_SWIZ_B | R500_TEX_DST_A_SWIZ_A;
+
+ return 1;
+}
+
+static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
+{
+ if (s->Code->inst_end >= 511) {
+ rc_error(s->C, "emit_tex: Too many instructions");
+ return;
+ }
+
+ unsigned int newip = ++s->Code->inst_end;
+
+ s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
+
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ if (s->CurrentBranchDepth >= 32) {
+ rc_error(s->C, "Branch depth exceeds hardware limit");
+ return;
+ }
+
+ memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+ s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
+
+ struct branch_info * branch = &s->Branches[s->CurrentBranchDepth++];
+ branch->If = newip;
+ branch->Else = -1;
+ branch->Endif = -1;
+
+ if (s->CurrentBranchDepth > s->MaxBranchDepth)
+ s->MaxBranchDepth = s->CurrentBranchDepth;
+
+ /* actual instruction is filled in at ENDIF time */
+ } else if (inst->U.I.Opcode == RC_OPCODE_ELSE) {
+ if (!s->CurrentBranchDepth) {
+ rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+ return;
+ }
+
+ struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+ branch->Else = newip;
+
+ /* actual instruction is filled in at ENDIF time */
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ if (!s->CurrentBranchDepth) {
+ rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
+ return;
+ }
+
+ struct branch_info * branch = &s->Branches[s->CurrentBranchDepth - 1];
+ branch->Endif = newip;
+
+ s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
+ | R500_FC_B_OP0_INCR /* increment branch counter if stay */
+ ;
+
+ if (branch->Else >= 0) {
+ /* increment branch counter also if jump */
+ s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
+ s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
+
+ s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_B_ELSE /* all active pixels want to jump */
+ | R500_FC_B_OP0_NONE /* no counter op if stay */
+ | R500_FC_B_OP1_DECR /* decrement branch counter if jump */
+ | R500_FC_B_POP_CNT(1)
+ ;
+ s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+ } else {
+ /* don't touch branch counter on jump */
+ s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
+ s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+ }
+
+ s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+ | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+ | R500_FC_B_OP1_NONE /* no branch counter if stay */
+ | R500_FC_B_POP_CNT(1)
+ ;
+ s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
+
+ s->CurrentBranchDepth--;
+ } else {
+ rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
+ }
+}
+
+void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compiler)
+{
+ struct emit_state s;
+ struct r500_fragment_program_code *code = &compiler->code->code.r500;
+
+ memset(&s, 0, sizeof(s));
+ s.C = &compiler->Base;
+ s.Code = code;
+
+ memset(code, 0, sizeof(*code));
+ code->max_temp_idx = 1;
+ code->inst_end = -1;
+
+ for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
+ inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
+ inst = inst->Next) {
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->IsFlowControl) {
+ emit_flowcontrol(&s, inst);
+ } else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
+ continue;
+ } else {
+ emit_tex(compiler, &inst->U.I);
+ }
+ } else {
+ emit_paired(compiler, &inst->U.P);
+ }
+ }
+
+ if (code->max_temp_idx >= 128)
+ rc_error(&compiler->Base, "Too many hardware temporaries used");
+
+ if (compiler->Base.Error)
+ return;
+
+ if (code->inst_end == -1 ||
+ (code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
+ /* This may happen when dead-code elimination is disabled or
+ * when most of the fragment program logic is leading to a KIL */
+ if (code->inst_end >= 511) {
+ rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
+ return;
+ }
+
+ int ip = ++code->inst_end;
+ code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
+ }
+
+ if (s.MaxBranchDepth >= 4) {
+ if (code->max_temp_idx < 1)
+ code->max_temp_idx = 1;
+
+ code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.c b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
new file mode 100644
index 0000000000..0eab18c344
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_code.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "radeon_program.h"
+
+void rc_constants_init(struct rc_constant_list * c)
+{
+ memset(c, 0, sizeof(*c));
+}
+
+/**
+ * Copy a constants structure, assuming that the destination structure
+ * is not initialized.
+ */
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src)
+{
+ dst->Constants = malloc(sizeof(struct rc_constant) * src->Count);
+ memcpy(dst->Constants, src->Constants, sizeof(struct rc_constant) * src->Count);
+ dst->Count = src->Count;
+ dst->_Reserved = src->Count;
+}
+
+void rc_constants_destroy(struct rc_constant_list * c)
+{
+ free(c->Constants);
+ memset(c, 0, sizeof(*c));
+}
+
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant)
+{
+ unsigned index = c->Count;
+
+ if (c->Count >= c->_Reserved) {
+ struct rc_constant * newlist;
+
+ c->_Reserved = c->_Reserved * 2;
+ if (!c->_Reserved)
+ c->_Reserved = 16;
+
+ newlist = malloc(sizeof(struct rc_constant) * c->_Reserved);
+ memcpy(newlist, c->Constants, sizeof(struct rc_constant) * c->Count);
+
+ free(c->Constants);
+ c->Constants = newlist;
+ }
+
+ c->Constants[index] = *constant;
+ c->Count++;
+
+ return index;
+}
+
+
+/**
+ * Add a state vector to the constant list, while trying to avoid duplicates.
+ */
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state0, unsigned state1)
+{
+ unsigned index;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_STATE) {
+ if (c->Constants[index].u.State[0] == state0 &&
+ c->Constants[index].u.State[1] == state1)
+ return index;
+ }
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_STATE;
+ constant.Size = 4;
+ constant.u.State[0] = state0;
+ constant.u.State[1] = state1;
+
+ return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate vector to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data)
+{
+ unsigned index;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+ if (!memcmp(c->Constants[index].u.Immediate, data, sizeof(float)*4))
+ return index;
+ }
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_IMMEDIATE;
+ constant.Size = 4;
+ memcpy(constant.u.Immediate, data, sizeof(float) * 4);
+
+ return rc_constants_add(c, &constant);
+}
+
+
+/**
+ * Add an immediate scalar to the constant list, while trying to avoid
+ * duplicates.
+ */
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle)
+{
+ unsigned index;
+ int free_index = -1;
+ struct rc_constant constant;
+
+ for(index = 0; index < c->Count; ++index) {
+ if (c->Constants[index].Type == RC_CONSTANT_IMMEDIATE) {
+ unsigned comp;
+ for(comp = 0; comp < c->Constants[index].Size; ++comp) {
+ if (c->Constants[index].u.Immediate[comp] == data) {
+ *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
+ return index;
+ }
+ }
+
+ if (c->Constants[index].Size < 4)
+ free_index = index;
+ }
+ }
+
+ if (free_index >= 0) {
+ unsigned comp = c->Constants[free_index].Size++;
+ c->Constants[free_index].u.Immediate[comp] = data;
+ *swizzle = RC_MAKE_SWIZZLE_SMEAR(comp);
+ return free_index;
+ }
+
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_IMMEDIATE;
+ constant.Size = 1;
+ constant.u.Immediate[0] = data;
+ *swizzle = RC_SWIZZLE_XXXX;
+
+ return rc_constants_add(c, &constant);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
new file mode 100644
index 0000000000..1979e7e4e4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -0,0 +1,265 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_CODE_H
+#define RADEON_CODE_H
+
+#include <stdint.h>
+
+#define R300_PFS_MAX_ALU_INST 64
+#define R300_PFS_MAX_TEX_INST 32
+#define R300_PFS_MAX_TEX_INDIRECT 4
+#define R300_PFS_NUM_TEMP_REGS 32
+#define R300_PFS_NUM_CONST_REGS 32
+
+#define R500_PFS_MAX_INST 512
+#define R500_PFS_NUM_TEMP_REGS 128
+#define R500_PFS_NUM_CONST_REGS 256
+
+
+#define STATE_R300_WINDOW_DIMENSION (STATE_INTERNAL_DRIVER+0)
+
+enum {
+ /**
+ * External constants are constants whose meaning is unknown to this
+ * compiler. For example, a Mesa gl_program's constants are turned
+ * into external constants.
+ */
+ RC_CONSTANT_EXTERNAL = 0,
+
+ RC_CONSTANT_IMMEDIATE,
+
+ /**
+ * Constant referring to state that is known by this compiler,
+ * see RC_STATE_xxx, i.e. *not* arbitrary Mesa (or other) state.
+ */
+ RC_CONSTANT_STATE
+};
+
+enum {
+ RC_STATE_SHADOW_AMBIENT = 0,
+
+ RC_STATE_R300_WINDOW_DIMENSION,
+ RC_STATE_R300_TEXRECT_FACTOR,
+ RC_STATE_R300_VIEWPORT_SCALE,
+ RC_STATE_R300_VIEWPORT_OFFSET
+};
+
+struct rc_constant {
+ unsigned Type:2; /**< RC_CONSTANT_xxx */
+ unsigned Size:3;
+
+ union {
+ unsigned External;
+ float Immediate[4];
+ unsigned State[2];
+ } u;
+};
+
+struct rc_constant_list {
+ struct rc_constant * Constants;
+ unsigned Count;
+
+ unsigned _Reserved;
+};
+
+void rc_constants_init(struct rc_constant_list * c);
+void rc_constants_copy(struct rc_constant_list * dst, struct rc_constant_list * src);
+void rc_constants_destroy(struct rc_constant_list * c);
+unsigned rc_constants_add(struct rc_constant_list * c, struct rc_constant * constant);
+unsigned rc_constants_add_state(struct rc_constant_list * c, unsigned state1, unsigned state2);
+unsigned rc_constants_add_immediate_vec4(struct rc_constant_list * c, const float * data);
+unsigned rc_constants_add_immediate_scalar(struct rc_constant_list * c, float data, unsigned * swizzle);
+
+/**
+ * Compare functions.
+ *
+ * \note By design, RC_COMPARE_FUNC_xxx + GL_NEVER gives you
+ * the correct GL compare function.
+ */
+typedef enum {
+ RC_COMPARE_FUNC_NEVER = 0,
+ RC_COMPARE_FUNC_LESS,
+ RC_COMPARE_FUNC_EQUAL,
+ RC_COMPARE_FUNC_LEQUAL,
+ RC_COMPARE_FUNC_GREATER,
+ RC_COMPARE_FUNC_NOTEQUAL,
+ RC_COMPARE_FUNC_GEQUAL,
+ RC_COMPARE_FUNC_ALWAYS
+} rc_compare_func;
+
+/**
+ * Coordinate wrapping modes.
+ *
+ * These are not quite the same as their GL counterparts yet.
+ */
+typedef enum {
+ RC_WRAP_NONE = 0,
+ RC_WRAP_REPEAT,
+ RC_WRAP_MIRRORED_REPEAT,
+ RC_WRAP_MIRRORED_CLAMP
+} rc_wrap_mode;
+
+/**
+ * Stores state that influences the compilation of a fragment program.
+ */
+struct r300_fragment_program_external_state {
+ struct {
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field is:
+ * 0 - GL_LUMINANCE
+ * 1 - GL_INTENSITY
+ * 2 - GL_ALPHA
+ * depending on the depth texture mode.
+ */
+ unsigned depth_texture_swizzle:12;
+
+ /**
+ * If the sampler is used as a shadow sampler,
+ * this field specifies the compare function.
+ *
+ * Otherwise, this field is \ref RC_COMPARE_FUNC_NEVER (aka 0).
+ * \sa rc_compare_func
+ */
+ unsigned texture_compare_func : 3;
+
+ /**
+ * No matter what the sampler type is,
+ * this field turns it into a shadow sampler.
+ */
+ unsigned compare_mode_enabled : 1;
+
+ /**
+ * If the sampler needs to fake NPOT, this field is set.
+ */
+ unsigned fake_npot : 1;
+
+ /**
+ * If the sampler will recieve non-normalized coords,
+ * this field is set.
+ */
+ unsigned non_normalized_coords : 1;
+
+ /**
+ * This field specifies wrapping modes for the sampler.
+ *
+ * If this field is \ref RC_WRAP_NONE (aka 0), no wrapping maths
+ * will be performed on the coordinates.
+ */
+ unsigned wrap_mode : 2;
+ } unit[16];
+};
+
+
+
+struct r300_fragment_program_node {
+ int tex_offset; /**< first tex instruction */
+ int tex_end; /**< last tex instruction, relative to tex_offset */
+ int alu_offset; /**< first ALU instruction */
+ int alu_end; /**< last ALU instruction, relative to alu_offset */
+ int flags;
+};
+
+/**
+ * Stores an R300 fragment program in its compiled-to-hardware form.
+ */
+struct r300_fragment_program_code {
+ struct {
+ int length; /**< total # of texture instructions used */
+ uint32_t inst[R300_PFS_MAX_TEX_INST];
+ } tex;
+
+ struct {
+ int length; /**< total # of ALU instructions used */
+ struct {
+ uint32_t rgb_inst;
+ uint32_t rgb_addr;
+ uint32_t alpha_inst;
+ uint32_t alpha_addr;
+ } inst[R300_PFS_MAX_ALU_INST];
+ } alu;
+
+ uint32_t config; /* US_CONFIG */
+ uint32_t pixsize; /* US_PIXSIZE */
+ uint32_t code_offset; /* US_CODE_OFFSET */
+ uint32_t code_addr[4]; /* US_CODE_ADDR */
+};
+
+
+struct r500_fragment_program_code {
+ struct {
+ uint32_t inst0;
+ uint32_t inst1;
+ uint32_t inst2;
+ uint32_t inst3;
+ uint32_t inst4;
+ uint32_t inst5;
+ } inst[R500_PFS_MAX_INST];
+
+ int inst_end; /* Number of instructions - 1; also, last instruction to be executed */
+
+ int max_temp_idx;
+
+ uint32_t us_fc_ctrl;
+};
+
+struct rX00_fragment_program_code {
+ union {
+ struct r300_fragment_program_code r300;
+ struct r500_fragment_program_code r500;
+ } code;
+
+ unsigned writes_depth:1;
+
+ struct rc_constant_list constants;
+};
+
+
+#define VSF_MAX_FRAGMENT_LENGTH (255*4)
+#define VSF_MAX_FRAGMENT_TEMPS (14)
+
+#define VSF_MAX_INPUTS 32
+#define VSF_MAX_OUTPUTS 32
+
+struct r300_vertex_program_code {
+ int length;
+ union {
+ uint32_t d[VSF_MAX_FRAGMENT_LENGTH];
+ float f[VSF_MAX_FRAGMENT_LENGTH];
+ } body;
+
+ int pos_end;
+ int num_temporaries; /* Number of temp vars used by program */
+ int inputs[VSF_MAX_INPUTS];
+ int outputs[VSF_MAX_OUTPUTS];
+
+ struct rc_constant_list constants;
+
+ uint32_t InputsRead;
+ uint32_t OutputsWritten;
+};
+
+void r300_vertex_program_dump(struct r300_vertex_program_code * vs);
+
+#endif /* RADEON_CODE_H */
+
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
new file mode 100644
index 0000000000..1c8ba864a4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_compiler.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "radeon_program.h"
+
+
+void rc_init(struct radeon_compiler * c)
+{
+ memset(c, 0, sizeof(*c));
+
+ memory_pool_init(&c->Pool);
+ c->Program.Instructions.Prev = &c->Program.Instructions;
+ c->Program.Instructions.Next = &c->Program.Instructions;
+ c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+}
+
+void rc_destroy(struct radeon_compiler * c)
+{
+ rc_constants_destroy(&c->Program.Constants);
+ memory_pool_destroy(&c->Pool);
+ free(c->ErrorMsg);
+}
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...)
+{
+ va_list ap;
+
+ if (!c->Debug)
+ return;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+}
+
+void rc_error(struct radeon_compiler * c, const char * fmt, ...)
+{
+ va_list ap;
+
+ c->Error = 1;
+
+ if (!c->ErrorMsg) {
+ /* Only remember the first error */
+ char buf[1024];
+ int written;
+
+ va_start(ap, fmt);
+ written = vsnprintf(buf, sizeof(buf), fmt, ap);
+ va_end(ap);
+
+ if (written < sizeof(buf)) {
+ c->ErrorMsg = strdup(buf);
+ } else {
+ c->ErrorMsg = malloc(written + 1);
+
+ va_start(ap, fmt);
+ vsnprintf(c->ErrorMsg, written + 1, fmt, ap);
+ va_end(ap);
+ }
+ }
+
+ if (c->Debug) {
+ fprintf(stderr, "r300compiler error: ");
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ }
+}
+
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion)
+{
+ rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion);
+ return 1;
+}
+
+/**
+ * Recompute c->Program.InputsRead and c->Program.OutputsWritten
+ * based on which inputs and outputs are actually referenced
+ * in program instructions.
+ */
+void rc_calculate_inputs_outputs(struct radeon_compiler * c)
+{
+ struct rc_instruction *inst;
+
+ c->Program.InputsRead = 0;
+ c->Program.OutputsWritten = 0;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next)
+ {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ int i;
+
+ for (i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT)
+ c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index;
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT)
+ c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index;
+ }
+ }
+}
+
+/**
+ * Rewrite the program such that everything that source the given input
+ * register will source new_input instead.
+ */
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input)
+{
+ struct rc_instruction * inst;
+
+ c->Program.InputsRead &= ~(1 << input);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < opcode->NumSrcRegs; ++i) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) {
+ inst->U.I.SrcReg[i].File = new_input.File;
+ inst->U.I.SrcReg[i].Index = new_input.Index;
+ inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle);
+ if (!inst->U.I.SrcReg[i].Abs) {
+ inst->U.I.SrcReg[i].Negate ^= new_input.Negate;
+ inst->U.I.SrcReg[i].Abs = new_input.Abs;
+ }
+
+ c->Program.InputsRead |= 1 << new_input.Index;
+ }
+ }
+ }
+}
+
+
+/**
+ * Rewrite the program such that everything that writes into the given
+ * output register will instead write to new_output. The new_output
+ * writemask is honoured.
+ */
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask)
+{
+ struct rc_instruction * inst;
+
+ c->Program.OutputsWritten &= ~(1 << output);
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+ inst->U.I.DstReg.Index = new_output;
+ inst->U.I.DstReg.WriteMask &= writemask;
+
+ c->Program.OutputsWritten |= 1 << new_output;
+ }
+ }
+ }
+}
+
+
+/**
+ * Rewrite the program such that a given output is duplicated.
+ */
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output)
+{
+ unsigned tempreg = rc_find_free_temporary(c);
+ struct rc_instruction * inst;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) {
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = tempreg;
+ }
+ }
+ }
+
+ inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = output;
+
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = tempreg;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+ inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = dup_output;
+
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = tempreg;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+ c->Program.OutputsWritten |= 1 << dup_output;
+}
+
+
+/**
+ * Introduce standard code fragment to deal with fragment.position.
+ */
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
+ int full_vtransform)
+{
+ unsigned tempregi = rc_find_free_temporary(c);
+ struct rc_instruction * inst_rcp;
+ struct rc_instruction * inst_mul;
+ struct rc_instruction * inst_mad;
+ struct rc_instruction * inst;
+
+ c->Program.InputsRead &= ~(1 << wpos);
+ c->Program.InputsRead |= 1 << new_input;
+
+ /* perspective divide */
+ inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions);
+ inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+
+ inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rcp->U.I.DstReg.Index = tempregi;
+ inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+
+ inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT;
+ inst_rcp->U.I.SrcReg[0].Index = new_input;
+ inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+
+ inst_mul = rc_insert_new_instruction(c, inst_rcp);
+ inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+
+ inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.DstReg.Index = tempregi;
+ inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+
+ inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT;
+ inst_mul->U.I.SrcReg[0].Index = new_input;
+
+ inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.SrcReg[1].Index = tempregi;
+ inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+
+ /* viewport transformation */
+ inst_mad = rc_insert_new_instruction(c, inst_mul);
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = tempregi;
+ inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+
+ inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[0].Index = tempregi;
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+
+ inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT;
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0;
+
+ if (full_vtransform) {
+ inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0);
+ inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0);
+ } else {
+ inst_mad->U.I.SrcReg[1].Index =
+ inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0);
+ }
+
+ for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned i;
+
+ for(i = 0; i < opcode->NumSrcRegs; i++) {
+ if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT &&
+ inst->U.I.SrcReg[i].Index == wpos) {
+ inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[i].Index = tempregi;
+ }
+ }
+ }
+}
+
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
new file mode 100644
index 0000000000..f15905d79d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_compiler.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_COMPILER_H
+#define RADEON_COMPILER_H
+
+#include "../../../../main/compiler.h"
+
+#include "memory_pool.h"
+#include "radeon_code.h"
+#include "radeon_program.h"
+
+struct rc_swizzle_caps;
+
+struct radeon_compiler {
+ struct memory_pool Pool;
+ struct rc_program Program;
+ unsigned Debug:1;
+ unsigned Error:1;
+ char * ErrorMsg;
+
+ /* Hardware specification. */
+ unsigned is_r500;
+ unsigned max_temp_regs;
+
+ /**
+ * Variables used internally, not be touched by callers
+ * of the compiler
+ */
+ /*@{*/
+ struct rc_swizzle_caps * SwizzleCaps;
+ /*@}*/
+};
+
+void rc_init(struct radeon_compiler * c);
+void rc_destroy(struct radeon_compiler * c);
+
+void rc_debug(struct radeon_compiler * c, const char * fmt, ...);
+void rc_error(struct radeon_compiler * c, const char * fmt, ...);
+
+int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion);
+
+/**
+ * This macro acts like an if-statement that can be used to implement
+ * non-aborting assertions in the compiler.
+ *
+ * It checks whether \p cond is true. If not, an internal compiler error is
+ * flagged and the if-clause is run.
+ *
+ * A typical use-case would be:
+ *
+ * if (rc_assert(c, condition-that-must-be-true))
+ * return;
+ */
+#define rc_assert(c, cond) \
+ (!(cond) && rc_if_fail_helper(c, __FILE__, __LINE__, #cond))
+
+void rc_calculate_inputs_outputs(struct radeon_compiler * c);
+
+void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input);
+void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask);
+void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output);
+void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input,
+ int full_vtransform);
+
+struct r300_fragment_program_compiler {
+ struct radeon_compiler Base;
+ struct rX00_fragment_program_code *code;
+ /* Optional transformations and features. */
+ struct r300_fragment_program_external_state state;
+ unsigned enable_shadow_ambient;
+ /* Register corresponding to the depthbuffer. */
+ unsigned OutputDepth;
+ /* Registers corresponding to the four colorbuffers. */
+ unsigned OutputColor[4];
+
+ void * UserData;
+ void (*AllocateHwInputs)(
+ struct r300_fragment_program_compiler * c,
+ void (*allocate)(void * data, unsigned input, unsigned hwreg),
+ void * mydata);
+};
+
+void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c);
+
+
+struct r300_vertex_program_compiler {
+ struct radeon_compiler Base;
+ struct r300_vertex_program_code *code;
+ uint32_t RequiredOutputs;
+
+ void * UserData;
+ void (*SetHwInputOutput)(struct r300_vertex_program_compiler * c);
+};
+
+void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* c);
+
+#endif /* RADEON_COMPILER_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
new file mode 100644
index 0000000000..0e6c62541f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.c
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_program.h"
+
+
+static void reads_normal(struct rc_instruction * fullinst, rc_read_write_chan_fn cb, void * userdata)
+{
+ struct rc_sub_instruction * inst = &fullinst->U.I;
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ unsigned int refmask = 0;
+
+ if (inst->SrcReg[src].File == RC_FILE_NONE)
+ return;
+
+ for(unsigned int chan = 0; chan < 4; ++chan)
+ refmask |= 1 << GET_SWZ(inst->SrcReg[src].Swizzle, chan);
+
+ refmask &= RC_MASK_XYZW;
+
+ if (refmask)
+ cb(userdata, fullinst, inst->SrcReg[src].File, inst->SrcReg[src].Index, refmask);
+
+ if (refmask && inst->SrcReg[src].RelAddr)
+ cb(userdata, fullinst, RC_FILE_ADDRESS, 0, RC_MASK_X);
+ }
+}
+
+static void reads_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+ unsigned int refmasks[3] = { 0, 0, 0 };
+
+ if (inst->RGB.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ for(unsigned int chan = 0; chan < 3; ++chan) {
+ unsigned int swz = GET_SWZ(inst->RGB.Arg[arg].Swizzle, chan);
+ if (swz < 4)
+ refmasks[inst->RGB.Arg[arg].Source] |= 1 << swz;
+ }
+ }
+ }
+
+ if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ if (inst->Alpha.Arg[arg].Swizzle < 4)
+ refmasks[inst->Alpha.Arg[arg].Source] |= 1 << inst->Alpha.Arg[arg].Swizzle;
+ }
+ }
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ if (inst->RGB.Src[src].Used && (refmasks[src] & RC_MASK_XYZ))
+ cb(userdata, fullinst, inst->RGB.Src[src].File, inst->RGB.Src[src].Index,
+ refmasks[src] & RC_MASK_XYZ);
+
+ if (inst->Alpha.Src[src].Used && (refmasks[src] & RC_MASK_W))
+ cb(userdata, fullinst, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, RC_MASK_W);
+ }
+}
+
+/**
+ * Calls a callback function for all register reads.
+ *
+ * This is conservative, i.e. if the same register is referenced multiple times,
+ * the callback may also be called multiple times.
+ * Also, the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
+{
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ reads_normal(inst, cb, userdata);
+ } else {
+ reads_pair(inst, cb, userdata);
+ }
+}
+
+
+
+static void writes_normal(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+ struct rc_sub_instruction * inst = &fullinst->U.I;
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+ if (opcode->HasDstReg && inst->DstReg.WriteMask)
+ cb(userdata, fullinst, inst->DstReg.File, inst->DstReg.Index, inst->DstReg.WriteMask);
+
+ if (inst->WriteALUResult)
+ cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
+}
+
+static void writes_pair(struct rc_instruction * fullinst, rc_read_write_mask_fn cb, void * userdata)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+
+ if (inst->RGB.WriteMask)
+ cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->RGB.DestIndex, inst->RGB.WriteMask);
+
+ if (inst->Alpha.WriteMask)
+ cb(userdata, fullinst, RC_FILE_TEMPORARY, inst->Alpha.DestIndex, RC_MASK_W);
+
+ if (inst->WriteALUResult)
+ cb(userdata, fullinst, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT, RC_MASK_X);
+}
+
+/**
+ * Calls a callback function for all register writes in the instruction,
+ * reporting writemasks to the callback function.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata)
+{
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ writes_normal(inst, cb, userdata);
+ } else {
+ writes_pair(inst, cb, userdata);
+ }
+}
+
+
+struct mask_to_chan_data {
+ void * UserData;
+ rc_read_write_chan_fn Fn;
+};
+
+static void mask_to_chan_cb(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct mask_to_chan_data * d = data;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (GET_BIT(mask, chan))
+ d->Fn(d->UserData, inst, file, index, chan);
+ }
+}
+
+/**
+ * Calls a callback function for all sourced register channels.
+ *
+ * This is conservative, i.e. channels may be called multiple times,
+ * and the writemask of the instruction is not taken into account.
+ */
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+ struct mask_to_chan_data d;
+ d.UserData = userdata;
+ d.Fn = cb;
+ rc_for_all_reads_mask(inst, &mask_to_chan_cb, &d);
+}
+
+/**
+ * Calls a callback function for all written register channels.
+ *
+ * \warning Does not report output registers for paired instructions!
+ */
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata)
+{
+ struct mask_to_chan_data d;
+ d.UserData = userdata;
+ d.Fn = cb;
+ rc_for_all_writes_mask(inst, &mask_to_chan_cb, &d);
+}
+
+static void remap_normal_instruction(struct rc_instruction * fullinst,
+ rc_remap_register_fn cb, void * userdata)
+{
+ struct rc_sub_instruction * inst = &fullinst->U.I;
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+
+ if (opcode->HasDstReg) {
+ rc_register_file file = inst->DstReg.File;
+ unsigned int index = inst->DstReg.Index;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->DstReg.File = file;
+ inst->DstReg.Index = index;
+ }
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ rc_register_file file = inst->SrcReg[src].File;
+ unsigned int index = inst->SrcReg[src].Index;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->SrcReg[src].File = file;
+ inst->SrcReg[src].Index = index;
+ }
+}
+
+static void remap_pair_instruction(struct rc_instruction * fullinst,
+ rc_remap_register_fn cb, void * userdata)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+
+ if (inst->RGB.WriteMask) {
+ rc_register_file file = RC_FILE_TEMPORARY;
+ unsigned int index = inst->RGB.DestIndex;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->RGB.DestIndex = index;
+ }
+
+ if (inst->Alpha.WriteMask) {
+ rc_register_file file = RC_FILE_TEMPORARY;
+ unsigned int index = inst->Alpha.DestIndex;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->Alpha.DestIndex = index;
+ }
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ if (inst->RGB.Src[src].Used) {
+ rc_register_file file = inst->RGB.Src[src].File;
+ unsigned int index = inst->RGB.Src[src].Index;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->RGB.Src[src].File = file;
+ inst->RGB.Src[src].Index = index;
+ }
+
+ if (inst->Alpha.Src[src].Used) {
+ rc_register_file file = inst->Alpha.Src[src].File;
+ unsigned int index = inst->Alpha.Src[src].Index;
+
+ cb(userdata, fullinst, &file, &index);
+
+ inst->Alpha.Src[src].File = file;
+ inst->Alpha.Src[src].Index = index;
+ }
+ }
+}
+
+
+/**
+ * Remap all register accesses according to the given function.
+ * That is, call the function \p cb for each referenced register (both read and written)
+ * and update the given instruction \p inst accordingly
+ * if it modifies its \ref pfile and \ref pindex contents.
+ */
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata)
+{
+ if (inst->Type == RC_INSTRUCTION_NORMAL)
+ remap_normal_instruction(inst, cb, userdata);
+ else
+ remap_pair_instruction(inst, cb, userdata);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
new file mode 100644
index 0000000000..60a6e192a9
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_DATAFLOW_H
+#define RADEON_DATAFLOW_H
+
+#include "radeon_program_constants.h"
+
+struct radeon_compiler;
+struct rc_instruction;
+struct rc_swizzle_caps;
+
+
+/**
+ * Help analyze and modify the register accesses of instructions.
+ */
+/*@{*/
+typedef void (*rc_read_write_chan_fn)(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int chan);
+void rc_for_all_reads_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+void rc_for_all_writes_chan(struct rc_instruction * inst, rc_read_write_chan_fn cb, void * userdata);
+
+typedef void (*rc_read_write_mask_fn)(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask);
+void rc_for_all_reads_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+void rc_for_all_writes_mask(struct rc_instruction * inst, rc_read_write_mask_fn cb, void * userdata);
+
+typedef void (*rc_remap_register_fn)(void * userdata, struct rc_instruction * inst,
+ rc_register_file * pfile, unsigned int * pindex);
+void rc_remap_registers(struct rc_instruction * inst, rc_remap_register_fn cb, void * userdata);
+/*@}*/
+
+
+/**
+ * Compiler passes based on dataflow analysis.
+ */
+/*@{*/
+typedef void (*rc_dataflow_mark_outputs_fn)(void * userdata, void * data,
+ void (*mark_fn)(void * data, unsigned int index, unsigned int mask));
+void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata);
+void rc_dataflow_swizzles(struct radeon_compiler * c);
+/*@}*/
+
+void rc_optimize(struct radeon_compiler * c);
+
+#endif /* RADEON_DATAFLOW_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
new file mode 100644
index 0000000000..e3c2c83c0c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -0,0 +1,284 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+
+
+struct updatemask_state {
+ unsigned char Output[RC_REGISTER_MAX_INDEX];
+ unsigned char Temporary[RC_REGISTER_MAX_INDEX];
+ unsigned char Address;
+ unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
+};
+
+struct instruction_state {
+ unsigned char WriteMask:4;
+ unsigned char WriteALUResult:1;
+ unsigned char SrcReg[3];
+};
+
+struct branchinfo {
+ unsigned int HaveElse:1;
+
+ struct updatemask_state StoreEndif;
+ struct updatemask_state StoreElse;
+};
+
+struct deadcode_state {
+ struct radeon_compiler * C;
+ struct instruction_state * Instructions;
+
+ struct updatemask_state R;
+
+ struct branchinfo * BranchStack;
+ unsigned int BranchStackSize;
+ unsigned int BranchStackReserved;
+};
+
+
+static void or_updatemasks(
+ struct updatemask_state * dst,
+ struct updatemask_state * a,
+ struct updatemask_state * b)
+{
+ for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
+ dst->Output[i] = a->Output[i] | b->Output[i];
+ dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
+ }
+
+ for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
+ dst->Special[i] = a->Special[i] | b->Special[i];
+
+ dst->Address = a->Address | b->Address;
+}
+
+static void push_branch(struct deadcode_state * s)
+{
+ memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
+ s->BranchStackSize, s->BranchStackReserved, 1);
+
+ struct branchinfo * branch = &s->BranchStack[s->BranchStackSize++];
+ branch->HaveElse = 0;
+ memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
+}
+
+static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
+{
+ if (file == RC_FILE_OUTPUT || file == RC_FILE_TEMPORARY) {
+ if (index >= RC_REGISTER_MAX_INDEX) {
+ rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
+ return 0;
+ }
+
+ if (file == RC_FILE_OUTPUT)
+ return &s->R.Output[index];
+ else
+ return &s->R.Temporary[index];
+ } else if (file == RC_FILE_ADDRESS) {
+ return &s->R.Address;
+ } else if (file == RC_FILE_SPECIAL) {
+ if (index >= RC_NUM_SPECIAL_REGISTERS) {
+ rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
+ return 0;
+ }
+
+ return &s->R.Special[index];
+ }
+
+ return 0;
+}
+
+static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
+{
+ unsigned char * pused = get_used_ptr(s, file, index);
+ if (pused)
+ *pused |= mask;
+}
+
+static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ struct instruction_state * insts = &s->Instructions[inst->IP];
+ unsigned int usedmask = 0;
+
+ if (opcode->HasDstReg) {
+ unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
+ if (pused) {
+ usedmask = *pused & inst->U.I.DstReg.WriteMask;
+ *pused &= ~usedmask;
+ }
+ }
+
+ insts->WriteMask |= usedmask;
+
+ if (inst->U.I.WriteALUResult) {
+ unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
+ if (pused && *pused) {
+ if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+ usedmask |= RC_MASK_X;
+ else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+ usedmask |= RC_MASK_W;
+
+ *pused = 0;
+ insts->WriteALUResult = 1;
+ }
+ }
+
+ unsigned int srcmasks[3];
+ rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ unsigned int refmask = 0;
+ unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
+ insts->SrcReg[src] |= newsrcmask;
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (GET_BIT(newsrcmask, chan))
+ refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+ }
+
+ /* get rid of spurious bits from ZERO, ONE, etc. swizzles */
+ refmask &= RC_MASK_XYZW;
+
+ if (!refmask)
+ continue;
+
+ mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
+
+ if (inst->U.I.SrcReg[src].RelAddr)
+ mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
+ }
+}
+
+static void mark_output_use(void * data, unsigned int index, unsigned int mask)
+{
+ struct deadcode_state * s = data;
+
+ mark_used(s, RC_FILE_OUTPUT, index, mask);
+}
+
+void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_fn dce, void * userdata)
+{
+ struct deadcode_state s;
+ unsigned int nr_instructions;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+
+ nr_instructions = rc_recompute_ips(c);
+ s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
+ memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
+
+ dce(userdata, &s, &mark_output_use);
+
+ for(struct rc_instruction * inst = c->Program.Instructions.Prev;
+ inst != &c->Program.Instructions;
+ inst = inst->Prev) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->IsFlowControl) {
+ if (opcode->Opcode == RC_OPCODE_ENDIF) {
+ push_branch(&s);
+ } else {
+ if (s.BranchStackSize) {
+ struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
+
+ if (opcode->Opcode == RC_OPCODE_IF) {
+ or_updatemasks(&s.R,
+ &s.R,
+ branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
+
+ s.BranchStackSize--;
+ } else if (opcode->Opcode == RC_OPCODE_ELSE) {
+ if (branch->HaveElse) {
+ rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
+ } else {
+ memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
+ memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
+ branch->HaveElse = 1;
+ }
+ } else {
+ rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
+ }
+ } else {
+ rc_error(c, "%s: Unexpected control flow instruction\n", __FUNCTION__);
+ }
+ }
+ }
+
+ update_instruction(&s, inst);
+ }
+
+ unsigned int ip = 0;
+ for(struct rc_instruction * inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next, ++ip) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ int dead = 1;
+
+ if (!opcode->HasDstReg) {
+ dead = 0;
+ } else {
+ inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
+ if (s.Instructions[ip].WriteMask)
+ dead = 0;
+
+ if (s.Instructions[ip].WriteALUResult)
+ dead = 0;
+ else
+ inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
+ }
+
+ if (dead) {
+ struct rc_instruction * todelete = inst;
+ inst = inst->Prev;
+ rc_remove_instruction(todelete);
+ continue;
+ }
+
+ unsigned int srcmasks[3];
+ unsigned int usemask = s.Instructions[ip].WriteMask;
+
+ if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
+ usemask |= RC_MASK_X;
+ else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
+ usemask |= RC_MASK_W;
+
+ rc_compute_sources_for_writemask(inst, usemask, srcmasks);
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (!GET_BIT(srcmasks[src], chan))
+ SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
+ }
+ }
+ }
+
+ rc_calculate_inputs_outputs(c);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
new file mode 100644
index 0000000000..33acbd30f4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_swizzles.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+static void rewrite_source(struct radeon_compiler * c,
+ struct rc_instruction * inst, unsigned src)
+{
+ struct rc_swizzle_split split;
+ unsigned int tempreg = rc_find_free_temporary(c);
+ unsigned int usemask;
+
+ usemask = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) != RC_SWIZZLE_UNUSED)
+ usemask |= 1 << chan;
+ }
+
+ c->SwizzleCaps->Split(inst->U.I.SrcReg[src], usemask, &split);
+
+ for(unsigned int phase = 0; phase < split.NumPhases; ++phase) {
+ struct rc_instruction * mov = rc_insert_new_instruction(c, inst->Prev);
+ unsigned int phase_refmask;
+ unsigned int masked_negate;
+
+ mov->U.I.Opcode = RC_OPCODE_MOV;
+ mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ mov->U.I.DstReg.Index = tempreg;
+ mov->U.I.DstReg.WriteMask = split.Phase[phase];
+ mov->U.I.SrcReg[0] = inst->U.I.SrcReg[src];
+
+ phase_refmask = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ if (!GET_BIT(split.Phase[phase], chan))
+ SET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan, RC_SWIZZLE_UNUSED);
+ else
+ phase_refmask |= 1 << GET_SWZ(mov->U.I.SrcReg[0].Swizzle, chan);
+ }
+
+ phase_refmask &= RC_MASK_XYZW;
+
+ masked_negate = split.Phase[phase] & mov->U.I.SrcReg[0].Negate;
+ if (masked_negate == 0)
+ mov->U.I.SrcReg[0].Negate = 0;
+ else if (masked_negate == split.Phase[phase])
+ mov->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+
+ }
+
+ inst->U.I.SrcReg[src].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[src].Index = tempreg;
+ inst->U.I.SrcReg[src].Swizzle = 0;
+ inst->U.I.SrcReg[src].Negate = RC_MASK_NONE;
+ inst->U.I.SrcReg[src].Abs = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan,
+ GET_BIT(usemask, chan) ? chan : RC_SWIZZLE_UNUSED);
+ }
+}
+
+void rc_dataflow_swizzles(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst;
+
+ for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned int src;
+
+ for(src = 0; src < opcode->NumSrcRegs; ++src) {
+ if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+ rewrite_source(c, inst, src);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
new file mode 100644
index 0000000000..863654cf68
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_emulate_branches.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+struct proxy_info {
+ unsigned int Proxied:1;
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+struct register_proxies {
+ struct proxy_info Temporary[RC_REGISTER_MAX_INDEX];
+};
+
+struct branch_info {
+ struct rc_instruction * If;
+ struct rc_instruction * Else;
+};
+
+struct emulate_branch_state {
+ struct radeon_compiler * C;
+
+ struct branch_info * Branches;
+ unsigned int BranchCount;
+ unsigned int BranchReserved;
+};
+
+
+static void handle_if(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+ memory_pool_array_reserve(&s->C->Pool, struct branch_info,
+ s->Branches, s->BranchCount, s->BranchReserved, 1);
+
+ DBG("%s\n", __FUNCTION__);
+
+ struct branch_info * branch = &s->Branches[s->BranchCount++];
+ memset(branch, 0, sizeof(struct branch_info));
+ branch->If = inst;
+
+ /* Make a safety copy of the decision register, because we will need
+ * it at ENDIF time and it might be overwritten in both branches. */
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, inst->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = rc_find_free_temporary(s->C);
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_X;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
+ inst->U.I.SrcReg[0].Swizzle = 0;
+ inst->U.I.SrcReg[0].Abs = 0;
+ inst->U.I.SrcReg[0].Negate = 0;
+}
+
+static void handle_else(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+ if (!s->BranchCount) {
+ rc_error(s->C, "Encountered ELSE outside of branches");
+ return;
+ }
+
+ DBG("%s\n", __FUNCTION__);
+
+ struct branch_info * branch = &s->Branches[s->BranchCount - 1];
+ branch->Else = inst;
+}
+
+
+struct state_and_proxies {
+ struct emulate_branch_state * S;
+ struct register_proxies * Proxies;
+};
+
+static struct proxy_info * get_proxy_info(struct state_and_proxies * sap,
+ rc_register_file file, unsigned int index)
+{
+ if (file == RC_FILE_TEMPORARY) {
+ return &sap->Proxies->Temporary[index];
+ } else {
+ return 0;
+ }
+}
+
+static void scan_write(void * userdata, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int comp)
+{
+ struct state_and_proxies * sap = userdata;
+ struct proxy_info * proxy = get_proxy_info(sap, file, index);
+
+ if (proxy && !proxy->Proxied) {
+ proxy->Proxied = 1;
+ proxy->Index = rc_find_free_temporary(sap->S->C);
+ }
+}
+
+static void remap_proxy_function(void * userdata, struct rc_instruction * inst,
+ rc_register_file * pfile, unsigned int * pindex)
+{
+ struct state_and_proxies * sap = userdata;
+ struct proxy_info * proxy = get_proxy_info(sap, *pfile, *pindex);
+
+ if (proxy && proxy->Proxied) {
+ *pfile = RC_FILE_TEMPORARY;
+ *pindex = proxy->Index;
+ }
+}
+
+/**
+ * Redirect all writes in the instruction range [begin, end) to proxy
+ * temporary registers.
+ */
+static void allocate_and_insert_proxies(struct emulate_branch_state * s,
+ struct register_proxies * proxies,
+ struct rc_instruction * begin,
+ struct rc_instruction * end)
+{
+ struct state_and_proxies sap;
+
+ sap.S = s;
+ sap.Proxies = proxies;
+
+ for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
+ rc_for_all_writes_mask(inst, scan_write, &sap);
+ rc_remap_registers(inst, remap_proxy_function, &sap);
+ }
+
+ for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+ if (proxies->Temporary[index].Proxied) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, begin->Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = proxies->Temporary[index].Index;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.SrcReg[0].Index = index;
+ }
+ }
+}
+
+
+static void inject_cmp(struct emulate_branch_state * s,
+ struct rc_instruction * inst_if,
+ struct rc_instruction * inst_endif,
+ rc_register_file file, unsigned int index,
+ struct proxy_info ifproxy,
+ struct proxy_info elseproxy)
+{
+ struct rc_instruction * inst_cmp = rc_insert_new_instruction(s->C, inst_endif);
+ inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+ inst_cmp->U.I.DstReg.File = file;
+ inst_cmp->U.I.DstReg.Index = index;
+ inst_cmp->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst_cmp->U.I.SrcReg[0] = inst_if->U.I.SrcReg[0];
+ inst_cmp->U.I.SrcReg[0].Abs = 1;
+ inst_cmp->U.I.SrcReg[0].Negate = RC_MASK_XYZW;
+ inst_cmp->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[1].Index = ifproxy.Proxied ? ifproxy.Index : index;
+ inst_cmp->U.I.SrcReg[2].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[2].Index = elseproxy.Proxied ? elseproxy.Index : index;
+}
+
+static void handle_endif(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+ if (!s->BranchCount) {
+ rc_error(s->C, "Encountered ENDIF outside of branches");
+ return;
+ }
+
+ DBG("%s\n", __FUNCTION__);
+
+ struct branch_info * branch = &s->Branches[s->BranchCount - 1];
+ struct register_proxies IfProxies;
+ struct register_proxies ElseProxies;
+
+ memset(&IfProxies, 0, sizeof(IfProxies));
+ memset(&ElseProxies, 0, sizeof(ElseProxies));
+
+ allocate_and_insert_proxies(s, &IfProxies, branch->If->Next, branch->Else ? branch->Else : inst);
+
+ if (branch->Else)
+ allocate_and_insert_proxies(s, &ElseProxies, branch->Else->Next, inst);
+
+ /* Insert the CMP instructions at the end. */
+ for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+ if (IfProxies.Temporary[index].Proxied || ElseProxies.Temporary[index].Proxied) {
+ inject_cmp(s, branch->If, inst, RC_FILE_TEMPORARY, index,
+ IfProxies.Temporary[index], ElseProxies.Temporary[index]);
+ }
+ }
+
+ /* Remove all traces of the branch instructions */
+ rc_remove_instruction(branch->If);
+ if (branch->Else)
+ rc_remove_instruction(branch->Else);
+ rc_remove_instruction(inst);
+
+ s->BranchCount--;
+
+ if (VERBOSE) {
+ DBG("Program after ENDIF handling:\n");
+ rc_print_program(&s->C->Program);
+ }
+}
+
+
+struct remap_output_data {
+ unsigned int Output:RC_REGISTER_INDEX_BITS;
+ unsigned int Temporary:RC_REGISTER_INDEX_BITS;
+};
+
+static void remap_output_function(void * userdata, struct rc_instruction * inst,
+ rc_register_file * pfile, unsigned int * pindex)
+{
+ struct remap_output_data * data = userdata;
+
+ if (*pfile == RC_FILE_OUTPUT && *pindex == data->Output) {
+ *pfile = RC_FILE_TEMPORARY;
+ *pindex = data->Temporary;
+ }
+}
+
+
+/**
+ * Output registers cannot be read from and so cannot be dealt with like
+ * temporary registers.
+ *
+ * We do the simplest thing: If an output registers is written within
+ * a branch, then *all* writes to this register are proxied to a
+ * temporary register, and a final MOV is appended to the end of
+ * the program.
+ */
+static void fix_output_writes(struct emulate_branch_state * s, struct rc_instruction * inst)
+{
+ if (!s->BranchCount)
+ return;
+
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (!opcode->HasDstReg)
+ return;
+
+ if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) {
+ struct remap_output_data remap;
+
+ remap.Output = inst->U.I.DstReg.Index;
+ remap.Temporary = rc_find_free_temporary(s->C);
+
+ for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
+ inst != &s->C->Program.Instructions;
+ inst = inst->Next) {
+ rc_remap_registers(inst, &remap_output_function, &remap);
+ }
+
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(s->C, s->C->Program.Instructions.Prev);
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst_mov->U.I.DstReg.Index = remap.Output;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.SrcReg[0].Index = remap.Temporary;
+ }
+}
+
+/**
+ * Remove branch instructions; instead, execute both branches
+ * on different register sets and choose between their results
+ * using CMP instructions in place of the original ENDIF.
+ */
+void rc_emulate_branches(struct radeon_compiler * c)
+{
+ struct emulate_branch_state s;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+
+ /* Untypical loop because we may remove the current instruction */
+ struct rc_instruction * ptr = c->Program.Instructions.Next;
+ while(ptr != &c->Program.Instructions) {
+ struct rc_instruction * inst = ptr;
+ ptr = ptr->Next;
+
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_IF:
+ handle_if(&s, inst);
+ break;
+ case RC_OPCODE_ELSE:
+ handle_else(&s, inst);
+ break;
+ case RC_OPCODE_ENDIF:
+ handle_endif(&s, inst);
+ break;
+ default:
+ fix_output_writes(&s, inst);
+ break;
+ }
+ } else {
+ rc_error(c, "%s: unhandled instruction type\n", __FUNCTION__);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h
new file mode 100644
index 0000000000..e07279f093
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_branches.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef RADEON_EMULATE_BRANCHES_H
+#define RADEON_EMULATE_BRANCHES_H
+
+struct radeon_compiler;
+
+void rc_emulate_branches(struct radeon_compiler * c);
+
+#endif /* RADEON_EMULATE_BRANCHES_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
new file mode 100644
index 0000000000..4c5d29f421
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -0,0 +1,474 @@
+/*
+ * Copyright 2010 Tom Stellard <tstellar@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ */
+
+#include "radeon_emulate_loops.h"
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+struct emulate_loop_state {
+ struct radeon_compiler * C;
+ struct loop_info * Loops;
+ unsigned int LoopCount;
+ unsigned int LoopReserved;
+};
+
+struct loop_info {
+ struct rc_instruction * BeginLoop;
+ struct rc_instruction * Cond;
+ struct rc_instruction * If;
+ struct rc_instruction * Brk;
+ struct rc_instruction * EndIf;
+ struct rc_instruction * EndLoop;
+};
+
+struct const_value {
+
+ struct radeon_compiler * C;
+ struct rc_src_register * Src;
+ float Value;
+ int HasValue;
+};
+
+struct count_inst {
+ struct radeon_compiler * C;
+ int Index;
+ rc_swizzle Swz;
+ float Amount;
+ int Unknown;
+};
+
+static float get_constant_value(struct radeon_compiler * c,
+ struct rc_src_register * src,
+ int chan)
+{
+ float base = 1.0f;
+ int swz = GET_SWZ(src->Swizzle, chan);
+ if(swz >= 4 || src->Index >= c->Program.Constants.Count ){
+ rc_error(c, "get_constant_value: Can't find a value.\n");
+ return 0.0f;
+ }
+ if(GET_BIT(src->Negate, chan)){
+ base = -1.0f;
+ }
+ return base *
+ c->Program.Constants.Constants[src->Index].u.Immediate[swz];
+}
+
+static int src_reg_is_immediate(struct rc_src_register * src,
+ struct radeon_compiler * c)
+{
+ return src->File == RC_FILE_CONSTANT &&
+ c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
+}
+
+static unsigned int loop_count_instructions(struct loop_info * loop)
+{
+ unsigned int count = 0;
+ struct rc_instruction * inst = loop->BeginLoop->Next;
+ while(inst != loop->EndLoop){
+ count++;
+ inst = inst->Next;
+ }
+ return count;
+}
+
+static unsigned int loop_calc_iterations(struct loop_info * loop,
+ unsigned int loop_count, unsigned int max_instructions)
+{
+ unsigned int icount = loop_count_instructions(loop);
+ return max_instructions / (loop_count * icount);
+}
+
+static void loop_unroll(struct emulate_loop_state * s,
+ struct loop_info *loop, unsigned int iterations)
+{
+ unsigned int i;
+ struct rc_instruction * ptr;
+ struct rc_instruction * first = loop->BeginLoop->Next;
+ struct rc_instruction * last = loop->EndLoop->Prev;
+ struct rc_instruction * append_to = last;
+ rc_remove_instruction(loop->BeginLoop);
+ rc_remove_instruction(loop->EndLoop);
+ for( i = 1; i < iterations; i++){
+ for(ptr = first; ptr != last->Next; ptr = ptr->Next){
+ struct rc_instruction *new = rc_alloc_instruction(s->C);
+ memcpy(new, ptr, sizeof(struct rc_instruction));
+ rc_insert_instruction(append_to, new);
+ append_to = new;
+ }
+ }
+}
+
+
+static void update_const_value(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct const_value * value = data;
+ if(value->Src->File != file ||
+ value->Src->Index != index ||
+ !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
+ return;
+ }
+ switch(inst->U.I.Opcode){
+ case RC_OPCODE_MOV:
+ if(!src_reg_is_immediate(&inst->U.I.SrcReg[0], value->C)){
+ return;
+ }
+ value->HasValue = 1;
+ value->Value =
+ get_constant_value(value->C, &inst->U.I.SrcReg[0], 0);
+ break;
+ }
+}
+
+static void get_incr_amount(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct count_inst * count_inst = data;
+ int amnt_src_index;
+ const struct rc_opcode_info * opcode;
+ float amount;
+
+ if(file != RC_FILE_TEMPORARY ||
+ count_inst->Index != index ||
+ (1 << GET_SWZ(count_inst->Swz,0) != mask)){
+ return;
+ }
+ /* Find the index of the counter register. */
+ opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ if(opcode->NumSrcRegs != 2){
+ count_inst->Unknown = 1;
+ return;
+ }
+ if(inst->U.I.SrcReg[0].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[0].Index == count_inst->Index &&
+ inst->U.I.SrcReg[0].Swizzle == count_inst->Swz){
+ amnt_src_index = 1;
+ } else if( inst->U.I.SrcReg[1].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[1].Index == count_inst->Index &&
+ inst->U.I.SrcReg[1].Swizzle == count_inst->Swz){
+ amnt_src_index = 0;
+ }
+ else{
+ count_inst->Unknown = 1;
+ return;
+ }
+ if(src_reg_is_immediate(&inst->U.I.SrcReg[amnt_src_index],
+ count_inst->C)){
+ amount = get_constant_value(count_inst->C,
+ &inst->U.I.SrcReg[amnt_src_index], 0);
+ }
+ else{
+ count_inst->Unknown = 1 ;
+ return;
+ }
+ switch(inst->U.I.Opcode){
+ case RC_OPCODE_ADD:
+ count_inst->Amount += amount;
+ break;
+ case RC_OPCODE_SUB:
+ if(amnt_src_index == 0){
+ count_inst->Unknown = 0;
+ return;
+ }
+ count_inst->Amount -= amount;
+ break;
+ default:
+ count_inst->Unknown = 1;
+ return;
+ }
+
+}
+
+static int transform_const_loop(struct emulate_loop_state * s,
+ struct loop_info * loop,
+ struct rc_instruction * cond)
+{
+ int end_loops = 1;
+ int iterations;
+ struct count_inst count_inst;
+ float limit_value;
+ struct rc_src_register * counter;
+ struct rc_src_register * limit;
+ struct const_value counter_value;
+ struct rc_instruction * inst;
+
+ /* Find the counter and the upper limit */
+
+ if(src_reg_is_immediate(&cond->U.I.SrcReg[0], s->C)){
+ limit = &cond->U.I.SrcReg[0];
+ counter = &cond->U.I.SrcReg[1];
+ }
+ else if(src_reg_is_immediate(&cond->U.I.SrcReg[1], s->C)){
+ limit = &cond->U.I.SrcReg[1];
+ counter = &cond->U.I.SrcReg[0];
+ }
+ else{
+ DBG("No constant limit.\n");
+ return 0;
+ }
+
+ /* Find the initial value of the counter */
+ counter_value.Src = counter;
+ counter_value.Value = 0.0f;
+ counter_value.HasValue = 0;
+ counter_value.C = s->C;
+ for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
+ inst = inst->Next){
+ rc_for_all_writes_mask(inst, update_const_value, &counter_value);
+ }
+ if(!counter_value.HasValue){
+ DBG("Initial counter value cannot be determined.\n");
+ return 0;
+ }
+ DBG("Initial counter value is %f\n", counter_value.Value);
+ /* Determine how the counter is modified each loop */
+ count_inst.C = s->C;
+ count_inst.Index = counter->Index;
+ count_inst.Swz = counter->Swizzle;
+ count_inst.Amount = 0.0f;
+ count_inst.Unknown = 0;
+ for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
+ switch(inst->U.I.Opcode){
+ /* XXX In the future we might want to try to unroll nested
+ * loops here.*/
+ case RC_OPCODE_BGNLOOP:
+ end_loops++;
+ break;
+ case RC_OPCODE_ENDLOOP:
+ loop->EndLoop = inst;
+ end_loops--;
+ break;
+ /* XXX Check if the counter is modified within an if statement.
+ */
+ case RC_OPCODE_IF:
+ break;
+ default:
+ rc_for_all_writes_mask(inst, get_incr_amount, &count_inst);
+ if(count_inst.Unknown){
+ return 0;
+ }
+ break;
+ }
+ }
+ /* Infinite loop */
+ if(count_inst.Amount == 0.0f){
+ return 0;
+ }
+ DBG("Counter is increased by %f each iteration.\n", count_inst.Amount);
+ /* Calculate the number of iterations of this loop. Keeping this
+ * simple, since we only support increment and decrement loops.
+ */
+ limit_value = get_constant_value(s->C, limit, 0);
+ iterations = (int) ((limit_value - counter_value.Value) /
+ count_inst.Amount);
+
+ DBG("Loop will have %d iterations.\n", iterations);
+
+ /* Prepare loop for unrolling */
+ rc_remove_instruction(loop->Cond);
+ rc_remove_instruction(loop->If);
+ rc_remove_instruction(loop->Brk);
+ rc_remove_instruction(loop->EndIf);
+
+ loop_unroll(s, loop, iterations);
+ loop->EndLoop = NULL;
+ return 1;
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement. Here is an outline of the conversion process:
+ * BGNLOOP; -> BGNLOOP;
+ * <Additional conditional code> -> <Additional conditional code>
+ * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
+ * IF temp[0]; -> IF temp[0];
+ * BRK; ->
+ * ENDIF; -> <Loop Body>
+ * <Loop Body> -> ENDIF;
+ * ENDLOOP; -> ENDLOOP
+ *
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return If the loop can be unrolled, a pointer to the first instruction of
+ * the unrolled loop.
+ * Otherwise, A pointer to the ENDLOOP instruction.
+ * Null if there is an error.
+ */
+static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+ struct rc_instruction * inst)
+{
+ struct loop_info *loop;
+ struct rc_instruction * ptr;
+
+ memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+ s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+ loop = &s->Loops[s->LoopCount++];
+ memset(loop, 0, sizeof(struct loop_info));
+ if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
+ rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__);
+ return NULL;
+ }
+ loop->BeginLoop = inst;
+
+ for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){
+ switch(ptr->U.I.Opcode){
+ case RC_OPCODE_BGNLOOP:
+ /* Nested loop */
+ ptr = transform_loop(s, ptr);
+ if(!ptr){
+ return NULL;
+ }
+ break;
+ case RC_OPCODE_BRK:
+ loop->Brk = ptr;
+ if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){
+ rc_error(s->C,
+ "%s: expected ENDIF\n",__FUNCTION__);
+ return NULL;
+ }
+ loop->EndIf = ptr->Next;
+ if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){
+ rc_error(s->C,
+ "%s: expected IF\n", __FUNCTION__);
+ return NULL;
+ }
+ loop->If = ptr->Prev;
+ switch(loop->If->Prev->U.I.Opcode){
+ case RC_OPCODE_SLT:
+ case RC_OPCODE_SGE:
+ case RC_OPCODE_SGT:
+ case RC_OPCODE_SLE:
+ case RC_OPCODE_SEQ:
+ case RC_OPCODE_SNE:
+ break;
+ default:
+ rc_error(s->C, "%s expected conditional\n",
+ __FUNCTION__);
+ return NULL;
+ }
+ loop->Cond = loop->If->Prev;
+ ptr = loop->EndIf;
+ break;
+ case RC_OPCODE_ENDLOOP:
+ loop->EndLoop = ptr;
+ break;
+ }
+ }
+ /* Reverse the conditional instruction */
+ switch(loop->Cond->U.I.Opcode){
+ case RC_OPCODE_SGE:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SLT;
+ break;
+ case RC_OPCODE_SLT:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SGE;
+ break;
+ case RC_OPCODE_SLE:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SGT;
+ break;
+ case RC_OPCODE_SGT:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SLE;
+ break;
+ case RC_OPCODE_SEQ:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SNE;
+ break;
+ case RC_OPCODE_SNE:
+ loop->Cond->U.I.Opcode = RC_OPCODE_SEQ;
+ break;
+ default:
+ rc_error(s->C, "loop->Cond is not a conditional.\n");
+ return NULL;
+ }
+
+ /* Check if the number of loops is known at compile time. */
+ if(transform_const_loop(s, loop, ptr)){
+ return loop->BeginLoop->Next;
+ }
+
+ /* Prepare the loop to be unrolled */
+ rc_remove_instruction(loop->Brk);
+ rc_remove_instruction(loop->EndIf);
+ rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
+ return loop->EndLoop;
+}
+
+static void rc_transform_loops(struct emulate_loop_state * s)
+{
+ struct rc_instruction * ptr = s->C->Program.Instructions.Next;
+ while(ptr != &s->C->Program.Instructions) {
+ if(ptr->Type == RC_INSTRUCTION_NORMAL &&
+ ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
+ ptr = transform_loop(s, ptr);
+ if(!ptr){
+ return;
+ }
+ }
+ ptr = ptr->Next;
+ }
+}
+
+static void rc_unroll_loops(struct emulate_loop_state *s,
+ unsigned int max_instructions)
+{
+ int i;
+ /* Iterate backwards of the list of loops so that loops that nested
+ * loops are unrolled first.
+ */
+ for( i = s->LoopCount - 1; i >= 0; i-- ){
+ if(!s->Loops[i].EndLoop){
+ continue;
+ }
+ unsigned int iterations = loop_calc_iterations(&s->Loops[i],
+ s->LoopCount, max_instructions);
+ loop_unroll(s, &s->Loops[i], iterations);
+ }
+}
+
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions)
+{
+ struct emulate_loop_state s;
+
+ memset(&s, 0, sizeof(struct emulate_loop_state));
+ s.C = c;
+
+ /* We may need to move these two operations to r3xx_(vert|frag)prog.c
+ * and run the optimization passes between them in order to increase
+ * the number of unrolls we can do for each loop.
+ */
+ rc_transform_loops(&s);
+
+ rc_unroll_loops(&s, max_instructions);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
new file mode 100644
index 0000000000..ddcf1c0fab
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -0,0 +1,12 @@
+
+
+#ifndef RADEON_EMULATE_LOOPS_H
+#define RADEON_EMULATE_LOOPS_H
+
+#define MAX_ITERATIONS 8
+
+struct radeon_compiler;
+
+void rc_emulate_loops(struct radeon_compiler *c, unsigned int max_instructions);
+
+#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
new file mode 100644
index 0000000000..1dc16855dc
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.c
@@ -0,0 +1,472 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_opcodes.h"
+#include "radeon_program.h"
+
+#include "radeon_program_constants.h"
+
+struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE] = {
+ {
+ .Opcode = RC_OPCODE_NOP,
+ .Name = "NOP"
+ },
+ {
+ .Opcode = RC_OPCODE_ILLEGAL_OPCODE,
+ .Name = "ILLEGAL OPCODE"
+ },
+ {
+ .Opcode = RC_OPCODE_ABS,
+ .Name = "ABS",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_ADD,
+ .Name = "ADD",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_ARL,
+ .Name = "ARL",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_CEIL,
+ .Name = "CEIL",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_CMP,
+ .Name = "CMP",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_COS,
+ .Name = "COS",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DDX,
+ .Name = "DDX",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DDY,
+ .Name = "DDY",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DP3,
+ .Name = "DP3",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DP4,
+ .Name = "DP4",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DPH,
+ .Name = "DPH",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_DST,
+ .Name = "DST",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_EX2,
+ .Name = "EX2",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_EXP,
+ .Name = "EXP",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_FLR,
+ .Name = "FLR",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_FRC,
+ .Name = "FRC",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_KIL,
+ .Name = "KIL",
+ .NumSrcRegs = 1
+ },
+ {
+ .Opcode = RC_OPCODE_LG2,
+ .Name = "LG2",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_LIT,
+ .Name = "LIT",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_LOG,
+ .Name = "LOG",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_LRP,
+ .Name = "LRP",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MAD,
+ .Name = "MAD",
+ .NumSrcRegs = 3,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MAX,
+ .Name = "MAX",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MIN,
+ .Name = "MIN",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MOV,
+ .Name = "MOV",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_MUL,
+ .Name = "MUL",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_POW,
+ .Name = "POW",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_RCP,
+ .Name = "RCP",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_RSQ,
+ .Name = "RSQ",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SCS,
+ .Name = "SCS",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SEQ,
+ .Name = "SEQ",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SFL,
+ .Name = "SFL",
+ .NumSrcRegs = 0,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SGE,
+ .Name = "SGE",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SGT,
+ .Name = "SGT",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SIN,
+ .Name = "SIN",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsStandardScalar = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SLE,
+ .Name = "SLE",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SLT,
+ .Name = "SLT",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SNE,
+ .Name = "SNE",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SUB,
+ .Name = "SUB",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_SWZ,
+ .Name = "SWZ",
+ .NumSrcRegs = 1,
+ .HasDstReg = 1,
+ .IsComponentwise = 1
+ },
+ {
+ .Opcode = RC_OPCODE_XPD,
+ .Name = "XPD",
+ .NumSrcRegs = 2,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TEX,
+ .Name = "TEX",
+ .HasTexture = 1,
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TXB,
+ .Name = "TXB",
+ .HasTexture = 1,
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TXD,
+ .Name = "TXD",
+ .HasTexture = 1,
+ .NumSrcRegs = 3,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TXL,
+ .Name = "TXL",
+ .HasTexture = 1,
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_TXP,
+ .Name = "TXP",
+ .HasTexture = 1,
+ .NumSrcRegs = 1,
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_IF,
+ .Name = "IF",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 1
+ },
+ {
+ .Opcode = RC_OPCODE_ELSE,
+ .Name = "ELSE",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_ENDIF,
+ .Name = "ENDIF",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_BGNLOOP,
+ .Name = "BGNLOOP",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_BRK,
+ .Name = "BRK",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0
+ },
+ {
+ .Opcode = RC_OPCODE_ENDLOOP,
+ .Name = "ENDLOOP",
+ .IsFlowControl = 1,
+ .NumSrcRegs = 0,
+ },
+ {
+ .Opcode = RC_OPCODE_REPL_ALPHA,
+ .Name = "REPL_ALPHA",
+ .HasDstReg = 1
+ },
+ {
+ .Opcode = RC_OPCODE_BEGIN_TEX,
+ .Name = "BEGIN_TEX"
+ }
+};
+
+void rc_compute_sources_for_writemask(
+ const struct rc_instruction *inst,
+ unsigned int writemask,
+ unsigned int *srcmasks)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ srcmasks[0] = 0;
+ srcmasks[1] = 0;
+ srcmasks[2] = 0;
+
+ if (opcode->Opcode == RC_OPCODE_KIL)
+ srcmasks[0] |= RC_MASK_XYZW;
+ else if (opcode->Opcode == RC_OPCODE_IF)
+ srcmasks[0] |= RC_MASK_X;
+
+ if (!writemask)
+ return;
+
+ if (opcode->IsComponentwise) {
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+ srcmasks[src] |= writemask;
+ } else if (opcode->IsStandardScalar) {
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src)
+ srcmasks[src] |= RC_MASK_X;
+ } else {
+ switch(opcode->Opcode) {
+ case RC_OPCODE_ARL:
+ srcmasks[0] |= RC_MASK_X;
+ break;
+ case RC_OPCODE_DP3:
+ srcmasks[0] |= RC_MASK_XYZ;
+ srcmasks[1] |= RC_MASK_XYZ;
+ break;
+ case RC_OPCODE_DP4:
+ srcmasks[0] |= RC_MASK_XYZW;
+ srcmasks[1] |= RC_MASK_XYZW;
+ break;
+ case RC_OPCODE_TXB:
+ case RC_OPCODE_TXP:
+ srcmasks[0] |= RC_MASK_W;
+ /* Fall through */
+ case RC_OPCODE_TEX:
+ switch (inst->U.I.TexSrcTarget) {
+ case RC_TEXTURE_1D:
+ srcmasks[0] |= RC_MASK_X;
+ break;
+ case RC_TEXTURE_2D:
+ case RC_TEXTURE_RECT:
+ case RC_TEXTURE_1D_ARRAY:
+ srcmasks[0] |= RC_MASK_XY;
+ break;
+ case RC_TEXTURE_3D:
+ case RC_TEXTURE_CUBE:
+ case RC_TEXTURE_2D_ARRAY:
+ srcmasks[0] |= RC_MASK_XYZ;
+ break;
+ }
+ break;
+ case RC_OPCODE_DST:
+ srcmasks[0] |= RC_MASK_Y | RC_MASK_Z;
+ srcmasks[1] |= RC_MASK_Y | RC_MASK_W;
+ break;
+ case RC_OPCODE_EXP:
+ case RC_OPCODE_LOG:
+ srcmasks[0] |= RC_MASK_XY;
+ break;
+ case RC_OPCODE_LIT:
+ srcmasks[0] |= RC_MASK_X | RC_MASK_Y | RC_MASK_W;
+ break;
+ default:
+ break;
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
new file mode 100644
index 0000000000..91c82ac089
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_opcodes.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_OPCODES_H
+#define RADEON_OPCODES_H
+
+#include <assert.h>
+
+/**
+ * Opcodes understood by the Radeon compiler.
+ */
+typedef enum {
+ RC_OPCODE_NOP = 0,
+ RC_OPCODE_ILLEGAL_OPCODE,
+
+ /** vec4 instruction: dst.c = abs(src0.c); */
+ RC_OPCODE_ABS,
+
+ /** vec4 instruction: dst.c = src0.c + src1.c; */
+ RC_OPCODE_ADD,
+
+ /** special instruction: load address register
+ * dst.x = floor(src.x), where dst must be an address register */
+ RC_OPCODE_ARL,
+
+ /** vec4 instruction: dst.c = ceil(src0.c) */
+ RC_OPCODE_CEIL,
+
+ /** vec4 instruction: dst.c = src0.c < 0.0 ? src1.c : src2.c */
+ RC_OPCODE_CMP,
+
+ /** scalar instruction: dst = cos(src0.x) */
+ RC_OPCODE_COS,
+
+ /** special instruction: take vec4 partial derivative in X direction
+ * dst.c = d src0.c / dx */
+ RC_OPCODE_DDX,
+
+ /** special instruction: take vec4 partial derivative in Y direction
+ * dst.c = d src0.c / dy */
+ RC_OPCODE_DDY,
+
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z */
+ RC_OPCODE_DP3,
+
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src0.w*src1.w */
+ RC_OPCODE_DP4,
+
+ /** scalar instruction: dst = src0.x*src1.x + src0.y*src1.y + src0.z*src1.z + src1.w */
+ RC_OPCODE_DPH,
+
+ /** special instruction, see ARB_fragment_program */
+ RC_OPCODE_DST,
+
+ /** scalar instruction: dst = 2**src0.x */
+ RC_OPCODE_EX2,
+
+ /** special instruction, see ARB_vertex_program */
+ RC_OPCODE_EXP,
+
+ /** vec4 instruction: dst.c = floor(src0.c) */
+ RC_OPCODE_FLR,
+
+ /** vec4 instruction: dst.c = src0.c - floor(src0.c) */
+ RC_OPCODE_FRC,
+
+ /** special instruction: stop execution if any component of src0 is negative */
+ RC_OPCODE_KIL,
+
+ /** scalar instruction: dst = log_2(src0.x) */
+ RC_OPCODE_LG2,
+
+ /** special instruction, see ARB_vertex_program */
+ RC_OPCODE_LIT,
+
+ /** special instruction, see ARB_vertex_program */
+ RC_OPCODE_LOG,
+
+ /** vec4 instruction: dst.c = src0.c*src1.c + (1 - src0.c)*src2.c */
+ RC_OPCODE_LRP,
+
+ /** vec4 instruction: dst.c = src0.c*src1.c + src2.c */
+ RC_OPCODE_MAD,
+
+ /** vec4 instruction: dst.c = max(src0.c, src1.c) */
+ RC_OPCODE_MAX,
+
+ /** vec4 instruction: dst.c = min(src0.c, src1.c) */
+ RC_OPCODE_MIN,
+
+ /** vec4 instruction: dst.c = src0.c */
+ RC_OPCODE_MOV,
+
+ /** vec4 instruction: dst.c = src0.c*src1.c */
+ RC_OPCODE_MUL,
+
+ /** scalar instruction: dst = src0.x ** src1.x */
+ RC_OPCODE_POW,
+
+ /** scalar instruction: dst = 1 / src0.x */
+ RC_OPCODE_RCP,
+
+ /** scalar instruction: dst = 1 / sqrt(src0.x) */
+ RC_OPCODE_RSQ,
+
+ /** special instruction, see ARB_fragment_program */
+ RC_OPCODE_SCS,
+
+ /** vec4 instruction: dst.c = (src0.c == src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SEQ,
+
+ /** vec4 instruction: dst.c = 0.0 */
+ RC_OPCODE_SFL,
+
+ /** vec4 instruction: dst.c = (src0.c >= src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SGE,
+
+ /** vec4 instruction: dst.c = (src0.c > src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SGT,
+
+ /** scalar instruction: dst = sin(src0.x) */
+ RC_OPCODE_SIN,
+
+ /** vec4 instruction: dst.c = (src0.c <= src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SLE,
+
+ /** vec4 instruction: dst.c = (src0.c < src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SLT,
+
+ /** vec4 instruction: dst.c = (src0.c != src1.c) ? 1.0 : 0.0 */
+ RC_OPCODE_SNE,
+
+ /** vec4 instruction: dst.c = src0.c - src1.c */
+ RC_OPCODE_SUB,
+
+ /** vec4 instruction: dst.c = src0.c */
+ RC_OPCODE_SWZ,
+
+ /** special instruction, see ARB_fragment_program */
+ RC_OPCODE_XPD,
+
+ RC_OPCODE_TEX,
+ RC_OPCODE_TXB,
+ RC_OPCODE_TXD,
+ RC_OPCODE_TXL,
+ RC_OPCODE_TXP,
+
+ /** branch instruction:
+ * If src0.x != 0.0, continue with the next instruction;
+ * otherwise, jump to matching RC_OPCODE_ELSE or RC_OPCODE_ENDIF.
+ */
+ RC_OPCODE_IF,
+
+ /** branch instruction: jump to matching RC_OPCODE_ENDIF */
+ RC_OPCODE_ELSE,
+
+ /** branch instruction: has no effect */
+ RC_OPCODE_ENDIF,
+
+ RC_OPCODE_BGNLOOP,
+
+ RC_OPCODE_BRK,
+
+ RC_OPCODE_ENDLOOP,
+
+ /** special instruction, used in R300-R500 fragment program pair instructions
+ * indicates that the result of the alpha operation shall be replicated
+ * across all other channels */
+ RC_OPCODE_REPL_ALPHA,
+
+ /** special instruction, used in R300-R500 fragment programs
+ * to indicate the start of a block of texture instructions that
+ * can run simultaneously. */
+ RC_OPCODE_BEGIN_TEX,
+
+ MAX_RC_OPCODE
+} rc_opcode;
+
+
+struct rc_opcode_info {
+ rc_opcode Opcode;
+ const char * Name;
+
+ /** true if the instruction reads from a texture.
+ *
+ * \note This is false for the KIL instruction, even though KIL is
+ * a texture instruction from a hardware point of view. */
+ unsigned int HasTexture:1;
+
+ unsigned int NumSrcRegs:2;
+ unsigned int HasDstReg:1;
+
+ /** true if this instruction affects control flow */
+ unsigned int IsFlowControl:1;
+
+ /** true if this is a vector instruction that operates on components in parallel
+ * without any cross-component interaction */
+ unsigned int IsComponentwise:1;
+
+ /** true if this instruction sources only its operands X components
+ * to compute one result which is smeared across all output channels */
+ unsigned int IsStandardScalar:1;
+};
+
+extern struct rc_opcode_info rc_opcodes[MAX_RC_OPCODE];
+
+static inline const struct rc_opcode_info * rc_get_opcode_info(rc_opcode opcode)
+{
+ assert((unsigned int)opcode < MAX_RC_OPCODE);
+ assert(rc_opcodes[opcode].Opcode == opcode);
+
+ return &rc_opcodes[opcode];
+}
+
+struct rc_instruction;
+
+void rc_compute_sources_for_writemask(
+ const struct rc_instruction *inst,
+ unsigned int writemask,
+ unsigned int *srcmasks);
+
+#endif /* RADEON_OPCODES_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
new file mode 100644
index 0000000000..21d7210888
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_dataflow.h"
+
+#include "radeon_compiler.h"
+#include "radeon_swizzle.h"
+
+
+static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
+{
+ struct rc_src_register combine;
+ combine.File = inner.File;
+ combine.Index = inner.Index;
+ combine.RelAddr = inner.RelAddr;
+ if (outer.Abs) {
+ combine.Abs = 1;
+ combine.Negate = outer.Negate;
+ } else {
+ combine.Abs = inner.Abs;
+ combine.Negate = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(outer.Swizzle, chan);
+ if (swz < 4)
+ combine.Negate |= GET_BIT(inner.Negate, swz) << chan;
+ }
+ combine.Negate ^= outer.Negate;
+ }
+ combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
+ return combine;
+}
+
+struct peephole_state {
+ struct radeon_compiler * C;
+ struct rc_instruction * Mov;
+ unsigned int Conflict:1;
+
+ /** Whether Mov's source has been clobbered */
+ unsigned int SourceClobbered:1;
+
+ /** Which components of Mov's destination register are still from that Mov? */
+ unsigned int MovMask:4;
+
+ /** Which components of Mov's destination register are clearly *not* from that Mov */
+ unsigned int DefinedMask:4;
+
+ /** Which components of Mov's source register are sourced */
+ unsigned int SourcedMask:4;
+
+ /** Branch depth beyond Mov; negative value indicates we left the Mov's block */
+ int BranchDepth;
+};
+
+static void peephole_scan_read(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct peephole_state * s = data;
+
+ if (file != RC_FILE_TEMPORARY || index != s->Mov->U.I.DstReg.Index)
+ return;
+
+ /* These instructions cannot read from the constants file.
+ * see radeonTransformTEX()
+ */
+ if(s->Mov->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
+ s->Mov->U.I.SrcReg[0].File != RC_FILE_INPUT &&
+ (inst->U.I.Opcode == RC_OPCODE_TEX ||
+ inst->U.I.Opcode == RC_OPCODE_TXB ||
+ inst->U.I.Opcode == RC_OPCODE_TXP ||
+ inst->U.I.Opcode == RC_OPCODE_KIL)){
+ s->Conflict = 1;
+ return;
+ }
+ if ((mask & s->MovMask) == mask) {
+ if (s->SourceClobbered) {
+ s->Conflict = 1;
+ }
+ } else if ((mask & s->DefinedMask) == mask) {
+ /* read from something entirely written by other instruction: this is okay */
+ } else {
+ /* read from component combination that is not well-defined without
+ * the MOV: cannot remove it */
+ s->Conflict = 1;
+ }
+}
+
+static void peephole_scan_write(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct peephole_state * s = data;
+
+ if (s->BranchDepth < 0)
+ return;
+
+ if (file == s->Mov->U.I.DstReg.File && index == s->Mov->U.I.DstReg.Index) {
+ s->MovMask &= ~mask;
+ if (s->BranchDepth == 0)
+ s->DefinedMask |= mask;
+ else
+ s->DefinedMask &= ~mask;
+ }
+ if (file == s->Mov->U.I.SrcReg[0].File && index == s->Mov->U.I.SrcReg[0].Index) {
+ if (mask & s->SourcedMask)
+ s->SourceClobbered = 1;
+ } else if (s->Mov->U.I.SrcReg[0].RelAddr && file == RC_FILE_ADDRESS) {
+ s->SourceClobbered = 1;
+ }
+}
+
+static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mov)
+{
+ struct peephole_state s;
+
+ if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || inst_mov->U.I.WriteALUResult)
+ return;
+
+ memset(&s, 0, sizeof(s));
+ s.C = c;
+ s.Mov = inst_mov;
+ s.MovMask = inst_mov->U.I.DstReg.WriteMask;
+ s.DefinedMask = RC_MASK_XYZW & ~s.MovMask;
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(inst_mov->U.I.SrcReg[0].Swizzle, chan);
+ s.SourcedMask |= (1 << swz) & RC_MASK_XYZW;
+ }
+
+ /* 1st pass: Check whether all subsequent readers can be changed */
+ for(struct rc_instruction * inst = inst_mov->Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_reads_mask(inst, peephole_scan_read, &s);
+ rc_for_all_writes_mask(inst, peephole_scan_write, &s);
+ if (s.Conflict)
+ return;
+
+ if (s.BranchDepth >= 0) {
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ s.BranchDepth++;
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ s.BranchDepth--;
+ if (s.BranchDepth < 0) {
+ s.DefinedMask &= ~s.MovMask;
+ s.MovMask = 0;
+ }
+ }
+ }
+ }
+
+ if (s.Conflict)
+ return;
+
+ /* 2nd pass: We can satisfy all readers, so switch them over all at once */
+ s.MovMask = inst_mov->U.I.DstReg.WriteMask;
+ s.BranchDepth = 0;
+
+ for(struct rc_instruction * inst = inst_mov->Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ if (inst->U.I.SrcReg[src].File == RC_FILE_TEMPORARY &&
+ inst->U.I.SrcReg[src].Index == s.Mov->U.I.DstReg.Index) {
+ unsigned int refmask = 0;
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
+ refmask |= (1 << swz) & RC_MASK_XYZW;
+ }
+
+ if ((refmask & s.MovMask) == refmask)
+ inst->U.I.SrcReg[src] = chain_srcregs(inst->U.I.SrcReg[src], s.Mov->U.I.SrcReg[0]);
+ }
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->U.I.DstReg.File == RC_FILE_TEMPORARY &&
+ inst->U.I.DstReg.Index == s.Mov->U.I.DstReg.Index) {
+ s.MovMask &= ~inst->U.I.DstReg.WriteMask;
+ }
+ }
+
+ if (s.BranchDepth >= 0) {
+ if (inst->U.I.Opcode == RC_OPCODE_IF) {
+ s.BranchDepth++;
+ } else if (inst->U.I.Opcode == RC_OPCODE_ENDIF) {
+ s.BranchDepth--;
+ if (s.BranchDepth < 0)
+ break; /* no more readers after this point */
+ }
+ }
+ }
+
+ /* Finally, remove the original MOV instruction */
+ rc_remove_instruction(inst_mov);
+}
+
+/**
+ * Check if a source register is actually always the same
+ * swizzle constant.
+ */
+static int is_src_uniform_constant(struct rc_src_register src,
+ rc_swizzle * pswz, unsigned int * pnegate)
+{
+ int have_used = 0;
+
+ if (src.File != RC_FILE_NONE) {
+ *pswz = 0;
+ return 0;
+ }
+
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(src.Swizzle, chan);
+ if (swz < 4) {
+ *pswz = 0;
+ return 0;
+ }
+ if (swz == RC_SWIZZLE_UNUSED)
+ continue;
+
+ if (!have_used) {
+ *pswz = swz;
+ *pnegate = GET_BIT(src.Negate, chan);
+ have_used = 1;
+ } else {
+ if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
+ *pswz = 0;
+ return 0;
+ }
+ }
+ }
+
+ return 1;
+}
+
+
+static void constant_folding_mad(struct rc_instruction * inst)
+{
+ rc_swizzle swz;
+ unsigned int negate;
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MUL;
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_ADD;
+ if (negate)
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_ADD;
+ if (negate)
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
+ return;
+ }
+ }
+}
+
+static void constant_folding_mul(struct rc_instruction * inst)
+{
+ rc_swizzle swz;
+ unsigned int negate;
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+ if (negate)
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ONE) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ if (negate)
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ return;
+ } else if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+ return;
+ }
+ }
+}
+
+static void constant_folding_add(struct rc_instruction * inst)
+{
+ rc_swizzle swz;
+ unsigned int negate;
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
+ return;
+ }
+ }
+
+ if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
+ if (swz == RC_SWIZZLE_ZERO) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ return;
+ }
+ }
+}
+
+
+/**
+ * Replace 0.0, 1.0 and 0.5 immediate constants by their
+ * respective swizzles. Simplify instructions like ADD dst, src, 0;
+ */
+static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
+ for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
+ if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
+ inst->U.I.SrcReg[src].RelAddr ||
+ inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
+ continue;
+
+ struct rc_constant * constant =
+ &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
+
+ if (constant->Type != RC_CONSTANT_IMMEDIATE)
+ continue;
+
+ struct rc_src_register newsrc = inst->U.I.SrcReg[src];
+ int have_real_reference = 0;
+ for(unsigned int chan = 0; chan < 4; ++chan) {
+ unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
+ if (swz >= 4)
+ continue;
+
+ unsigned int newswz;
+ float imm = constant->u.Immediate[swz];
+ float baseimm = imm;
+ if (imm < 0.0)
+ baseimm = -baseimm;
+
+ if (baseimm == 0.0) {
+ newswz = RC_SWIZZLE_ZERO;
+ } else if (baseimm == 1.0) {
+ newswz = RC_SWIZZLE_ONE;
+ } else if (baseimm == 0.5) {
+ newswz = RC_SWIZZLE_HALF;
+ } else {
+ have_real_reference = 1;
+ continue;
+ }
+
+ SET_SWZ(newsrc.Swizzle, chan, newswz);
+ if (imm < 0.0 && !newsrc.Abs)
+ newsrc.Negate ^= 1 << chan;
+ }
+
+ if (!have_real_reference) {
+ newsrc.File = RC_FILE_NONE;
+ newsrc.Index = 0;
+ }
+
+ /* don't make the swizzle worse */
+ if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
+ c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
+ continue;
+
+ inst->U.I.SrcReg[src] = newsrc;
+ }
+
+ /* Simplify instructions based on constants */
+ if (inst->U.I.Opcode == RC_OPCODE_MAD)
+ constant_folding_mad(inst);
+
+ /* note: MAD can simplify to MUL or ADD */
+ if (inst->U.I.Opcode == RC_OPCODE_MUL)
+ constant_folding_mul(inst);
+ else if (inst->U.I.Opcode == RC_OPCODE_ADD)
+ constant_folding_add(inst);
+}
+
+void rc_optimize(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst = c->Program.Instructions.Next;
+ while(inst != &c->Program.Instructions) {
+ struct rc_instruction * cur = inst;
+ inst = inst->Next;
+
+ constant_folding(c, cur);
+
+ if (cur->U.I.Opcode == RC_OPCODE_MOV) {
+ peephole(c, cur);
+ /* cur may no longer be part of the program */
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
new file mode 100644
index 0000000000..8a912da461
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_regalloc.c
@@ -0,0 +1,280 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+
+struct live_intervals {
+ int Start;
+ int End;
+ struct live_intervals * Next;
+};
+
+struct register_info {
+ struct live_intervals Live;
+
+ unsigned int Used:1;
+ unsigned int Allocated:1;
+ unsigned int File:3;
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+struct hardware_register {
+ struct live_intervals * Used;
+};
+
+struct regalloc_state {
+ struct radeon_compiler * C;
+
+ struct register_info Input[RC_REGISTER_MAX_INDEX];
+ struct register_info Temporary[RC_REGISTER_MAX_INDEX];
+
+ struct hardware_register * HwTemporary;
+ unsigned int NumHwTemporaries;
+};
+
+static void print_live_intervals(struct live_intervals * src)
+{
+ if (!src) {
+ DBG("(null)");
+ return;
+ }
+
+ while(src) {
+ DBG("(%i,%i)", src->Start, src->End);
+ src = src->Next;
+ }
+}
+
+static void add_live_intervals(struct regalloc_state * s,
+ struct live_intervals ** dst, struct live_intervals * src)
+{
+ struct live_intervals ** dst_backup = dst;
+
+ if (VERBOSE) {
+ DBG("add_live_intervals: ");
+ print_live_intervals(*dst);
+ DBG(" to ");
+ print_live_intervals(src);
+ DBG("\n");
+ }
+
+ while(src) {
+ if (*dst && (*dst)->End < src->Start) {
+ dst = &(*dst)->Next;
+ } else if (!*dst || (*dst)->Start > src->End) {
+ struct live_intervals * li = memory_pool_malloc(&s->C->Pool, sizeof(*li));
+ li->Start = src->Start;
+ li->End = src->End;
+ li->Next = *dst;
+ *dst = li;
+ src = src->Next;
+ } else {
+ if (src->End > (*dst)->End)
+ (*dst)->End = src->End;
+ if (src->Start < (*dst)->Start)
+ (*dst)->Start = src->Start;
+ src = src->Next;
+ }
+ }
+
+ if (VERBOSE) {
+ DBG(" result: ");
+ print_live_intervals(*dst_backup);
+ DBG("\n");
+ }
+}
+
+static int overlap_live_intervals(struct live_intervals * dst, struct live_intervals * src)
+{
+ if (VERBOSE) {
+ DBG("overlap_live_intervals: ");
+ print_live_intervals(dst);
+ DBG(" to ");
+ print_live_intervals(src);
+ DBG("\n");
+ }
+
+ while(src && dst) {
+ if (dst->End <= src->Start) {
+ dst = dst->Next;
+ } else if (dst->End <= src->End) {
+ DBG(" overlap\n");
+ return 1;
+ } else if (dst->Start < src->End) {
+ DBG(" overlap\n");
+ return 1;
+ } else {
+ src = src->Next;
+ }
+ }
+
+ DBG(" no overlap\n");
+
+ return 0;
+}
+
+static int try_add_live_intervals(struct regalloc_state * s,
+ struct live_intervals ** dst, struct live_intervals * src)
+{
+ if (overlap_live_intervals(*dst, src))
+ return 0;
+
+ add_live_intervals(s, dst, src);
+ return 1;
+}
+
+static void scan_callback(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int mask)
+{
+ struct regalloc_state * s = data;
+ struct register_info * reg;
+
+ if (file == RC_FILE_TEMPORARY)
+ reg = &s->Temporary[index];
+ else if (file == RC_FILE_INPUT)
+ reg = &s->Input[index];
+ else
+ return;
+
+ if (!reg->Used) {
+ reg->Used = 1;
+ if (file == RC_FILE_INPUT)
+ reg->Live.Start = -1;
+ else
+ reg->Live.Start = inst->IP;
+ reg->Live.End = inst->IP;
+ } else {
+ if (inst->IP > reg->Live.End)
+ reg->Live.End = inst->IP;
+ }
+}
+
+static void compute_live_intervals(struct regalloc_state * s)
+{
+ rc_recompute_ips(s->C);
+
+ for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
+ inst != &s->C->Program.Instructions;
+ inst = inst->Next) {
+ rc_for_all_reads_mask(inst, scan_callback, s);
+ rc_for_all_writes_mask(inst, scan_callback, s);
+ }
+}
+
+static void remap_register(void * data, struct rc_instruction * inst,
+ rc_register_file * file, unsigned int * index)
+{
+ struct regalloc_state * s = data;
+ const struct register_info * reg;
+
+ if (*file == RC_FILE_TEMPORARY)
+ reg = &s->Temporary[*index];
+ else if (*file == RC_FILE_INPUT)
+ reg = &s->Input[*index];
+ else
+ return;
+
+ if (reg->Allocated) {
+ *file = reg->File;
+ *index = reg->Index;
+ }
+}
+
+static void do_regalloc(struct regalloc_state * s)
+{
+ /* Simple and stupid greedy register allocation */
+ for(unsigned int index = 0; index < RC_REGISTER_MAX_INDEX; ++index) {
+ struct register_info * reg = &s->Temporary[index];
+
+ if (!reg->Used)
+ continue;
+
+ for(unsigned int hwreg = 0; hwreg < s->NumHwTemporaries; ++hwreg) {
+ if (try_add_live_intervals(s, &s->HwTemporary[hwreg].Used, &reg->Live)) {
+ reg->Allocated = 1;
+ reg->File = RC_FILE_TEMPORARY;
+ reg->Index = hwreg;
+ goto success;
+ }
+ }
+
+ rc_error(s->C, "Ran out of hardware temporaries\n");
+ return;
+
+ success:;
+ }
+
+ /* Rewrite all instructions based on the translation table we built */
+ for(struct rc_instruction * inst = s->C->Program.Instructions.Next;
+ inst != &s->C->Program.Instructions;
+ inst = inst->Next) {
+ rc_remap_registers(inst, &remap_register, s);
+ }
+}
+
+static void alloc_input(void * data, unsigned int input, unsigned int hwreg)
+{
+ struct regalloc_state * s = data;
+
+ if (!s->Input[input].Used)
+ return;
+
+ add_live_intervals(s, &s->HwTemporary[hwreg].Used, &s->Input[input].Live);
+
+ s->Input[input].Allocated = 1;
+ s->Input[input].File = RC_FILE_TEMPORARY;
+ s->Input[input].Index = hwreg;
+
+}
+
+void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps)
+{
+ struct regalloc_state s;
+
+ memset(&s, 0, sizeof(s));
+ s.C = &c->Base;
+ s.NumHwTemporaries = maxtemps;
+ s.HwTemporary = memory_pool_malloc(&s.C->Pool, maxtemps*sizeof(struct hardware_register));
+ memset(s.HwTemporary, 0, maxtemps*sizeof(struct hardware_register));
+
+ compute_live_intervals(&s);
+
+ c->AllocateHwInputs(c, &alloc_input, &s);
+
+ do_regalloc(&s);
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
new file mode 100644
index 0000000000..a279549ff8
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_schedule.c
@@ -0,0 +1,501 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+#include "radeon_dataflow.h"
+
+
+#define VERBOSE 0
+
+#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
+
+struct schedule_instruction {
+ struct rc_instruction * Instruction;
+
+ /** Next instruction in the linked list of ready instructions. */
+ struct schedule_instruction *NextReady;
+
+ /** Values that this instruction reads and writes */
+ struct reg_value * WriteValues[4];
+ struct reg_value * ReadValues[12];
+ unsigned int NumWriteValues:3;
+ unsigned int NumReadValues:4;
+
+ /**
+ * Number of (read and write) dependencies that must be resolved before
+ * this instruction can be scheduled.
+ */
+ unsigned int NumDependencies:5;
+};
+
+
+/**
+ * Used to keep track of which instructions read a value.
+ */
+struct reg_value_reader {
+ struct schedule_instruction *Reader;
+ struct reg_value_reader *Next;
+};
+
+/**
+ * Used to keep track which values are stored in each component of a
+ * RC_FILE_TEMPORARY.
+ */
+struct reg_value {
+ struct schedule_instruction * Writer;
+
+ /**
+ * Unordered linked list of instructions that read from this value.
+ * When this value becomes available, we increase all readers'
+ * dependency count.
+ */
+ struct reg_value_reader *Readers;
+
+ /**
+ * Number of readers of this value. This is decremented each time
+ * a reader of the value is committed.
+ * When the reader cound reaches zero, the dependency count
+ * of the instruction writing \ref Next is decremented.
+ */
+ unsigned int NumReaders;
+
+ struct reg_value *Next; /**< Pointer to the next value to be written to the same register */
+};
+
+struct register_state {
+ struct reg_value * Values[4];
+};
+
+struct schedule_state {
+ struct radeon_compiler * C;
+ struct schedule_instruction * Current;
+
+ struct register_state Temporary[RC_REGISTER_MAX_INDEX];
+
+ /**
+ * Linked lists of instructions that can be scheduled right now,
+ * based on which ALU/TEX resources they require.
+ */
+ /*@{*/
+ struct schedule_instruction *ReadyFullALU;
+ struct schedule_instruction *ReadyRGB;
+ struct schedule_instruction *ReadyAlpha;
+ struct schedule_instruction *ReadyTEX;
+ /*@}*/
+};
+
+static struct reg_value ** get_reg_valuep(struct schedule_state * s,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ if (file != RC_FILE_TEMPORARY)
+ return 0;
+
+ if (index >= RC_REGISTER_MAX_INDEX) {
+ rc_error(s->C, "%s: index %i out of bounds\n", __FUNCTION__, index);
+ return 0;
+ }
+
+ return &s->Temporary[index].Values[chan];
+}
+
+static struct reg_value * get_reg_value(struct schedule_state * s,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
+ if (!pv)
+ return 0;
+ return *pv;
+}
+
+static void add_inst_to_list(struct schedule_instruction ** list, struct schedule_instruction * inst)
+{
+ inst->NextReady = *list;
+ *list = inst;
+}
+
+static void instruction_ready(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+ DBG("%i is now ready\n", sinst->Instruction->IP);
+
+ if (sinst->Instruction->Type == RC_INSTRUCTION_NORMAL)
+ add_inst_to_list(&s->ReadyTEX, sinst);
+ else if (sinst->Instruction->U.P.Alpha.Opcode == RC_OPCODE_NOP)
+ add_inst_to_list(&s->ReadyRGB, sinst);
+ else if (sinst->Instruction->U.P.RGB.Opcode == RC_OPCODE_NOP)
+ add_inst_to_list(&s->ReadyAlpha, sinst);
+ else
+ add_inst_to_list(&s->ReadyFullALU, sinst);
+}
+
+static void decrease_dependencies(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+ assert(sinst->NumDependencies > 0);
+ sinst->NumDependencies--;
+ if (!sinst->NumDependencies)
+ instruction_ready(s, sinst);
+}
+
+static void commit_instruction(struct schedule_state * s, struct schedule_instruction * sinst)
+{
+ DBG("%i: commit\n", sinst->Instruction->IP);
+
+ for(unsigned int i = 0; i < sinst->NumReadValues; ++i) {
+ struct reg_value * v = sinst->ReadValues[i];
+ assert(v->NumReaders > 0);
+ v->NumReaders--;
+ if (!v->NumReaders) {
+ if (v->Next)
+ decrease_dependencies(s, v->Next->Writer);
+ }
+ }
+
+ for(unsigned int i = 0; i < sinst->NumWriteValues; ++i) {
+ struct reg_value * v = sinst->WriteValues[i];
+ if (v->NumReaders) {
+ for(struct reg_value_reader * r = v->Readers; r; r = r->Next) {
+ decrease_dependencies(s, r->Reader);
+ }
+ } else {
+ /* This happens in instruction sequences of the type
+ * OP r.x, ...;
+ * OP r.x, r.x, ...;
+ * See also the subtlety in how instructions that both
+ * read and write the same register are scanned.
+ */
+ if (v->Next)
+ decrease_dependencies(s, v->Next->Writer);
+ }
+ }
+}
+
+/**
+ * Emit all ready texture instructions in a single block.
+ *
+ * Emit as a single block to (hopefully) sample many textures in parallel,
+ * and to avoid hardware indirections on R300.
+ */
+static void emit_all_tex(struct schedule_state * s, struct rc_instruction * before)
+{
+ struct schedule_instruction *readytex;
+
+ assert(s->ReadyTEX);
+
+ /* Don't let the ready list change under us! */
+ readytex = s->ReadyTEX;
+ s->ReadyTEX = 0;
+
+ /* Node marker for R300 */
+ struct rc_instruction * inst_begin = rc_insert_new_instruction(s->C, before->Prev);
+ inst_begin->U.I.Opcode = RC_OPCODE_BEGIN_TEX;
+
+ /* Link texture instructions back in */
+ while(readytex) {
+ struct schedule_instruction * tex = readytex;
+ readytex = readytex->NextReady;
+
+ rc_insert_instruction(before->Prev, tex->Instruction);
+ commit_instruction(s, tex);
+ }
+}
+
+
+static int destructive_merge_instructions(
+ struct rc_pair_instruction * rgb,
+ struct rc_pair_instruction * alpha)
+{
+ assert(rgb->Alpha.Opcode == RC_OPCODE_NOP);
+ assert(alpha->RGB.Opcode == RC_OPCODE_NOP);
+
+ /* Copy alpha args into rgb */
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(alpha->Alpha.Opcode);
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ unsigned int srcrgb = 0;
+ unsigned int srcalpha = 0;
+ unsigned int oldsrc = alpha->Alpha.Arg[arg].Source;
+ rc_register_file file = 0;
+ unsigned int index = 0;
+
+ if (alpha->Alpha.Arg[arg].Swizzle < 3) {
+ srcrgb = 1;
+ file = alpha->RGB.Src[oldsrc].File;
+ index = alpha->RGB.Src[oldsrc].Index;
+ } else if (alpha->Alpha.Arg[arg].Swizzle < 4) {
+ srcalpha = 1;
+ file = alpha->Alpha.Src[oldsrc].File;
+ index = alpha->Alpha.Src[oldsrc].Index;
+ }
+
+ int source = rc_pair_alloc_source(rgb, srcrgb, srcalpha, file, index);
+ if (source < 0)
+ return 0;
+
+ rgb->Alpha.Arg[arg].Source = source;
+ rgb->Alpha.Arg[arg].Swizzle = alpha->Alpha.Arg[arg].Swizzle;
+ rgb->Alpha.Arg[arg].Abs = alpha->Alpha.Arg[arg].Abs;
+ rgb->Alpha.Arg[arg].Negate = alpha->Alpha.Arg[arg].Negate;
+ }
+
+ /* Copy alpha opcode into rgb */
+ rgb->Alpha.Opcode = alpha->Alpha.Opcode;
+ rgb->Alpha.DestIndex = alpha->Alpha.DestIndex;
+ rgb->Alpha.WriteMask = alpha->Alpha.WriteMask;
+ rgb->Alpha.OutputWriteMask = alpha->Alpha.OutputWriteMask;
+ rgb->Alpha.DepthWriteMask = alpha->Alpha.DepthWriteMask;
+ rgb->Alpha.Saturate = alpha->Alpha.Saturate;
+
+ /* Merge ALU result writing */
+ if (alpha->WriteALUResult) {
+ if (rgb->WriteALUResult)
+ return 0;
+
+ rgb->WriteALUResult = alpha->WriteALUResult;
+ rgb->ALUResultCompare = alpha->ALUResultCompare;
+ }
+
+ return 1;
+}
+
+/**
+ * Try to merge the given instructions into the rgb instructions.
+ *
+ * Return true on success; on failure, return false, and keep
+ * the instructions untouched.
+ */
+static int merge_instructions(struct rc_pair_instruction * rgb, struct rc_pair_instruction * alpha)
+{
+ struct rc_pair_instruction backup;
+
+ memcpy(&backup, rgb, sizeof(struct rc_pair_instruction));
+
+ if (destructive_merge_instructions(rgb, alpha))
+ return 1;
+
+ memcpy(rgb, &backup, sizeof(struct rc_pair_instruction));
+ return 0;
+}
+
+
+/**
+ * Find a good ALU instruction or pair of ALU instruction and emit it.
+ *
+ * Prefer emitting full ALU instructions, so that when we reach a point
+ * where no full ALU instruction can be emitted, we have more candidates
+ * for RGB/Alpha pairing.
+ */
+static void emit_one_alu(struct schedule_state *s, struct rc_instruction * before)
+{
+ struct schedule_instruction * sinst;
+
+ if (s->ReadyFullALU || !(s->ReadyRGB && s->ReadyAlpha)) {
+ if (s->ReadyFullALU) {
+ sinst = s->ReadyFullALU;
+ s->ReadyFullALU = s->ReadyFullALU->NextReady;
+ } else if (s->ReadyRGB) {
+ sinst = s->ReadyRGB;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+ } else {
+ sinst = s->ReadyAlpha;
+ s->ReadyAlpha = s->ReadyAlpha->NextReady;
+ }
+
+ rc_insert_instruction(before->Prev, sinst->Instruction);
+ commit_instruction(s, sinst);
+ } else {
+ struct schedule_instruction **prgb;
+ struct schedule_instruction **palpha;
+
+ /* Some pairings might fail because they require too
+ * many source slots; try all possible pairings if necessary */
+ for(prgb = &s->ReadyRGB; *prgb; prgb = &(*prgb)->NextReady) {
+ for(palpha = &s->ReadyAlpha; *palpha; palpha = &(*palpha)->NextReady) {
+ struct schedule_instruction * psirgb = *prgb;
+ struct schedule_instruction * psialpha = *palpha;
+
+ if (!merge_instructions(&psirgb->Instruction->U.P, &psialpha->Instruction->U.P))
+ continue;
+
+ *prgb = (*prgb)->NextReady;
+ *palpha = (*palpha)->NextReady;
+ rc_insert_instruction(before->Prev, psirgb->Instruction);
+ commit_instruction(s, psirgb);
+ commit_instruction(s, psialpha);
+ goto success;
+ }
+ }
+
+ /* No success in pairing; just take the first RGB instruction */
+ sinst = s->ReadyRGB;
+ s->ReadyRGB = s->ReadyRGB->NextReady;
+
+ rc_insert_instruction(before->Prev, sinst->Instruction);
+ commit_instruction(s, sinst);
+ success: ;
+ }
+}
+
+static void scan_read(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ struct schedule_state * s = data;
+ struct reg_value * v = get_reg_value(s, file, index, chan);
+
+ if (!v)
+ return;
+
+ if (v->Writer == s->Current) {
+ /* The instruction reads and writes to a register component.
+ * In this case, we only want to increment dependencies by one.
+ */
+ return;
+ }
+
+ DBG("%i: read %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+ struct reg_value_reader * reader = memory_pool_malloc(&s->C->Pool, sizeof(*reader));
+ reader->Reader = s->Current;
+ reader->Next = v->Readers;
+ v->Readers = reader;
+ v->NumReaders++;
+
+ s->Current->NumDependencies++;
+
+ if (s->Current->NumReadValues >= 12) {
+ rc_error(s->C, "%s: NumReadValues overflow\n", __FUNCTION__);
+ } else {
+ s->Current->ReadValues[s->Current->NumReadValues++] = v;
+ }
+}
+
+static void scan_write(void * data, struct rc_instruction * inst,
+ rc_register_file file, unsigned int index, unsigned int chan)
+{
+ struct schedule_state * s = data;
+ struct reg_value ** pv = get_reg_valuep(s, file, index, chan);
+
+ if (!pv)
+ return;
+
+ DBG("%i: write %i[%i] chan %i\n", s->Current->Instruction->IP, file, index, chan);
+
+ struct reg_value * newv = memory_pool_malloc(&s->C->Pool, sizeof(*newv));
+ memset(newv, 0, sizeof(*newv));
+
+ newv->Writer = s->Current;
+
+ if (*pv) {
+ (*pv)->Next = newv;
+ s->Current->NumDependencies++;
+ }
+
+ *pv = newv;
+
+ if (s->Current->NumWriteValues >= 4) {
+ rc_error(s->C, "%s: NumWriteValues overflow\n", __FUNCTION__);
+ } else {
+ s->Current->WriteValues[s->Current->NumWriteValues++] = newv;
+ }
+}
+
+static void schedule_block(struct r300_fragment_program_compiler * c,
+ struct rc_instruction * begin, struct rc_instruction * end)
+{
+ struct schedule_state s;
+
+ memset(&s, 0, sizeof(s));
+ s.C = &c->Base;
+
+ /* Scan instructions for data dependencies */
+ unsigned int ip = 0;
+ for(struct rc_instruction * inst = begin; inst != end; inst = inst->Next) {
+ s.Current = memory_pool_malloc(&c->Base.Pool, sizeof(*s.Current));
+ memset(s.Current, 0, sizeof(struct schedule_instruction));
+
+ s.Current->Instruction = inst;
+ inst->IP = ip++;
+
+ DBG("%i: Scanning\n", inst->IP);
+
+ /* The order of things here is subtle and maybe slightly
+ * counter-intuitive, to account for the case where an
+ * instruction writes to the same register as it reads
+ * from. */
+ rc_for_all_writes_chan(inst, &scan_write, &s);
+ rc_for_all_reads_chan(inst, &scan_read, &s);
+
+ DBG("%i: Has %i dependencies\n", inst->IP, s.Current->NumDependencies);
+
+ if (!s.Current->NumDependencies)
+ instruction_ready(&s, s.Current);
+ }
+
+ /* Temporarily unlink all instructions */
+ begin->Prev->Next = end;
+ end->Prev = begin->Prev;
+
+ /* Schedule instructions back */
+ while(!s.C->Error &&
+ (s.ReadyTEX || s.ReadyRGB || s.ReadyAlpha || s.ReadyFullALU)) {
+ if (s.ReadyTEX)
+ emit_all_tex(&s, end);
+
+ while(!s.C->Error && (s.ReadyFullALU || s.ReadyRGB || s.ReadyAlpha))
+ emit_one_alu(&s, end);
+ }
+}
+
+static int is_controlflow(struct rc_instruction * inst)
+{
+ if (inst->Type == RC_INSTRUCTION_NORMAL) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ return opcode->IsFlowControl;
+ }
+ return 0;
+}
+
+void rc_pair_schedule(struct r300_fragment_program_compiler *c)
+{
+ struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+ while(inst != &c->Base.Program.Instructions) {
+ if (is_controlflow(inst)) {
+ inst = inst->Next;
+ continue;
+ }
+
+ struct rc_instruction * first = inst;
+
+ while(inst != &c->Base.Program.Instructions && !is_controlflow(inst))
+ inst = inst->Next;
+
+ DBG("Schedule one block\n");
+ schedule_block(c, first, inst);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
new file mode 100644
index 0000000000..407a0a55ee
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_pair_translate.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+#include "radeon_compiler.h"
+
+
+/**
+ * Finally rewrite ADD, MOV, MUL as the appropriate native instruction
+ * and reverse the order of arguments for CMP.
+ */
+static void final_rewrite(struct rc_sub_instruction *inst)
+{
+ struct rc_src_register tmp;
+
+ switch(inst->Opcode) {
+ case RC_OPCODE_ADD:
+ inst->SrcReg[2] = inst->SrcReg[1];
+ inst->SrcReg[1].File = RC_FILE_NONE;
+ inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+ inst->SrcReg[1].Negate = RC_MASK_NONE;
+ inst->Opcode = RC_OPCODE_MAD;
+ break;
+ case RC_OPCODE_CMP:
+ tmp = inst->SrcReg[2];
+ inst->SrcReg[2] = inst->SrcReg[0];
+ inst->SrcReg[0] = tmp;
+ break;
+ case RC_OPCODE_MOV:
+ /* AMD say we should use CMP.
+ * However, when we transform
+ * KIL -r0;
+ * into
+ * CMP tmp, -r0, -r0, 0;
+ * KIL tmp;
+ * we get incorrect behaviour on R500 when r0 == 0.0.
+ * It appears that the R500 KIL hardware treats -0.0 as less
+ * than zero.
+ */
+ inst->SrcReg[1].File = RC_FILE_NONE;
+ inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+ inst->SrcReg[2].File = RC_FILE_NONE;
+ inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
+ inst->Opcode = RC_OPCODE_MAD;
+ break;
+ case RC_OPCODE_MUL:
+ inst->SrcReg[2].File = RC_FILE_NONE;
+ inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000;
+ inst->Opcode = RC_OPCODE_MAD;
+ break;
+ default:
+ /* nothing to do */
+ break;
+ }
+}
+
+
+/**
+ * Classify an instruction according to which ALUs etc. it needs
+ */
+static void classify_instruction(struct rc_sub_instruction * inst,
+ int * needrgb, int * needalpha, int * istranscendent)
+{
+ *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0;
+ *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0;
+ *istranscendent = 0;
+
+ if (inst->WriteALUResult == RC_ALURESULT_X)
+ *needrgb = 1;
+ else if (inst->WriteALUResult == RC_ALURESULT_W)
+ *needalpha = 1;
+
+ switch(inst->Opcode) {
+ case RC_OPCODE_ADD:
+ case RC_OPCODE_CMP:
+ case RC_OPCODE_DDX:
+ case RC_OPCODE_DDY:
+ case RC_OPCODE_FRC:
+ case RC_OPCODE_MAD:
+ case RC_OPCODE_MAX:
+ case RC_OPCODE_MIN:
+ case RC_OPCODE_MOV:
+ case RC_OPCODE_MUL:
+ break;
+ case RC_OPCODE_COS:
+ case RC_OPCODE_EX2:
+ case RC_OPCODE_LG2:
+ case RC_OPCODE_RCP:
+ case RC_OPCODE_RSQ:
+ case RC_OPCODE_SIN:
+ *istranscendent = 1;
+ *needalpha = 1;
+ break;
+ case RC_OPCODE_DP4:
+ *needalpha = 1;
+ /* fall through */
+ case RC_OPCODE_DP3:
+ *needrgb = 1;
+ break;
+ default:
+ break;
+ }
+}
+
+
+/**
+ * Fill the given ALU instruction's opcodes and source operands into the given pair,
+ * if possible.
+ */
+static void set_pair_instruction(struct r300_fragment_program_compiler *c,
+ struct rc_pair_instruction * pair,
+ struct rc_sub_instruction * inst)
+{
+ memset(pair, 0, sizeof(struct rc_pair_instruction));
+
+ int needrgb, needalpha, istranscendent;
+ classify_instruction(inst, &needrgb, &needalpha, &istranscendent);
+
+ if (needrgb) {
+ if (istranscendent)
+ pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA;
+ else
+ pair->RGB.Opcode = inst->Opcode;
+ if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+ pair->RGB.Saturate = 1;
+ }
+ if (needalpha) {
+ pair->Alpha.Opcode = inst->Opcode;
+ if (inst->SaturateMode == RC_SATURATE_ZERO_ONE)
+ pair->Alpha.Saturate = 1;
+ }
+
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode);
+ int i;
+
+ for(i = 0; i < opcode->NumSrcRegs; ++i) {
+ int source;
+ if (needrgb && !istranscendent) {
+ unsigned int srcrgb = 0;
+ unsigned int srcalpha = 0;
+ int j;
+ for(j = 0; j < 3; ++j) {
+ unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j);
+ if (swz < 3)
+ srcrgb = 1;
+ else if (swz < 4)
+ srcalpha = 1;
+ }
+ source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+ inst->SrcReg[i].File, inst->SrcReg[i].Index);
+ pair->RGB.Arg[i].Source = source;
+ pair->RGB.Arg[i].Swizzle = inst->SrcReg[i].Swizzle & 0x1ff;
+ pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs;
+ pair->RGB.Arg[i].Negate = !!(inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z));
+ }
+ if (needalpha) {
+ unsigned int srcrgb = 0;
+ unsigned int srcalpha = 0;
+ unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, istranscendent ? 0 : 3);
+ if (swz < 3)
+ srcrgb = 1;
+ else if (swz < 4)
+ srcalpha = 1;
+ source = rc_pair_alloc_source(pair, srcrgb, srcalpha,
+ inst->SrcReg[i].File, inst->SrcReg[i].Index);
+ pair->Alpha.Arg[i].Source = source;
+ pair->Alpha.Arg[i].Swizzle = swz;
+ pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs;
+ pair->Alpha.Arg[i].Negate = !!(inst->SrcReg[i].Negate & RC_MASK_W);
+ }
+ }
+
+ /* Destination handling */
+ if (inst->DstReg.File == RC_FILE_OUTPUT) {
+ if (inst->DstReg.Index == c->OutputDepth) {
+ pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ } else {
+ for (i = 0; i < 4; i++) {
+ if (inst->DstReg.Index == c->OutputColor[i]) {
+ pair->RGB.Target = i;
+ pair->Alpha.Target = i;
+ pair->RGB.OutputWriteMask |=
+ inst->DstReg.WriteMask & RC_MASK_XYZ;
+ pair->Alpha.OutputWriteMask |=
+ GET_BIT(inst->DstReg.WriteMask, 3);
+ break;
+ }
+ }
+ }
+ } else {
+ if (needrgb) {
+ pair->RGB.DestIndex = inst->DstReg.Index;
+ pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ;
+ }
+ if (needalpha) {
+ pair->Alpha.DestIndex = inst->DstReg.Index;
+ pair->Alpha.WriteMask |= GET_BIT(inst->DstReg.WriteMask, 3);
+ }
+ }
+
+ if (inst->WriteALUResult) {
+ pair->WriteALUResult = inst->WriteALUResult;
+ pair->ALUResultCompare = inst->ALUResultCompare;
+ }
+}
+
+
+/**
+ * Translate all ALU instructions into corresponding pair instructions,
+ * performing no other changes.
+ */
+void rc_pair_translate(struct r300_fragment_program_compiler *c)
+{
+ for(struct rc_instruction * inst = c->Base.Program.Instructions.Next;
+ inst != &c->Base.Program.Instructions;
+ inst = inst->Next) {
+ if (inst->Type != RC_INSTRUCTION_NORMAL)
+ continue;
+
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+
+ if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL)
+ continue;
+
+ struct rc_sub_instruction copy = inst->U.I;
+
+ final_rewrite(&copy);
+ inst->Type = RC_INSTRUCTION_PAIR;
+ set_pair_instruction(c, &inst->U.P, &copy);
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.c b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
new file mode 100644
index 0000000000..a3c41d7bd4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+#include "radeon_compiler.h"
+
+
+/**
+ * Transform the given clause in the following way:
+ * 1. Replace it with an empty clause
+ * 2. For every instruction in the original clause, try the given
+ * transformations in order.
+ * 3. If one of the transformations returns GL_TRUE, assume that it
+ * has emitted the appropriate instruction(s) into the new clause;
+ * otherwise, copy the instruction verbatim.
+ *
+ * \note The transformation is currently not recursive; in other words,
+ * instructions emitted by transformations are not transformed.
+ *
+ * \note The transform is called 'local' because it can only look at
+ * one instruction at a time.
+ */
+void radeonLocalTransform(
+ struct radeon_compiler * c,
+ int num_transformations,
+ struct radeon_program_transformation* transformations)
+{
+ struct rc_instruction * inst = c->Program.Instructions.Next;
+
+ while(inst != &c->Program.Instructions) {
+ struct rc_instruction * current = inst;
+ int i;
+
+ inst = inst->Next;
+
+ for(i = 0; i < num_transformations; ++i) {
+ struct radeon_program_transformation* t = transformations + i;
+
+ if (t->function(c, current, t->userData))
+ break;
+ }
+ }
+}
+
+/**
+ * Left multiplication of a register with a swizzle
+ */
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg)
+{
+ struct rc_src_register tmp = srcreg;
+ int i;
+ tmp.Swizzle = 0;
+ tmp.Negate = 0;
+ for(i = 0; i < 4; ++i) {
+ rc_swizzle swz = GET_SWZ(swizzle, i);
+ if (swz < 4) {
+ tmp.Swizzle |= GET_SWZ(srcreg.Swizzle, swz) << (i*3);
+ tmp.Negate |= GET_BIT(srcreg.Negate, swz) << i;
+ } else {
+ tmp.Swizzle |= swz << (i*3);
+ }
+ }
+ return tmp;
+}
+
+unsigned int rc_find_free_temporary(struct radeon_compiler * c)
+{
+ char used[RC_REGISTER_MAX_INDEX];
+ unsigned int i;
+ struct rc_instruction * rcinst;
+
+ memset(used, 0, sizeof(used));
+
+ for (rcinst = c->Program.Instructions.Next; rcinst != &c->Program.Instructions; rcinst = rcinst->Next) {
+ const struct rc_sub_instruction *inst = &rcinst->U.I;
+ const struct rc_opcode_info *opcode = rc_get_opcode_info(inst->Opcode);
+ unsigned int k;
+
+ for (k = 0; k < opcode->NumSrcRegs; k++) {
+ if (inst->SrcReg[k].File == RC_FILE_TEMPORARY)
+ used[inst->SrcReg[k].Index] = 1;
+ }
+
+ if (opcode->HasDstReg) {
+ if (inst->DstReg.File == RC_FILE_TEMPORARY)
+ used[inst->DstReg.Index] = 1;
+ }
+ }
+
+ for (i = 0; i < RC_REGISTER_MAX_INDEX; i++) {
+ if (!used[i])
+ return i;
+ }
+
+ rc_error(c, "Ran out of temporary registers\n");
+ return 0;
+}
+
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c)
+{
+ struct rc_instruction * inst = memory_pool_malloc(&c->Pool, sizeof(struct rc_instruction));
+
+ memset(inst, 0, sizeof(struct rc_instruction));
+
+ inst->U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+ inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZW;
+ inst->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZW;
+
+ return inst;
+}
+
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst)
+{
+ inst->Prev = after;
+ inst->Next = after->Next;
+
+ inst->Prev->Next = inst;
+ inst->Next->Prev = inst;
+}
+
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after)
+{
+ struct rc_instruction * inst = rc_alloc_instruction(c);
+
+ rc_insert_instruction(after, inst);
+
+ return inst;
+}
+
+void rc_remove_instruction(struct rc_instruction * inst)
+{
+ inst->Prev->Next = inst->Next;
+ inst->Next->Prev = inst->Prev;
+}
+
+/**
+ * Return the number of instructions in the program.
+ */
+unsigned int rc_recompute_ips(struct radeon_compiler * c)
+{
+ unsigned int ip = 0;
+ struct rc_instruction * inst;
+
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions;
+ inst = inst->Next) {
+ inst->IP = ip++;
+ }
+
+ c->Program.Instructions.IP = 0xcafedead;
+
+ return ip;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program.h b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
new file mode 100644
index 0000000000..e318867696
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_H_
+#define __RADEON_PROGRAM_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#include "radeon_opcodes.h"
+#include "radeon_code.h"
+#include "radeon_program_constants.h"
+#include "radeon_program_pair.h"
+
+struct radeon_compiler;
+
+struct rc_src_register {
+ unsigned int File:3;
+
+ /** Negative values may be used for relative addressing. */
+ signed int Index:(RC_REGISTER_INDEX_BITS+1);
+ unsigned int RelAddr:1;
+
+ unsigned int Swizzle:12;
+
+ /** Take the component-wise absolute value */
+ unsigned int Abs:1;
+
+ /** Post-Abs negation. */
+ unsigned int Negate:4;
+};
+
+struct rc_dst_register {
+ unsigned int File:3;
+
+ /** Negative values may be used for relative addressing. */
+ signed int Index:(RC_REGISTER_INDEX_BITS+1);
+ unsigned int RelAddr:1;
+
+ unsigned int WriteMask:4;
+};
+
+/**
+ * Instructions are maintained by the compiler in a doubly linked list
+ * of these structures.
+ *
+ * This instruction format is intended to be expanded for hardware-specific
+ * trickery. At different stages of compilation, a different set of
+ * instruction types may be valid.
+ */
+struct rc_sub_instruction {
+ struct rc_src_register SrcReg[3];
+ struct rc_dst_register DstReg;
+
+ /**
+ * Opcode of this instruction, according to \ref rc_opcode enums.
+ */
+ unsigned int Opcode:8;
+
+ /**
+ * Saturate each value of the result to the range [0,1] or [-1,1],
+ * according to \ref rc_saturate_mode enums.
+ */
+ unsigned int SaturateMode:2;
+
+ /**
+ * Writing to the special register RC_SPECIAL_ALU_RESULT
+ */
+ /*@{*/
+ unsigned int WriteALUResult:2;
+ unsigned int ALUResultCompare:3;
+ /*@}*/
+
+ /**
+ * \name Extra fields for TEX, TXB, TXD, TXL, TXP instructions.
+ */
+ /*@{*/
+ /** Source texture unit. */
+ unsigned int TexSrcUnit:5;
+
+ /** Source texture target, one of the \ref rc_texture_target enums */
+ unsigned int TexSrcTarget:3;
+
+ /** True if tex instruction should do shadow comparison */
+ unsigned int TexShadow:1;
+ /*@}*/
+};
+
+typedef enum {
+ RC_INSTRUCTION_NORMAL = 0,
+ RC_INSTRUCTION_PAIR
+} rc_instruction_type;
+
+struct rc_instruction {
+ struct rc_instruction * Prev;
+ struct rc_instruction * Next;
+
+ rc_instruction_type Type;
+ union {
+ struct rc_sub_instruction I;
+ struct rc_pair_instruction P;
+ } U;
+
+ /**
+ * Warning: IPs are not stable. If you want to use them,
+ * you need to recompute them at the beginning of each pass
+ * using \ref rc_recompute_ips
+ */
+ unsigned int IP;
+};
+
+struct rc_program {
+ /**
+ * Instructions.Next points to the first instruction,
+ * Instructions.Prev points to the last instruction.
+ */
+ struct rc_instruction Instructions;
+
+ /* Long term, we should probably remove InputsRead & OutputsWritten,
+ * since updating dependent state can be fragile, and they aren't
+ * actually used very often. */
+ uint32_t InputsRead;
+ uint32_t OutputsWritten;
+ uint32_t ShadowSamplers; /**< Texture units used for shadow sampling. */
+
+ struct rc_constant_list Constants;
+};
+
+enum {
+ OPCODE_REPL_ALPHA = MAX_RC_OPCODE /**< used in paired instructions */
+};
+
+
+static inline rc_swizzle get_swz(unsigned int swz, rc_swizzle idx)
+{
+ if (idx & 0x4)
+ return idx;
+ return GET_SWZ(swz, idx);
+}
+
+static inline unsigned int combine_swizzles4(unsigned int src,
+ rc_swizzle swz_x, rc_swizzle swz_y, rc_swizzle swz_z, rc_swizzle swz_w)
+{
+ unsigned int ret = 0;
+
+ ret |= get_swz(src, swz_x);
+ ret |= get_swz(src, swz_y) << 3;
+ ret |= get_swz(src, swz_z) << 6;
+ ret |= get_swz(src, swz_w) << 9;
+
+ return ret;
+}
+
+static inline unsigned int combine_swizzles(unsigned int src, unsigned int swz)
+{
+ unsigned int ret = 0;
+
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_X));
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Y)) << 3;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_Z)) << 6;
+ ret |= get_swz(src, GET_SWZ(swz, RC_SWIZZLE_W)) << 9;
+
+ return ret;
+}
+
+struct rc_src_register lmul_swizzle(unsigned int swizzle, struct rc_src_register srcreg);
+
+static inline void reset_srcreg(struct rc_src_register* reg)
+{
+ memset(reg, 0, sizeof(struct rc_src_register));
+ reg->Swizzle = RC_SWIZZLE_XYZW;
+}
+
+
+/**
+ * A transformation that can be passed to \ref radeonLocalTransform.
+ *
+ * The function will be called once for each instruction.
+ * It has to either emit the appropriate transformed code for the instruction
+ * and return true, or return false if it doesn't understand the
+ * instruction.
+ *
+ * The function gets passed the userData as last parameter.
+ */
+struct radeon_program_transformation {
+ int (*function)(
+ struct radeon_compiler*,
+ struct rc_instruction*,
+ void*);
+ void *userData;
+};
+
+void radeonLocalTransform(
+ struct radeon_compiler *c,
+ int num_transformations,
+ struct radeon_program_transformation* transformations);
+
+unsigned int rc_find_free_temporary(struct radeon_compiler * c);
+
+struct rc_instruction *rc_alloc_instruction(struct radeon_compiler * c);
+struct rc_instruction *rc_insert_new_instruction(struct radeon_compiler * c, struct rc_instruction * after);
+void rc_insert_instruction(struct rc_instruction * after, struct rc_instruction * inst);
+void rc_remove_instruction(struct rc_instruction * inst);
+
+unsigned int rc_recompute_ips(struct radeon_compiler * c);
+
+void rc_print_program(const struct rc_program *prog);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
new file mode 100644
index 0000000000..c922d3d9a4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -0,0 +1,975 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * @file
+ *
+ * Shareable transformations that transform "special" ALU instructions
+ * into ALU instructions that are supported by hardware.
+ *
+ */
+
+#include "radeon_program_alu.h"
+
+#include "radeon_compiler.h"
+
+
+static struct rc_instruction *emit1(
+ struct radeon_compiler * c, struct rc_instruction * after,
+ rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ struct rc_src_register SrcReg)
+{
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+ fpi->U.I.Opcode = Opcode;
+ fpi->U.I.SaturateMode = Saturate;
+ fpi->U.I.DstReg = DstReg;
+ fpi->U.I.SrcReg[0] = SrcReg;
+ return fpi;
+}
+
+static struct rc_instruction *emit2(
+ struct radeon_compiler * c, struct rc_instruction * after,
+ rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
+{
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+ fpi->U.I.Opcode = Opcode;
+ fpi->U.I.SaturateMode = Saturate;
+ fpi->U.I.DstReg = DstReg;
+ fpi->U.I.SrcReg[0] = SrcReg0;
+ fpi->U.I.SrcReg[1] = SrcReg1;
+ return fpi;
+}
+
+static struct rc_instruction *emit3(
+ struct radeon_compiler * c, struct rc_instruction * after,
+ rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
+ struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
+ struct rc_src_register SrcReg2)
+{
+ struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
+
+ fpi->U.I.Opcode = Opcode;
+ fpi->U.I.SaturateMode = Saturate;
+ fpi->U.I.DstReg = DstReg;
+ fpi->U.I.SrcReg[0] = SrcReg0;
+ fpi->U.I.SrcReg[1] = SrcReg1;
+ fpi->U.I.SrcReg[2] = SrcReg2;
+ return fpi;
+}
+
+static struct rc_dst_register dstreg(int file, int index)
+{
+ struct rc_dst_register dst;
+ dst.File = file;
+ dst.Index = index;
+ dst.WriteMask = RC_MASK_XYZW;
+ dst.RelAddr = 0;
+ return dst;
+}
+
+static struct rc_dst_register dstregtmpmask(int index, int mask)
+{
+ struct rc_dst_register dst = {0};
+ dst.File = RC_FILE_TEMPORARY;
+ dst.Index = index;
+ dst.WriteMask = mask;
+ dst.RelAddr = 0;
+ return dst;
+}
+
+static const struct rc_src_register builtin_zero = {
+ .File = RC_FILE_NONE,
+ .Index = 0,
+ .Swizzle = RC_SWIZZLE_0000
+};
+static const struct rc_src_register builtin_one = {
+ .File = RC_FILE_NONE,
+ .Index = 0,
+ .Swizzle = RC_SWIZZLE_1111
+};
+static const struct rc_src_register srcreg_undefined = {
+ .File = RC_FILE_NONE,
+ .Index = 0,
+ .Swizzle = RC_SWIZZLE_XYZW
+};
+
+static struct rc_src_register srcreg(int file, int index)
+{
+ struct rc_src_register src = srcreg_undefined;
+ src.File = file;
+ src.Index = index;
+ return src;
+}
+
+static struct rc_src_register srcregswz(int file, int index, int swz)
+{
+ struct rc_src_register src = srcreg_undefined;
+ src.File = file;
+ src.Index = index;
+ src.Swizzle = swz;
+ return src;
+}
+
+static struct rc_src_register absolute(struct rc_src_register reg)
+{
+ struct rc_src_register newreg = reg;
+ newreg.Abs = 1;
+ newreg.Negate = RC_MASK_NONE;
+ return newreg;
+}
+
+static struct rc_src_register negate(struct rc_src_register reg)
+{
+ struct rc_src_register newreg = reg;
+ newreg.Negate = newreg.Negate ^ RC_MASK_XYZW;
+ return newreg;
+}
+
+static struct rc_src_register swizzle(struct rc_src_register reg,
+ rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
+{
+ struct rc_src_register swizzled = reg;
+ swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
+ return swizzled;
+}
+
+static struct rc_src_register swizzle_smear(struct rc_src_register reg,
+ rc_swizzle x)
+{
+ return swizzle(reg, x, x, x, x);
+}
+
+static struct rc_src_register swizzle_xxxx(struct rc_src_register reg)
+{
+ return swizzle_smear(reg, RC_SWIZZLE_X);
+}
+
+static struct rc_src_register swizzle_yyyy(struct rc_src_register reg)
+{
+ return swizzle_smear(reg, RC_SWIZZLE_Y);
+}
+
+static struct rc_src_register swizzle_zzzz(struct rc_src_register reg)
+{
+ return swizzle_smear(reg, RC_SWIZZLE_Z);
+}
+
+static struct rc_src_register swizzle_wwww(struct rc_src_register reg)
+{
+ return swizzle_smear(reg, RC_SWIZZLE_W);
+}
+
+static void transform_ABS(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_src_register src = inst->U.I.SrcReg[0];
+ src.Abs = 1;
+ src.Negate = RC_MASK_NONE;
+ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
+ rc_remove_instruction(inst);
+}
+
+static void transform_CEIL(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* Assuming:
+ * ceil(x) = -floor(-x)
+ *
+ * After inlining floor:
+ * ceil(x) = -(-x-frac(-x))
+ *
+ * After simplification:
+ * ceil(x) = x+frac(-x)
+ */
+
+ int tempreg = rc_find_free_temporary(c);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]));
+ emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg));
+ rc_remove_instruction(inst);
+}
+
+static void transform_DP3(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_src_register src0 = inst->U.I.SrcReg[0];
+ struct rc_src_register src1 = inst->U.I.SrcReg[1];
+ src0.Negate &= ~RC_MASK_W;
+ src0.Swizzle &= ~(7 << (3 * 3));
+ src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ src1.Negate &= ~RC_MASK_W;
+ src1.Swizzle &= ~(7 << (3 * 3));
+ src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
+ rc_remove_instruction(inst);
+}
+
+static void transform_DPH(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ struct rc_src_register src0 = inst->U.I.SrcReg[0];
+ src0.Negate &= ~RC_MASK_W;
+ src0.Swizzle &= ~(7 << (3 * 3));
+ src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
+ emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
+ rc_remove_instruction(inst);
+}
+
+/**
+ * [1, src0.y*src1.y, src0.z, src1.w]
+ * So basically MUL with lotsa swizzling.
+ */
+static void transform_DST(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
+ swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
+ rc_remove_instruction(inst);
+}
+
+static void transform_FLR(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0]);
+ emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ rc_remove_instruction(inst);
+}
+
+/**
+ * Definition of LIT (from ARB_fragment_program):
+ *
+ * tmp = VectorLoad(op0);
+ * if (tmp.x < 0) tmp.x = 0;
+ * if (tmp.y < 0) tmp.y = 0;
+ * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
+ * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
+ * result.x = 1.0;
+ * result.y = tmp.x;
+ * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
+ * result.w = 1.0;
+ *
+ * The longest path of computation is the one leading to result.z,
+ * consisting of 5 operations. This implementation of LIT takes
+ * 5 slots, if the subsequent optimization passes are clever enough
+ * to pair instructions correctly.
+ */
+static void transform_LIT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ unsigned int constant;
+ unsigned int constant_swizzle;
+ unsigned int temp;
+ struct rc_src_register srctemp;
+
+ constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
+
+ if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
+ struct rc_instruction * inst_mov;
+
+ inst_mov = emit1(c, inst,
+ RC_OPCODE_MOV, 0, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c)));
+
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ }
+
+ temp = inst->U.I.DstReg.Index;
+ srctemp = srcreg(RC_FILE_TEMPORARY, temp);
+
+ /* tmp.x = max(0.0, Src.x); */
+ /* tmp.y = max(0.0, Src.y); */
+ /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */
+ emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+ dstregtmpmask(temp, RC_MASK_XYW),
+ inst->U.I.SrcReg[0],
+ swizzle(srcreg(RC_FILE_CONSTANT, constant),
+ RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3));
+ emit2(c, inst->Prev, RC_OPCODE_MIN, 0,
+ dstregtmpmask(temp, RC_MASK_Z),
+ swizzle_wwww(srctemp),
+ negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));
+
+ /* tmp.w = Pow(tmp.y, tmp.w) */
+ emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
+ dstregtmpmask(temp, RC_MASK_W),
+ swizzle_yyyy(srctemp));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+ dstregtmpmask(temp, RC_MASK_W),
+ swizzle_wwww(srctemp),
+ swizzle_zzzz(srctemp));
+ emit1(c, inst->Prev, RC_OPCODE_EX2, 0,
+ dstregtmpmask(temp, RC_MASK_W),
+ swizzle_wwww(srctemp));
+
+ /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
+ dstregtmpmask(temp, RC_MASK_Z),
+ negate(swizzle_xxxx(srctemp)),
+ swizzle_wwww(srctemp),
+ builtin_zero);
+
+ /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
+ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
+ dstregtmpmask(temp, RC_MASK_XYW),
+ swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_LRP(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg),
+ inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
+ inst->U.I.DstReg,
+ inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[2]);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_POW(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+ struct rc_dst_register tempdst = dstreg(RC_FILE_TEMPORARY, tempreg);
+ struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempreg);
+ tempdst.WriteMask = RC_MASK_W;
+ tempsrc.Swizzle = RC_SWIZZLE_WWWW;
+
+ emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
+ emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_RSQ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]);
+}
+
+static void transform_SEQ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_zero, builtin_one);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SFL(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero);
+ rc_remove_instruction(inst);
+}
+
+static void transform_SGE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SGT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SLE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_zero, builtin_one);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SLT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg), builtin_one, builtin_zero);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SNE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dstreg(RC_FILE_TEMPORARY, tempreg), inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
+ emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ negate(absolute(srcreg(RC_FILE_TEMPORARY, tempreg))), builtin_one, builtin_zero);
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_SUB(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ inst->U.I.Opcode = RC_OPCODE_ADD;
+ inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]);
+}
+
+static void transform_SWZ(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+}
+
+static void transform_XPD(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstreg(RC_FILE_TEMPORARY, tempreg),
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+ swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
+ swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
+ negate(srcreg(RC_FILE_TEMPORARY, tempreg)));
+
+ rc_remove_instruction(inst);
+}
+
+
+/**
+ * Can be used as a transformation for @ref radeonClauseLocalTransform,
+ * no userData necessary.
+ *
+ * Eliminates the following ALU instructions:
+ * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD
+ * using:
+ * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
+ *
+ * Transforms RSQ to Radeon's native RSQ by explicitly setting
+ * absolute value.
+ *
+ * @note should be applicable to R300 and R500 fragment programs.
+ */
+int radeonTransformALU(
+ struct radeon_compiler * c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
+ case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
+ case RC_OPCODE_DST: transform_DST(c, inst); return 1;
+ case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+ case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;
+ case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+ case RC_OPCODE_POW: transform_POW(c, inst); return 1;
+ case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
+ case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;
+ case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
+ case RC_OPCODE_SGE: transform_SGE(c, inst); return 1;
+ case RC_OPCODE_SGT: transform_SGT(c, inst); return 1;
+ case RC_OPCODE_SLE: transform_SLE(c, inst); return 1;
+ case RC_OPCODE_SLT: transform_SLT(c, inst); return 1;
+ case RC_OPCODE_SNE: transform_SNE(c, inst); return 1;
+ case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
+ case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
+ default:
+ return 0;
+ }
+}
+
+
+static void transform_r300_vertex_ABS(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* Note: r500 can take absolute values, but r300 cannot. */
+ inst->U.I.Opcode = RC_OPCODE_MAX;
+ inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0];
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+static void transform_r300_vertex_CMP(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* There is no decent CMP available, so let's rig one up.
+ * CMP is defined as dst = src0 < 0.0 ? src1 : src2
+ * The following sequence consumes two temps and two extra slots
+ * (the second temp and the second slot is consumed by transform_LRP),
+ * but should be equivalent:
+ *
+ * SLT tmp0, src0, 0.0
+ * LRP dst, tmp0, src1, src2
+ *
+ * Yes, I know, I'm a mad scientist. ~ C. & M. */
+ int tempreg0 = rc_find_free_temporary(c);
+
+ /* SLT tmp0, src0, 0.0 */
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg0),
+ inst->U.I.SrcReg[0], builtin_zero);
+
+ /* LRP dst, tmp0, src1, src2 */
+ transform_LRP(c,
+ emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tempreg0), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2]));
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ int tempreg = rc_find_free_temporary(c);
+ unsigned constant_swizzle;
+ int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
+ 0.0000000000000000001,
+ &constant_swizzle);
+
+ /* MOV dst, src */
+ emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
+ dstreg(RC_FILE_TEMPORARY, tempreg),
+ inst->U.I.SrcReg[0]);
+
+ /* MAX dst.z, src, 0.00...001 */
+ emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+ dstregtmpmask(tempreg, RC_MASK_Y),
+ srcreg(RC_FILE_TEMPORARY, tempreg),
+ srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+
+ inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, tempreg);
+}
+
+static void transform_r300_vertex_SEQ(struct radeon_compiler *c,
+ struct rc_instruction *inst)
+{
+ /* x = y <==> x >= y && y >= x */
+ int tmp = rc_find_free_temporary(c);
+
+ /* x <= y */
+ emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+ dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+ inst->U.I.SrcReg[0],
+ inst->U.I.SrcReg[1]);
+
+ /* y <= x */
+ emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
+ inst->U.I.DstReg,
+ inst->U.I.SrcReg[1],
+ inst->U.I.SrcReg[0]);
+
+ /* x && y = x * y */
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tmp),
+ srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_SNE(struct radeon_compiler *c,
+ struct rc_instruction *inst)
+{
+ /* x != y <==> x < y || y < x */
+ int tmp = rc_find_free_temporary(c);
+
+ /* x < y */
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
+ inst->U.I.SrcReg[0],
+ inst->U.I.SrcReg[1]);
+
+ /* y < x */
+ emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
+ inst->U.I.DstReg,
+ inst->U.I.SrcReg[1],
+ inst->U.I.SrcReg[0]);
+
+ /* x || y = max(x, y) */
+ emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
+ inst->U.I.DstReg,
+ srcreg(RC_FILE_TEMPORARY, tmp),
+ srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
+
+ rc_remove_instruction(inst);
+}
+
+static void transform_r300_vertex_SGT(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* x > y <==> -x < -y */
+ inst->U.I.Opcode = RC_OPCODE_SLT;
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+static void transform_r300_vertex_SLE(struct radeon_compiler* c,
+ struct rc_instruction* inst)
+{
+ /* x <= y <==> -x >= -y */
+ inst->U.I.Opcode = RC_OPCODE_SGE;
+ inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
+ inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
+}
+
+/**
+ * For use with radeonLocalTransform, this transforms non-native ALU
+ * instructions of the r300 up to r500 vertex engine.
+ */
+int r300_transform_vertex_alu(
+ struct radeon_compiler * c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ switch(inst->U.I.Opcode) {
+ case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
+ case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
+ case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
+ case RC_OPCODE_DP3: transform_DP3(c, inst); return 1;
+ case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
+ case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
+ case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
+ case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
+ case RC_OPCODE_SEQ:
+ if (!c->is_r500) {
+ transform_r300_vertex_SEQ(c, inst);
+ return 1;
+ }
+ return 0;
+ case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
+ case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1;
+ case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1;
+ case RC_OPCODE_SNE:
+ if (!c->is_r500) {
+ transform_r300_vertex_SNE(c, inst);
+ return 1;
+ }
+ return 0;
+ case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
+ case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
+ case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
+ default:
+ return 0;
+ }
+}
+
+static void sincos_constants(struct radeon_compiler* c, unsigned int *constants)
+{
+ static const float SinCosConsts[2][4] = {
+ {
+ 1.273239545, /* 4/PI */
+ -0.405284735, /* -4/(PI*PI) */
+ 3.141592654, /* PI */
+ 0.2225 /* weight */
+ },
+ {
+ 0.75,
+ 0.5,
+ 0.159154943, /* 1/(2*PI) */
+ 6.283185307 /* 2*PI */
+ }
+ };
+ int i;
+
+ for(i = 0; i < 2; ++i)
+ constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]);
+}
+
+/**
+ * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
+ *
+ * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
+ * MAD tmp.x, tmp.y, |src|, tmp.x
+ * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
+ * MAD dest, tmp.y, weight, tmp.x
+ */
+static void sin_approx(
+ struct radeon_compiler* c, struct rc_instruction * inst,
+ struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants)
+{
+ unsigned int tempreg = rc_find_free_temporary(c);
+
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ swizzle_xxxx(src),
+ srcreg(RC_FILE_CONSTANT, constants[0]));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X),
+ swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ absolute(swizzle_xxxx(src)),
+ swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y),
+ swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))),
+ negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst,
+ swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])),
+ swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
+}
+
+/**
+ * Translate the trigonometric functions COS, SIN, and SCS
+ * using only the basic instructions
+ * MOV, ADD, MUL, MAD, FRC
+ */
+int radeonTransformTrigSimple(struct radeon_compiler* c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ if (inst->U.I.Opcode != RC_OPCODE_COS &&
+ inst->U.I.Opcode != RC_OPCODE_SIN &&
+ inst->U.I.Opcode != RC_OPCODE_SCS)
+ return 0;
+
+ unsigned int constants[2];
+ unsigned int tempreg = rc_find_free_temporary(c);
+
+ sincos_constants(c, constants);
+
+ if (inst->U.I.Opcode == RC_OPCODE_COS) {
+ /* MAD tmp.x, src, 1/(2*PI), 0.75 */
+ /* FRC tmp.x, tmp.x */
+ /* MAD tmp.z, tmp.x, 2*PI, -PI */
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+ swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1])));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+ negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+
+ sin_approx(c, inst, inst->U.I.DstReg,
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ constants);
+ } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+ swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1])));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+ negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+
+ sin_approx(c, inst, inst->U.I.DstReg,
+ swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ constants);
+ } else {
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
+ swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ srcreg(RC_FILE_TEMPORARY, tempreg));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
+ srcreg(RC_FILE_TEMPORARY, tempreg),
+ swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
+ negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
+
+ struct rc_dst_register dst = inst->U.I.DstReg;
+
+ dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
+ sin_approx(c, inst, dst,
+ swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ constants);
+
+ dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y;
+ sin_approx(c, inst, dst,
+ swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
+ constants);
+ }
+
+ rc_remove_instruction(inst);
+
+ return 1;
+}
+
+static void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
+ struct rc_instruction *inst,
+ unsigned srctmp)
+{
+ if (inst->U.I.Opcode == RC_OPCODE_COS) {
+ emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
+ srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+ } else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
+ emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
+ inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+ } else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
+ struct rc_dst_register moddst = inst->U.I.DstReg;
+
+ if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
+ moddst.WriteMask = RC_MASK_X;
+ emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
+ srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+ }
+ if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
+ moddst.WriteMask = RC_MASK_Y;
+ emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
+ srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
+ }
+ }
+
+ rc_remove_instruction(inst);
+}
+
+
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * to include pre-scaling by 1/(2*PI) and taking the fractional
+ * part, so that the input to COS and SIN is always in the range [0,1).
+ * SCS is replaced by one COS and one SIN instruction.
+ *
+ * @warning This transformation implicitly changes the semantics of SIN and COS!
+ */
+int radeonTransformTrigScale(struct radeon_compiler* c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ if (inst->U.I.Opcode != RC_OPCODE_COS &&
+ inst->U.I.Opcode != RC_OPCODE_SIN &&
+ inst->U.I.Opcode != RC_OPCODE_SCS)
+ return 0;
+
+ static const float RCP_2PI = 0.15915494309189535;
+ unsigned int temp;
+ unsigned int constant;
+ unsigned int constant_swizzle;
+
+ temp = rc_find_free_temporary(c);
+ constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
+
+ emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W),
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+ srcreg(RC_FILE_TEMPORARY, temp));
+
+ r300_transform_SIN_COS_SCS(c, inst, temp);
+ return 1;
+}
+
+/**
+ * Transform the trigonometric functions COS, SIN, and SCS
+ * so that the input to COS and SIN is always in the range [-PI, PI].
+ * SCS is replaced by one COS and one SIN instruction.
+ */
+int r300_transform_trig_scale_vertex(struct radeon_compiler *c,
+ struct rc_instruction *inst,
+ void *unused)
+{
+ if (inst->U.I.Opcode != RC_OPCODE_COS &&
+ inst->U.I.Opcode != RC_OPCODE_SIN &&
+ inst->U.I.Opcode != RC_OPCODE_SCS)
+ return 0;
+
+ /* Repeat x in the range [-PI, PI]:
+ *
+ * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
+ */
+
+ static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
+ unsigned int temp;
+ unsigned int constant;
+
+ temp = rc_find_free_temporary(c);
+ constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
+
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+ swizzle_xxxx(inst->U.I.SrcReg[0]),
+ srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX),
+ srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY));
+ emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
+ srcreg(RC_FILE_TEMPORARY, temp));
+ emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
+ srcreg(RC_FILE_TEMPORARY, temp),
+ srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ),
+ srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW));
+
+ r300_transform_SIN_COS_SCS(c, inst, temp);
+ return 1;
+}
+
+/**
+ * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
+ * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
+ * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
+ *
+ * @warning This explicitly changes the form of DDX and DDY!
+ */
+
+int radeonTransformDeriv(struct radeon_compiler* c,
+ struct rc_instruction* inst,
+ void* unused)
+{
+ if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
+ return 0;
+
+ inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;
+ inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
+
+ return 1;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
new file mode 100644
index 0000000000..77d444476f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_ALU_H_
+#define __RADEON_PROGRAM_ALU_H_
+
+#include "radeon_program.h"
+
+int radeonTransformALU(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void*);
+
+int r300_transform_vertex_alu(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void*);
+
+int radeonTransformTrigSimple(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void*);
+
+int radeonTransformTrigScale(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void*);
+
+int r300_transform_trig_scale_vertex(
+ struct radeon_compiler *c,
+ struct rc_instruction *inst,
+ void*);
+
+int radeonTransformDeriv(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void*);
+
+#endif /* __RADEON_PROGRAM_ALU_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
new file mode 100644
index 0000000000..2ddf60b677
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_constants.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_PROGRAM_CONSTANTS_H
+#define RADEON_PROGRAM_CONSTANTS_H
+
+typedef enum {
+ RC_SATURATE_NONE = 0,
+ RC_SATURATE_ZERO_ONE,
+ RC_SATURATE_MINUS_PLUS_ONE
+} rc_saturate_mode;
+
+typedef enum {
+ RC_TEXTURE_2D_ARRAY,
+ RC_TEXTURE_1D_ARRAY,
+ RC_TEXTURE_CUBE,
+ RC_TEXTURE_3D,
+ RC_TEXTURE_RECT,
+ RC_TEXTURE_2D,
+ RC_TEXTURE_1D
+} rc_texture_target;
+
+typedef enum {
+ /**
+ * Used to indicate unused register descriptions and
+ * source register that use a constant swizzle.
+ */
+ RC_FILE_NONE = 0,
+ RC_FILE_TEMPORARY,
+
+ /**
+ * Input register.
+ *
+ * \note The compiler attaches no implicit semantics to input registers.
+ * Fragment/vertex program specific semantics must be defined explicitly
+ * using the appropriate compiler interfaces.
+ */
+ RC_FILE_INPUT,
+
+ /**
+ * Output register.
+ *
+ * \note The compiler attaches no implicit semantics to input registers.
+ * Fragment/vertex program specific semantics must be defined explicitly
+ * using the appropriate compiler interfaces.
+ */
+ RC_FILE_OUTPUT,
+ RC_FILE_ADDRESS,
+
+ /**
+ * Indicates a constant from the \ref rc_constant_list .
+ */
+ RC_FILE_CONSTANT,
+
+ /**
+ * Indicates a special register, see RC_SPECIAL_xxx.
+ */
+ RC_FILE_SPECIAL
+} rc_register_file;
+
+enum {
+ /** R500 fragment program ALU result "register" */
+ RC_SPECIAL_ALU_RESULT = 0,
+
+ /** Must be last */
+ RC_NUM_SPECIAL_REGISTERS
+};
+
+#define RC_REGISTER_INDEX_BITS 10
+#define RC_REGISTER_MAX_INDEX (1 << RC_REGISTER_INDEX_BITS)
+
+typedef enum {
+ RC_SWIZZLE_X = 0,
+ RC_SWIZZLE_Y,
+ RC_SWIZZLE_Z,
+ RC_SWIZZLE_W,
+ RC_SWIZZLE_ZERO,
+ RC_SWIZZLE_ONE,
+ RC_SWIZZLE_HALF,
+ RC_SWIZZLE_UNUSED
+} rc_swizzle;
+
+#define RC_MAKE_SWIZZLE(a,b,c,d) (((a)<<0) | ((b)<<3) | ((c)<<6) | ((d)<<9))
+#define RC_MAKE_SWIZZLE_SMEAR(a) RC_MAKE_SWIZZLE((a),(a),(a),(a))
+#define GET_SWZ(swz, idx) (((swz) >> ((idx)*3)) & 0x7)
+#define GET_BIT(msk, idx) (((msk) >> (idx)) & 0x1)
+#define SET_SWZ(swz, idx, newv) \
+ do { \
+ (swz) = ((swz) & ~(7 << ((idx)*3))) | ((newv) << ((idx)*3)); \
+ } while(0)
+
+#define RC_SWIZZLE_XYZW RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)
+#define RC_SWIZZLE_XYZ0 RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_XYZZ RC_MAKE_SWIZZLE(RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_Z)
+#define RC_SWIZZLE_XXXX RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_X)
+#define RC_SWIZZLE_YYYY RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Y)
+#define RC_SWIZZLE_ZZZZ RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_Z)
+#define RC_SWIZZLE_WWWW RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_W)
+#define RC_SWIZZLE_0000 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ZERO)
+#define RC_SWIZZLE_1111 RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_ONE)
+#define RC_SWIZZLE_HHHH RC_MAKE_SWIZZLE_SMEAR(RC_SWIZZLE_HALF)
+
+/**
+ * \name Bitmasks for components of vectors.
+ *
+ * Used for write masks, negation masks, etc.
+ */
+/*@{*/
+#define RC_MASK_NONE 0
+#define RC_MASK_X 1
+#define RC_MASK_Y 2
+#define RC_MASK_Z 4
+#define RC_MASK_W 8
+#define RC_MASK_XY (RC_MASK_X|RC_MASK_Y)
+#define RC_MASK_XYZ (RC_MASK_X|RC_MASK_Y|RC_MASK_Z)
+#define RC_MASK_XYW (RC_MASK_X|RC_MASK_Y|RC_MASK_W)
+#define RC_MASK_XYZW (RC_MASK_X|RC_MASK_Y|RC_MASK_Z|RC_MASK_W)
+/*@}*/
+
+typedef enum {
+ RC_ALURESULT_NONE = 0,
+ RC_ALURESULT_X,
+ RC_ALURESULT_W
+} rc_write_aluresult;
+
+#endif /* RADEON_PROGRAM_CONSTANTS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
new file mode 100644
index 0000000000..ee839596aa
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.c
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2008-2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_pair.h"
+
+
+/**
+ * Return the source slot where we installed the given register access,
+ * or -1 if no slot was free anymore.
+ */
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+ unsigned int rgb, unsigned int alpha,
+ rc_register_file file, unsigned int index)
+{
+ int candidate = -1;
+ int candidate_quality = -1;
+ int i;
+
+ if ((!rgb && !alpha) || file == RC_FILE_NONE)
+ return 0;
+
+ for(i = 0; i < 3; ++i) {
+ int q = 0;
+ if (rgb) {
+ if (pair->RGB.Src[i].Used) {
+ if (pair->RGB.Src[i].File != file ||
+ pair->RGB.Src[i].Index != index)
+ continue;
+ q++;
+ }
+ }
+ if (alpha) {
+ if (pair->Alpha.Src[i].Used) {
+ if (pair->Alpha.Src[i].File != file ||
+ pair->Alpha.Src[i].Index != index)
+ continue;
+ q++;
+ }
+ }
+ if (q > candidate_quality) {
+ candidate_quality = q;
+ candidate = i;
+ }
+ }
+
+ if (candidate >= 0) {
+ if (rgb) {
+ pair->RGB.Src[candidate].Used = 1;
+ pair->RGB.Src[candidate].File = file;
+ pair->RGB.Src[candidate].Index = index;
+ }
+ if (alpha) {
+ pair->Alpha.Src[candidate].Used = 1;
+ pair->Alpha.Src[candidate].File = file;
+ pair->Alpha.Src[candidate].Index = index;
+ }
+ }
+
+ return candidate;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
new file mode 100644
index 0000000000..511cc707a3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_pair.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2008 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_PAIR_H_
+#define __RADEON_PROGRAM_PAIR_H_
+
+#include "radeon_code.h"
+#include "radeon_opcodes.h"
+#include "radeon_program_constants.h"
+
+struct r300_fragment_program_compiler;
+
+
+/**
+ * \file
+ * Represents a paired ALU instruction, as found in R300 and R500
+ * fragment programs.
+ *
+ * Note that this representation is taking some liberties as far
+ * as register files are concerned, to allow separate register
+ * allocation.
+ *
+ * Also note that there are some subtleties in that the semantics
+ * of certain opcodes are implicitly changed in this representation;
+ * see \ref rc_pair_translate
+ */
+
+
+struct radeon_pair_instruction_source {
+ unsigned int Used:1;
+ unsigned int File:3;
+ unsigned int Index:RC_REGISTER_INDEX_BITS;
+};
+
+struct radeon_pair_instruction_rgb {
+ unsigned int Opcode:8;
+ unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
+ unsigned int WriteMask:3;
+ unsigned int Target:2;
+ unsigned int OutputWriteMask:3;
+ unsigned int Saturate:1;
+
+ struct radeon_pair_instruction_source Src[3];
+
+ struct {
+ unsigned int Source:2;
+ unsigned int Swizzle:9;
+ unsigned int Abs:1;
+ unsigned int Negate:1;
+ } Arg[3];
+};
+
+struct radeon_pair_instruction_alpha {
+ unsigned int Opcode:8;
+ unsigned int DestIndex:RC_REGISTER_INDEX_BITS;
+ unsigned int WriteMask:1;
+ unsigned int Target:2;
+ unsigned int OutputWriteMask:1;
+ unsigned int DepthWriteMask:1;
+ unsigned int Saturate:1;
+
+ struct radeon_pair_instruction_source Src[3];
+
+ struct {
+ unsigned int Source:2;
+ unsigned int Swizzle:3;
+ unsigned int Abs:1;
+ unsigned int Negate:1;
+ } Arg[3];
+};
+
+struct rc_pair_instruction {
+ struct radeon_pair_instruction_rgb RGB;
+ struct radeon_pair_instruction_alpha Alpha;
+
+ unsigned int WriteALUResult:2;
+ unsigned int ALUResultCompare:3;
+};
+
+
+/**
+ * General helper functions for dealing with the paired instruction format.
+ */
+/*@{*/
+int rc_pair_alloc_source(struct rc_pair_instruction *pair,
+ unsigned int rgb, unsigned int alpha,
+ rc_register_file file, unsigned int index);
+/*@}*/
+
+
+/**
+ * Compiler passes that operate with the paired format.
+ */
+/*@{*/
+struct radeon_pair_handler;
+
+void rc_pair_translate(struct r300_fragment_program_compiler *c);
+void rc_pair_schedule(struct r300_fragment_program_compiler *c);
+void rc_pair_regalloc(struct r300_fragment_program_compiler *c, unsigned maxtemps);
+/*@}*/
+
+#endif /* __RADEON_PROGRAM_PAIR_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
new file mode 100644
index 0000000000..28fb9eae92
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_print.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_program.h"
+
+#include <stdio.h>
+
+static const char * textarget_to_string(rc_texture_target target)
+{
+ switch(target) {
+ case RC_TEXTURE_2D_ARRAY: return "2D_ARRAY";
+ case RC_TEXTURE_1D_ARRAY: return "1D_ARRAY";
+ case RC_TEXTURE_CUBE: return "CUBE";
+ case RC_TEXTURE_3D: return "3D";
+ case RC_TEXTURE_RECT: return "RECT";
+ case RC_TEXTURE_2D: return "2D";
+ case RC_TEXTURE_1D: return "1D";
+ default: return "BAD_TEXTURE_TARGET";
+ }
+}
+
+static void rc_print_comparefunc(FILE * f, const char * lhs, rc_compare_func func, const char * rhs)
+{
+ if (func == RC_COMPARE_FUNC_NEVER) {
+ fprintf(f, "false");
+ } else if (func == RC_COMPARE_FUNC_ALWAYS) {
+ fprintf(f, "true");
+ } else {
+ const char * op;
+ switch(func) {
+ case RC_COMPARE_FUNC_LESS: op = "<"; break;
+ case RC_COMPARE_FUNC_EQUAL: op = "=="; break;
+ case RC_COMPARE_FUNC_LEQUAL: op = "<="; break;
+ case RC_COMPARE_FUNC_GREATER: op = ">"; break;
+ case RC_COMPARE_FUNC_NOTEQUAL: op = "!="; break;
+ case RC_COMPARE_FUNC_GEQUAL: op = ">="; break;
+ default: op = "???"; break;
+ }
+ fprintf(f, "%s %s %s", lhs, op, rhs);
+ }
+}
+
+static void rc_print_register(FILE * f, rc_register_file file, int index, unsigned int reladdr)
+{
+ if (file == RC_FILE_NONE) {
+ fprintf(f, "none");
+ } else if (file == RC_FILE_SPECIAL) {
+ switch(index) {
+ case RC_SPECIAL_ALU_RESULT: fprintf(f, "aluresult"); break;
+ default: fprintf(f, "special[%i]", index); break;
+ }
+ } else {
+ const char * filename;
+ switch(file) {
+ case RC_FILE_TEMPORARY: filename = "temp"; break;
+ case RC_FILE_INPUT: filename = "input"; break;
+ case RC_FILE_OUTPUT: filename = "output"; break;
+ case RC_FILE_ADDRESS: filename = "addr"; break;
+ case RC_FILE_CONSTANT: filename = "const"; break;
+ default: filename = "BAD FILE"; break;
+ }
+ fprintf(f, "%s[%i%s]", filename, index, reladdr ? " + addr[0]" : "");
+ }
+}
+
+static void rc_print_mask(FILE * f, unsigned int mask)
+{
+ if (mask & RC_MASK_X) fprintf(f, "x");
+ if (mask & RC_MASK_Y) fprintf(f, "y");
+ if (mask & RC_MASK_Z) fprintf(f, "z");
+ if (mask & RC_MASK_W) fprintf(f, "w");
+}
+
+static void rc_print_dst_register(FILE * f, struct rc_dst_register dst)
+{
+ rc_print_register(f, dst.File, dst.Index, dst.RelAddr);
+ if (dst.WriteMask != RC_MASK_XYZW) {
+ fprintf(f, ".");
+ rc_print_mask(f, dst.WriteMask);
+ }
+}
+
+static char rc_swizzle_char(unsigned int swz)
+{
+ switch(swz) {
+ case RC_SWIZZLE_X: return 'x';
+ case RC_SWIZZLE_Y: return 'y';
+ case RC_SWIZZLE_Z: return 'z';
+ case RC_SWIZZLE_W: return 'w';
+ case RC_SWIZZLE_ZERO: return '0';
+ case RC_SWIZZLE_ONE: return '1';
+ case RC_SWIZZLE_HALF: return 'H';
+ case RC_SWIZZLE_UNUSED: return '_';
+ }
+ return '?';
+}
+
+static void rc_print_swizzle(FILE * f, unsigned int swizzle, unsigned int negate)
+{
+ unsigned int comp;
+ for(comp = 0; comp < 4; ++comp) {
+ rc_swizzle swz = GET_SWZ(swizzle, comp);
+ if (GET_BIT(negate, comp))
+ fprintf(f, "-");
+ fprintf(f, "%c", rc_swizzle_char(swz));
+ }
+}
+
+static void rc_print_src_register(FILE * f, struct rc_src_register src)
+{
+ int trivial_negate = (src.Negate == RC_MASK_NONE || src.Negate == RC_MASK_XYZW);
+
+ if (src.Negate == RC_MASK_XYZW)
+ fprintf(f, "-");
+ if (src.Abs)
+ fprintf(f, "|");
+
+ rc_print_register(f, src.File, src.Index, src.RelAddr);
+
+ if (src.Abs && !trivial_negate)
+ fprintf(f, "|");
+
+ if (src.Swizzle != RC_SWIZZLE_XYZW || !trivial_negate) {
+ fprintf(f, ".");
+ rc_print_swizzle(f, src.Swizzle, trivial_negate ? 0 : src.Negate);
+ }
+
+ if (src.Abs && trivial_negate)
+ fprintf(f, "|");
+}
+
+static void rc_print_normal_instruction(FILE * f, struct rc_instruction * inst)
+{
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
+ unsigned int reg;
+
+ fprintf(f, "%s", opcode->Name);
+
+ switch(inst->U.I.SaturateMode) {
+ case RC_SATURATE_NONE: break;
+ case RC_SATURATE_ZERO_ONE: fprintf(f, "_SAT"); break;
+ case RC_SATURATE_MINUS_PLUS_ONE: fprintf(f, "_SAT2"); break;
+ default: fprintf(f, "_BAD_SAT"); break;
+ }
+
+ if (opcode->HasDstReg) {
+ fprintf(f, " ");
+ rc_print_dst_register(f, inst->U.I.DstReg);
+ if (opcode->NumSrcRegs)
+ fprintf(f, ",");
+ }
+
+ for(reg = 0; reg < opcode->NumSrcRegs; ++reg) {
+ if (reg > 0)
+ fprintf(f, ",");
+ fprintf(f, " ");
+ rc_print_src_register(f, inst->U.I.SrcReg[reg]);
+ }
+
+ if (opcode->HasTexture) {
+ fprintf(f, ", %s%s[%u]",
+ textarget_to_string(inst->U.I.TexSrcTarget),
+ inst->U.I.TexShadow ? "SHADOW" : "",
+ inst->U.I.TexSrcUnit);
+ }
+
+ fprintf(f, ";");
+
+ if (inst->U.I.WriteALUResult) {
+ fprintf(f, " [aluresult = (");
+ rc_print_comparefunc(f,
+ (inst->U.I.WriteALUResult == RC_ALURESULT_X) ? "x" : "w",
+ inst->U.I.ALUResultCompare, "0");
+ fprintf(f, ")]");
+ }
+
+ fprintf(f, "\n");
+}
+
+static void rc_print_pair_instruction(FILE * f, struct rc_instruction * fullinst)
+{
+ struct rc_pair_instruction * inst = &fullinst->U.P;
+ int printedsrc = 0;
+
+ for(unsigned int src = 0; src < 3; ++src) {
+ if (inst->RGB.Src[src].Used) {
+ if (printedsrc)
+ fprintf(f, ", ");
+ fprintf(f, "src%i.xyz = ", src);
+ rc_print_register(f, inst->RGB.Src[src].File, inst->RGB.Src[src].Index, 0);
+ printedsrc = 1;
+ }
+ if (inst->Alpha.Src[src].Used) {
+ if (printedsrc)
+ fprintf(f, ", ");
+ fprintf(f, "src%i.w = ", src);
+ rc_print_register(f, inst->Alpha.Src[src].File, inst->Alpha.Src[src].Index, 0);
+ printedsrc = 1;
+ }
+ }
+ fprintf(f, "\n");
+
+ if (inst->RGB.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->RGB.Opcode);
+
+ fprintf(f, " %s%s", opcode->Name, inst->RGB.Saturate ? "_SAT" : "");
+ if (inst->RGB.WriteMask)
+ fprintf(f, " temp[%i].%s%s%s", inst->RGB.DestIndex,
+ (inst->RGB.WriteMask & 1) ? "x" : "",
+ (inst->RGB.WriteMask & 2) ? "y" : "",
+ (inst->RGB.WriteMask & 4) ? "z" : "");
+ if (inst->RGB.OutputWriteMask)
+ fprintf(f, " color[%i].%s%s%s", inst->RGB.Target,
+ (inst->RGB.OutputWriteMask & 1) ? "x" : "",
+ (inst->RGB.OutputWriteMask & 2) ? "y" : "",
+ (inst->RGB.OutputWriteMask & 4) ? "z" : "");
+ if (inst->WriteALUResult == RC_ALURESULT_X)
+ fprintf(f, " aluresult");
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ const char* abs = inst->RGB.Arg[arg].Abs ? "|" : "";
+ const char* neg = inst->RGB.Arg[arg].Negate ? "-" : "";
+ fprintf(f, ", %s%ssrc%i.%c%c%c%s", neg, abs, inst->RGB.Arg[arg].Source,
+ rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 0)),
+ rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 1)),
+ rc_swizzle_char(GET_SWZ(inst->RGB.Arg[arg].Swizzle, 2)),
+ abs);
+ }
+ fprintf(f, "\n");
+ }
+
+ if (inst->Alpha.Opcode != RC_OPCODE_NOP) {
+ const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Alpha.Opcode);
+
+ fprintf(f, " %s%s", opcode->Name, inst->Alpha.Saturate ? "_SAT" : "");
+ if (inst->Alpha.WriteMask)
+ fprintf(f, " temp[%i].w", inst->Alpha.DestIndex);
+ if (inst->Alpha.OutputWriteMask)
+ fprintf(f, " color[%i].w", inst->Alpha.Target);
+ if (inst->Alpha.DepthWriteMask)
+ fprintf(f, " depth.w");
+ if (inst->WriteALUResult == RC_ALURESULT_W)
+ fprintf(f, " aluresult");
+
+ for(unsigned int arg = 0; arg < opcode->NumSrcRegs; ++arg) {
+ const char* abs = inst->Alpha.Arg[arg].Abs ? "|" : "";
+ const char* neg = inst->Alpha.Arg[arg].Negate ? "-" : "";
+ fprintf(f, ", %s%ssrc%i.%c%s", neg, abs, inst->Alpha.Arg[arg].Source,
+ rc_swizzle_char(inst->Alpha.Arg[arg].Swizzle), abs);
+ }
+ fprintf(f, "\n");
+ }
+
+ if (inst->WriteALUResult) {
+ fprintf(f, " [aluresult = (");
+ rc_print_comparefunc(f, "result", inst->ALUResultCompare, "0");
+ fprintf(f, ")]\n");
+ }
+}
+
+/**
+ * Print program to stderr, default options.
+ */
+void rc_print_program(const struct rc_program *prog)
+{
+ unsigned int linenum = 0;
+ struct rc_instruction *inst;
+
+ fprintf(stderr, "# Radeon Compiler Program\n");
+
+ for(inst = prog->Instructions.Next; inst != &prog->Instructions; inst = inst->Next) {
+ fprintf(stderr, "%3d: ", linenum);
+
+ if (inst->Type == RC_INSTRUCTION_PAIR)
+ rc_print_pair_instruction(stderr, inst);
+ else
+ rc_print_normal_instruction(stderr, inst);
+
+ linenum++;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
new file mode 100644
index 0000000000..9c4b65f4c0
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.c
@@ -0,0 +1,389 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_program_tex.h"
+
+/* Series of transformations to be done on textures. */
+
+static struct rc_src_register shadow_ambient(struct r300_fragment_program_compiler *compiler,
+ int tmu)
+{
+ struct rc_src_register reg = { 0, };
+
+ if (compiler->enable_shadow_ambient) {
+ reg.File = RC_FILE_CONSTANT;
+ reg.Index = rc_constants_add_state(&compiler->Base.Program.Constants,
+ RC_STATE_SHADOW_AMBIENT, tmu);
+ reg.Swizzle = RC_SWIZZLE_WWWW;
+ } else {
+ reg.File = RC_FILE_NONE;
+ reg.Swizzle = RC_SWIZZLE_0000;
+ }
+ return reg;
+}
+
+static void lower_texture_rect(struct r300_fragment_program_compiler *compiler,
+ struct rc_instruction *inst)
+{
+ struct rc_instruction *inst_rect;
+ unsigned temp = rc_find_free_temporary(&compiler->Base);
+
+ if (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
+ compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords) {
+ inst_rect = rc_insert_new_instruction(&compiler->Base, inst->Prev);
+
+ inst_rect->U.I.Opcode = RC_OPCODE_MUL;
+ inst_rect->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rect->U.I.DstReg.Index = temp;
+ inst_rect->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_rect->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_rect->U.I.SrcReg[1].Index =
+ rc_constants_add_state(&compiler->Base.Program.Constants,
+ RC_STATE_R300_TEXRECT_FACTOR, inst->U.I.TexSrcUnit);
+
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = temp;
+
+ inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
+ }
+}
+
+/**
+ * Transform TEX, TXP, TXB, and KIL instructions in the following ways:
+ * - implement texture compare (shadow extensions)
+ * - extract non-native source / destination operands
+ * - premultiply texture coordinates for RECT
+ * - extract operand swizzles
+ * - introduce a temporary register when write masks are needed
+ */
+int radeonTransformTEX(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data)
+{
+ struct r300_fragment_program_compiler *compiler =
+ (struct r300_fragment_program_compiler*)data;
+
+ if (inst->U.I.Opcode != RC_OPCODE_TEX &&
+ inst->U.I.Opcode != RC_OPCODE_TXB &&
+ inst->U.I.Opcode != RC_OPCODE_TXP &&
+ inst->U.I.Opcode != RC_OPCODE_KIL)
+ return 0;
+
+ /* ARB_shadow & EXT_shadow_funcs */
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ ((c->Program.ShadowSamplers & (1 << inst->U.I.TexSrcUnit)) ||
+ (compiler->state.unit[inst->U.I.TexSrcUnit].compare_mode_enabled))) {
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+
+ if (comparefunc == RC_COMPARE_FUNC_NEVER || comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+
+ if (comparefunc == RC_COMPARE_FUNC_ALWAYS) {
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+ } else {
+ inst->U.I.SrcReg[0] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
+ }
+
+ return 1;
+ } else {
+ rc_compare_func comparefunc = compiler->state.unit[inst->U.I.TexSrcUnit].texture_compare_func;
+ struct rc_instruction * inst_rcp = NULL;
+ struct rc_instruction * inst_mad;
+ struct rc_instruction * inst_cmp;
+ unsigned tmp_texsample = rc_find_free_temporary(c);
+ unsigned tmp_sum = rc_find_free_temporary(c);
+ unsigned tmp_recip_w = 0;
+ int pass, fail, tex;
+
+ /* Save the output register. */
+ struct rc_dst_register output_reg = inst->U.I.DstReg;
+
+ /* Redirect TEX to a new temp. */
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = tmp_texsample;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+
+ if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+ tmp_recip_w = rc_find_free_temporary(c);
+
+ /* Compute 1/W. */
+ inst_rcp = rc_insert_new_instruction(c, inst);
+ inst_rcp->U.I.Opcode = RC_OPCODE_RCP;
+ inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_rcp->U.I.DstReg.Index = tmp_recip_w;
+ inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_rcp->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW;
+ }
+
+ /* Perspective-divide r by W (if it's TXP) and add the texture sample (see below). */
+ inst_mad = rc_insert_new_instruction(c, inst_rcp ? inst_rcp : inst);
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = tmp_sum;
+ inst_mad->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_ZZZZ;
+ if (inst->U.I.Opcode == RC_OPCODE_TXP) {
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[1].Index = tmp_recip_w;
+ inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW;
+ tex = 2;
+ } else {
+ inst_mad->U.I.Opcode = RC_OPCODE_ADD;
+ tex = 1;
+ }
+ inst_mad->U.I.SrcReg[tex].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[tex].Index = tmp_texsample;
+ inst_mad->U.I.SrcReg[tex].Swizzle = compiler->state.unit[inst->U.I.TexSrcUnit].depth_texture_swizzle;
+
+ /* Fake EQUAL/NOTEQUAL, it seems to pass some tests suprisingly. */
+ if (comparefunc == RC_COMPARE_FUNC_EQUAL) {
+ comparefunc = RC_COMPARE_FUNC_GEQUAL;
+ } else if (comparefunc == RC_COMPARE_FUNC_NOTEQUAL) {
+ comparefunc = RC_COMPARE_FUNC_LESS;
+ }
+
+ /* Recall that SrcReg[0] is r, SrcReg[tex] is tex and:
+ * LESS: r < tex <=> -tex+r < 0
+ * GEQUAL: r >= tex <=> not (-tex+r < 0)
+ * GREATER: r > tex <=> tex-r < 0
+ * LEQUAL: r <= tex <=> not ( tex-r < 0)
+ *
+ * This negates either r or tex: */
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GEQUAL)
+ inst_mad->U.I.SrcReg[tex].Negate = inst_mad->U.I.SrcReg[tex].Negate ^ RC_MASK_XYZW;
+ else
+ inst_mad->U.I.SrcReg[0].Negate = inst_mad->U.I.SrcReg[0].Negate ^ RC_MASK_XYZW;
+
+ /* This negates the whole expresion: */
+ if (comparefunc == RC_COMPARE_FUNC_LESS || comparefunc == RC_COMPARE_FUNC_GREATER) {
+ pass = 1;
+ fail = 2;
+ } else {
+ pass = 2;
+ fail = 1;
+ }
+
+ inst_cmp = rc_insert_new_instruction(c, inst_mad);
+ inst_cmp->U.I.Opcode = RC_OPCODE_CMP;
+ inst_cmp->U.I.DstReg = output_reg;
+ inst_cmp->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_cmp->U.I.SrcReg[0].Index = tmp_sum;
+ inst_cmp->U.I.SrcReg[pass].File = RC_FILE_NONE;
+ inst_cmp->U.I.SrcReg[pass].Swizzle = RC_SWIZZLE_1111;
+ inst_cmp->U.I.SrcReg[fail] = shadow_ambient(compiler, inst->U.I.TexSrcUnit);
+ }
+ }
+
+ /* Texture wrap modes don't work on NPOT textures or texrects.
+ *
+ * The game plan is simple. We have two flags, fake_npot and
+ * non_normalized_coords, as well as a tex target. The RECT tex target
+ * will make the emitted code use non-scaled texcoords.
+ *
+ * Non-wrapped/clamped texcoords with NPOT are free in HW. Repeat and
+ * mirroring are not. If we need to repeat, we do:
+ *
+ * MUL temp, texcoord, <scaling factor constant>
+ * FRC temp, temp ; Discard integer portion of coords
+ *
+ * This gives us coords in [0, 1].
+ *
+ * Mirroring is trickier. We're going to start out like repeat:
+ *
+ * MUL temp, texcoord, <scaling factor constant> ; De-mirror across axes
+ * MUL temp, temp, 0.5 ; Pattern repeats in [0, 2]
+ * ; so scale to [0, 1]
+ * FRC temp, temp ; Make the pattern repeat
+ * MAD temp, temp, 2, -1 ; Move the pattern to [-1, 1]
+ * ADD temp, 1, -abs(temp) ; Now comes a neat trick: use abs to mirror the pattern.
+ * ; The pattern is backwards, so reverse it (1-x).
+ *
+ * This gives us coords in [0, 1].
+ *
+ * ~ C & M. ;)
+ */
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ (inst->U.I.TexSrcTarget == RC_TEXTURE_RECT ||
+ compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot ||
+ compiler->state.unit[inst->U.I.TexSrcUnit].non_normalized_coords)) {
+ rc_wrap_mode wrapmode = compiler->state.unit[inst->U.I.TexSrcUnit].wrap_mode;
+
+ /* R300 cannot sample from rectangles. */
+ if (!c->is_r500) {
+ lower_texture_rect(compiler, inst);
+ }
+
+ if (compiler->state.unit[inst->U.I.TexSrcUnit].fake_npot &&
+ wrapmode != RC_WRAP_NONE) {
+ struct rc_instruction *inst_mov;
+ unsigned temp = rc_find_free_temporary(c);
+
+ /* For NPOT fallback, we need normalized coordinates anyway. */
+ if (c->is_r500) {
+ lower_texture_rect(compiler, inst);
+ }
+
+ if (wrapmode == RC_WRAP_REPEAT) {
+ /* Both instructions will be paired up. */
+ struct rc_instruction *inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+ inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_frc->U.I.DstReg.Index = temp;
+ inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_frc->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ } else if (wrapmode == RC_WRAP_MIRRORED_REPEAT) {
+ /*
+ * Function:
+ * f(v) = 1 - abs(frac(v * 0.5) * 2 - 1)
+ *
+ * Code:
+ * MUL temp, src0, 0.5
+ * FRC temp, temp
+ * MAD temp, temp, 2, -1
+ * ADD temp, 1, -abs(temp)
+ */
+
+ struct rc_instruction *inst_mul, *inst_frc, *inst_mad, *inst_add;
+ unsigned two, two_swizzle;
+
+ inst_mul = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mul->U.I.Opcode = RC_OPCODE_MUL;
+ inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mul->U.I.DstReg.Index = temp;
+ inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_mul->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_HHHH;
+
+ inst_frc = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_frc->U.I.Opcode = RC_OPCODE_FRC;
+ inst_frc->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_frc->U.I.DstReg.Index = temp;
+ inst_frc->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_frc->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_frc->U.I.SrcReg[0].Index = temp;
+ inst_frc->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+
+ two = rc_constants_add_immediate_scalar(&c->Program.Constants, 2, &two_swizzle);
+ inst_mad = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mad->U.I.Opcode = RC_OPCODE_MAD;
+ inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.DstReg.Index = temp;
+ inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mad->U.I.SrcReg[0].Index = temp;
+ inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0;
+ inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT;
+ inst_mad->U.I.SrcReg[1].Index = two;
+ inst_mad->U.I.SrcReg[1].Swizzle = two_swizzle;
+ inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_1111;
+ inst_mad->U.I.SrcReg[2].Negate = RC_MASK_XYZ;
+
+ inst_add = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_add->U.I.Opcode = RC_OPCODE_ADD;
+ inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_add->U.I.DstReg.Index = temp;
+ inst_add->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111;
+ inst_add->U.I.SrcReg[1].File = RC_FILE_TEMPORARY;
+ inst_add->U.I.SrcReg[1].Index = temp;
+ inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0;
+ inst_add->U.I.SrcReg[1].Abs = 1;
+ inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZ;
+ } else if (wrapmode == RC_WRAP_MIRRORED_CLAMP) {
+ /*
+ * Mirrored clamp modes are bloody simple, we just use abs
+ * to mirror [0, 1] into [-1, 0]. This works for
+ * all modes i.e. CLAMP, CLAMP_TO_EDGE, and CLAMP_TO_BORDER.
+ */
+ struct rc_instruction *inst_mov;
+
+ inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = temp;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_XYZ;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+ inst_mov->U.I.SrcReg[0].Abs = 1;
+ }
+
+ /* Preserve W for TXP/TXB. */
+ inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = temp;
+ inst_mov->U.I.DstReg.WriteMask = RC_MASK_W;
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = temp;
+ }
+ }
+
+ /* Cannot write texture to output registers (all chips) or with masks (non-r500) */
+ if (inst->U.I.Opcode != RC_OPCODE_KIL &&
+ (inst->U.I.DstReg.File != RC_FILE_TEMPORARY ||
+ (!c->is_r500 && inst->U.I.DstReg.WriteMask != RC_MASK_XYZW))) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg = inst->U.I.DstReg;
+ inst_mov->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.SrcReg[0].Index = rc_find_free_temporary(c);
+
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ }
+
+ /* Cannot read texture coordinate from constants file */
+ if (inst->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && inst->U.I.SrcReg[0].File != RC_FILE_INPUT) {
+ struct rc_instruction * inst_mov = rc_insert_new_instruction(c, inst->Prev);
+
+ inst_mov->U.I.Opcode = RC_OPCODE_MOV;
+ inst_mov->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst_mov->U.I.DstReg.Index = rc_find_free_temporary(c);
+ inst_mov->U.I.SrcReg[0] = inst->U.I.SrcReg[0];
+
+ reset_srcreg(&inst->U.I.SrcReg[0]);
+ inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY;
+ inst->U.I.SrcReg[0].Index = inst_mov->U.I.DstReg.Index;
+ }
+
+ return 1;
+}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h
new file mode 100644
index 0000000000..a0105051ac
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_tex.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (C) 2010 Corbin Simpson
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __RADEON_PROGRAM_TEX_H_
+#define __RADEON_PROGRAM_TEX_H_
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+int radeonTransformTEX(
+ struct radeon_compiler * c,
+ struct rc_instruction * inst,
+ void* data);
+
+#endif /* __RADEON_PROGRAM_TEX_H_ */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h
new file mode 100644
index 0000000000..c81d5f7a5e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_swizzle.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_SWIZZLE_H
+#define RADEON_SWIZZLE_H
+
+#include "radeon_program.h"
+
+struct rc_swizzle_split {
+ unsigned char NumPhases;
+ unsigned char Phase[4];
+};
+
+/**
+ * Describe the swizzling capability of target hardware.
+ */
+struct rc_swizzle_caps {
+ /**
+ * Check whether the given swizzle, absolute and negate combination
+ * can be implemented natively by the hardware for this opcode.
+ *
+ * \return 1 if the swizzle is native for the given opcode
+ */
+ int (*IsNative)(rc_opcode opcode, struct rc_src_register reg);
+
+ /**
+ * Determine how to split access to the masked channels of the
+ * given source register to obtain ALU-native swizzles.
+ */
+ void (*Split)(struct rc_src_register reg, unsigned int mask, struct rc_swizzle_split * split);
+};
+
+#endif /* RADEON_SWIZZLE_H */
diff --git a/src/mesa/drivers/dri/r300/r300_blit.c b/src/mesa/drivers/dri/r300/r300_blit.c
new file mode 100644
index 0000000000..d2c25fb9cd
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_blit.c
@@ -0,0 +1,663 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_common.h"
+#include "r300_context.h"
+
+#include "r300_blit.h"
+#include "r300_cmdbuf.h"
+#include "r300_emit.h"
+#include "r300_tex.h"
+#include "compiler/radeon_compiler.h"
+#include "compiler/radeon_opcodes.h"
+
+static void vp_ins_outs(struct r300_vertex_program_compiler *c)
+{
+ c->code->inputs[VERT_ATTRIB_POS] = 0;
+ c->code->inputs[VERT_ATTRIB_TEX0] = 1;
+ c->code->outputs[VERT_RESULT_HPOS] = 0;
+ c->code->outputs[VERT_RESULT_TEX0] = 1;
+}
+
+static void fp_allocate_hw_inputs(
+ struct r300_fragment_program_compiler * c,
+ void (*allocate)(void * data, unsigned input, unsigned hwreg),
+ void * mydata)
+{
+ allocate(mydata, FRAG_ATTRIB_TEX0, 0);
+}
+
+static void create_vertex_program(struct r300_context *r300)
+{
+ struct r300_vertex_program_compiler compiler;
+ struct rc_instruction *inst;
+
+ rc_init(&compiler.Base);
+
+ inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev);
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = VERT_RESULT_HPOS;
+ inst->U.I.DstReg.RelAddr = 0;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst->U.I.SrcReg[0].Abs = 0;
+ inst->U.I.SrcReg[0].File = RC_FILE_INPUT;
+ inst->U.I.SrcReg[0].Index = VERT_ATTRIB_POS;
+ inst->U.I.SrcReg[0].Negate = 0;
+ inst->U.I.SrcReg[0].RelAddr = 0;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+ inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev);
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = VERT_RESULT_TEX0;
+ inst->U.I.DstReg.RelAddr = 0;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst->U.I.SrcReg[0].Abs = 0;
+ inst->U.I.SrcReg[0].File = RC_FILE_INPUT;
+ inst->U.I.SrcReg[0].Index = VERT_ATTRIB_TEX0;
+ inst->U.I.SrcReg[0].Negate = 0;
+ inst->U.I.SrcReg[0].RelAddr = 0;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+ compiler.Base.Program.InputsRead = (1 << VERT_ATTRIB_POS) | (1 << VERT_ATTRIB_TEX0);
+ compiler.RequiredOutputs = compiler.Base.Program.OutputsWritten = (1 << VERT_RESULT_HPOS) | (1 << VERT_RESULT_TEX0);
+ compiler.SetHwInputOutput = vp_ins_outs;
+ compiler.code = &r300->blit.vp_code;
+
+ r3xx_compile_vertex_program(&compiler);
+}
+
+static void create_fragment_program(struct r300_context *r300)
+{
+ struct r300_fragment_program_compiler compiler;
+ struct rc_instruction *inst;
+
+ memset(&compiler, 0, sizeof(struct r300_fragment_program_compiler));
+ rc_init(&compiler.Base);
+
+ inst = rc_insert_new_instruction(&compiler.Base, compiler.Base.Program.Instructions.Prev);
+ inst->U.I.Opcode = RC_OPCODE_TEX;
+ inst->U.I.TexSrcTarget = RC_TEXTURE_2D;
+ inst->U.I.TexSrcUnit = 0;
+ inst->U.I.DstReg.File = RC_FILE_OUTPUT;
+ inst->U.I.DstReg.Index = FRAG_RESULT_COLOR;
+ inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
+ inst->U.I.SrcReg[0].Abs = 0;
+ inst->U.I.SrcReg[0].File = RC_FILE_INPUT;
+ inst->U.I.SrcReg[0].Index = FRAG_ATTRIB_TEX0;
+ inst->U.I.SrcReg[0].Negate = 0;
+ inst->U.I.SrcReg[0].RelAddr = 0;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW;
+
+ compiler.Base.Program.InputsRead = (1 << FRAG_ATTRIB_TEX0);
+ compiler.OutputColor[0] = FRAG_RESULT_COLOR;
+ compiler.OutputDepth = FRAG_RESULT_DEPTH;
+ compiler.enable_shadow_ambient = GL_TRUE;
+ compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515);
+ compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
+ compiler.code = &r300->blit.fp_code;
+ compiler.AllocateHwInputs = fp_allocate_hw_inputs;
+
+ r3xx_compile_fragment_program(&compiler);
+}
+
+void r300_blit_init(struct r300_context *r300)
+{
+ if (r300->options.hw_tcl_enabled)
+ create_vertex_program(r300);
+ create_fragment_program(r300);
+}
+
+static void r300_emit_tx_setup(struct r300_context *r300,
+ gl_format mesa_format,
+ struct radeon_bo *bo,
+ intptr_t offset,
+ unsigned width,
+ unsigned height,
+ unsigned pitch)
+{
+ int is_r500 = r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515;
+ BATCH_LOCALS(&r300->radeon);
+
+ assert(is_r500 ? width <= 4096 : width <= 2048);
+ assert(is_r500 ? height <= 4096 : height <= 2048);
+ assert(r300TranslateTexFormat(mesa_format) >= 0);
+ assert(offset % 32 == 0);
+
+ BEGIN_BATCH(17);
+ OUT_BATCH_REGVAL(R300_TX_FILTER0_0,
+ (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_S_SHIFT) |
+ (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT) |
+ (R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_R_SHIFT) |
+ R300_TX_MIN_FILTER_MIP_NONE |
+ R300_TX_MIN_FILTER_NEAREST |
+ R300_TX_MAG_FILTER_NEAREST |
+ (0 << 28));
+ OUT_BATCH_REGVAL(R300_TX_FILTER1_0, 0);
+ OUT_BATCH_REGVAL(R300_TX_SIZE_0,
+ (((width - 1) & 0x7ff) << R300_TX_WIDTHMASK_SHIFT) |
+ (((height - 1) & 0x7ff) << R300_TX_HEIGHTMASK_SHIFT) |
+ (0 << R300_TX_DEPTHMASK_SHIFT) |
+ (0 << R300_TX_MAX_MIP_LEVEL_SHIFT) |
+ R300_TX_SIZE_TXPITCH_EN);
+
+ OUT_BATCH_REGVAL(R300_TX_FORMAT_0, r300TranslateTexFormat(mesa_format));
+ OUT_BATCH_REGVAL(R300_TX_FORMAT2_0,
+ (pitch - 1) |
+ (is_r500 && width > 2048 ? R500_TXWIDTH_BIT11 : 0) |
+ (is_r500 && height > 2048 ? R500_TXHEIGHT_BIT11 : 0));
+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0, 1);
+ OUT_BATCH_RELOC(0, bo, offset, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+
+ OUT_BATCH_REGSEQ(R300_TX_INVALTAGS, 2);
+ OUT_BATCH(0);
+ OUT_BATCH(1);
+
+ END_BATCH();
+}
+
+#define EASY_US_FORMAT(FMT, C0, C1, C2, C3, SIGN) \
+ (FMT | R500_C0_SEL_##C0 | R500_C1_SEL_##C1 | \
+ R500_C2_SEL_##C2 | R500_C3_SEL_##C3 | R500_OUT_SIGN(SIGN))
+
+static uint32_t mesa_format_to_us_format(gl_format mesa_format)
+{
+ switch(mesa_format)
+ {
+ case MESA_FORMAT_RGBA8888: // x
+ return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, B, G, R, 0);
+ case MESA_FORMAT_RGB565: // x
+ case MESA_FORMAT_ARGB1555: // x
+ case MESA_FORMAT_RGBA8888_REV: // x
+ return EASY_US_FORMAT(R500_OUT_FMT_C4_8, R, G, B, A, 0);
+ case MESA_FORMAT_ARGB8888: // x
+ return EASY_US_FORMAT(R500_OUT_FMT_C4_8, B, G, R, A, 0);
+ case MESA_FORMAT_ARGB8888_REV:
+ return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, R, G, B, 0);
+ case MESA_FORMAT_XRGB8888:
+ return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, R, G, B, 0);
+
+ case MESA_FORMAT_RGB332:
+ return EASY_US_FORMAT(R500_OUT_FMT_C_3_3_2, A, R, G, B, 0);
+
+ case MESA_FORMAT_RGBA_FLOAT32:
+ return EASY_US_FORMAT(R500_OUT_FMT_C4_32_FP, R, G, B, A, 0);
+ case MESA_FORMAT_RGBA_FLOAT16:
+ return EASY_US_FORMAT(R500_OUT_FMT_C4_16_FP, R, G, B, A, 0);
+ case MESA_FORMAT_ALPHA_FLOAT32:
+ return EASY_US_FORMAT(R500_OUT_FMT_C_32_FP, A, A, A, A, 0);
+ case MESA_FORMAT_ALPHA_FLOAT16:
+ return EASY_US_FORMAT(R500_OUT_FMT_C_16_FP, A, A, A, A, 0);
+
+ case MESA_FORMAT_SIGNED_RGBA8888:
+ return EASY_US_FORMAT(R500_OUT_FMT_C4_8, R, G, B, A, 0xf);
+ case MESA_FORMAT_SIGNED_RGBA8888_REV:
+ return EASY_US_FORMAT(R500_OUT_FMT_C4_8, A, B, G, R, 0xf);
+ case MESA_FORMAT_SIGNED_RGBA_16:
+ return EASY_US_FORMAT(R500_OUT_FMT_C4_16, R, G, B, A, 0xf);
+
+ default:
+ fprintf(stderr, "Unsupported format %s for US output\n", _mesa_get_format_name(mesa_format));
+ assert(0);
+ return 0;
+ }
+}
+#undef EASY_US_FORMAT
+
+static void r500_emit_fp_setup(struct r300_context *r300,
+ struct r500_fragment_program_code *fp,
+ gl_format dst_format)
+{
+ r500_emit_fp(r300, (uint32_t *)fp->inst, (fp->inst_end + 1) * 6, 0, 0, 0);
+ BATCH_LOCALS(&r300->radeon);
+
+ BEGIN_BATCH(10);
+ OUT_BATCH_REGSEQ(R500_US_CODE_ADDR, 3);
+ OUT_BATCH(R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(fp->inst_end));
+ OUT_BATCH(R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(fp->inst_end));
+ OUT_BATCH(0);
+ OUT_BATCH_REGVAL(R500_US_CONFIG, 0);
+ OUT_BATCH_REGVAL(R500_US_OUT_FMT_0, mesa_format_to_us_format(dst_format));
+ OUT_BATCH_REGVAL(R500_US_PIXSIZE, fp->max_temp_idx);
+ END_BATCH();
+}
+
+static void r500_emit_rs_setup(struct r300_context *r300)
+{
+ BATCH_LOCALS(&r300->radeon);
+
+ BEGIN_BATCH(7);
+ OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
+ OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN);
+ OUT_BATCH(0);
+ OUT_BATCH_REGVAL(R500_RS_INST_0,
+ (0 << R500_RS_INST_TEX_ID_SHIFT) |
+ (0 << R500_RS_INST_TEX_ADDR_SHIFT) |
+ R500_RS_INST_TEX_CN_WRITE |
+ R500_RS_INST_COL_CN_NO_WRITE);
+ OUT_BATCH_REGVAL(R500_RS_IP_0,
+ (0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+ (1 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+ (2 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+ (3 << R500_RS_IP_TEX_PTR_Q_SHIFT));
+ END_BATCH();
+}
+
+static void r300_emit_fp_setup(struct r300_context *r300,
+ struct r300_fragment_program_code *code,
+ gl_format dst_format)
+{
+ unsigned i;
+ BATCH_LOCALS(&r300->radeon);
+
+ BEGIN_BATCH((code->alu.length + 1) * 4 + code->tex.length + 1 + 11);
+
+ OUT_BATCH_REGSEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++) {
+ OUT_BATCH(code->alu.inst[i].rgb_inst);
+ }
+ OUT_BATCH_REGSEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++) {
+ OUT_BATCH(code->alu.inst[i].rgb_addr);
+ }
+ OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++) {
+ OUT_BATCH(code->alu.inst[i].alpha_inst);
+ }
+ OUT_BATCH_REGSEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++) {
+ OUT_BATCH(code->alu.inst[i].alpha_addr);
+ }
+
+ OUT_BATCH_REGSEQ(R300_US_TEX_INST_0, code->tex.length);
+ OUT_BATCH_TABLE(code->tex.inst, code->tex.length);
+
+ OUT_BATCH_REGSEQ(R300_US_CONFIG, 3);
+ OUT_BATCH(R300_PFS_CNTL_FIRST_NODE_HAS_TEX);
+ OUT_BATCH(code->pixsize);
+ OUT_BATCH(code->code_offset);
+ OUT_BATCH_REGSEQ(R300_US_CODE_ADDR_0, 4);
+ OUT_BATCH_TABLE(code->code_addr, 4);
+ OUT_BATCH_REGVAL(R500_US_OUT_FMT_0, mesa_format_to_us_format(dst_format));
+ END_BATCH();
+}
+
+static void r300_emit_rs_setup(struct r300_context *r300)
+{
+ BATCH_LOCALS(&r300->radeon);
+
+ BEGIN_BATCH(7);
+ OUT_BATCH_REGSEQ(R300_RS_COUNT, 2);
+ OUT_BATCH((4 << R300_IT_COUNT_SHIFT) | R300_HIRES_EN);
+ OUT_BATCH(0);
+ OUT_BATCH_REGVAL(R300_RS_INST_0,
+ R300_RS_INST_TEX_ID(0) |
+ R300_RS_INST_TEX_ADDR(0) |
+ R300_RS_INST_TEX_CN_WRITE);
+ OUT_BATCH_REGVAL(R300_RS_IP_0,
+ R300_RS_TEX_PTR(0) |
+ R300_RS_SEL_S(R300_RS_SEL_C0) |
+ R300_RS_SEL_T(R300_RS_SEL_C1) |
+ R300_RS_SEL_R(R300_RS_SEL_K0) |
+ R300_RS_SEL_Q(R300_RS_SEL_K1));
+ END_BATCH();
+}
+
+static void emit_pvs_setup(struct r300_context *r300,
+ uint32_t *vp_code,
+ unsigned vp_len)
+{
+ BATCH_LOCALS(&r300->radeon);
+
+ r300_emit_vpu(r300, vp_code, vp_len * 4, R300_PVS_CODE_START);
+
+ BEGIN_BATCH(4);
+ OUT_BATCH_REGSEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
+ OUT_BATCH((0 << R300_PVS_FIRST_INST_SHIFT) |
+ ((vp_len - 1) << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ ((vp_len - 1)<< R300_PVS_LAST_INST_SHIFT));
+ OUT_BATCH(0);
+ OUT_BATCH((vp_len - 1) << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+ END_BATCH();
+}
+
+static void emit_vap_setup(struct r300_context *r300)
+{
+ int tex_offset;
+ BATCH_LOCALS(&r300->radeon);
+
+ if (r300->options.hw_tcl_enabled)
+ tex_offset = 1;
+ else
+ tex_offset = 6;
+
+ BEGIN_BATCH(12);
+ OUT_BATCH_REGSEQ(R300_SE_VTE_CNTL, 2);
+ OUT_BATCH(R300_VTX_XY_FMT | R300_VTX_Z_FMT);
+ OUT_BATCH(4);
+
+ OUT_BATCH_REGVAL(R300_VAP_PSC_SGN_NORM_CNTL, 0xaaaaaaaa);
+ OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_0,
+ ((R300_DATA_TYPE_FLOAT_2 | (0 << R300_DST_VEC_LOC_SHIFT)) << 0) |
+ (((tex_offset << R300_DST_VEC_LOC_SHIFT) | R300_DATA_TYPE_FLOAT_2 | R300_LAST_VEC) << 16));
+ OUT_BATCH_REGVAL(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+ ((((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
+ (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) |
+ (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) |
+ (0xf << R300_WRITE_ENA_SHIFT) ) << 0) |
+ (((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) |
+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) |
+ (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) |
+ (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) |
+ (0xf << R300_WRITE_ENA_SHIFT) ) << 16) ) );
+ OUT_BATCH_REGSEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
+ OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT);
+ OUT_BATCH(R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS);
+ END_BATCH();
+}
+
+static GLboolean validate_buffers(struct r300_context *r300,
+ struct radeon_bo *src_bo,
+ struct radeon_bo *dst_bo)
+{
+ int ret;
+
+ radeon_cs_space_reset_bos(r300->radeon.cmdbuf.cs);
+
+ ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs,
+ src_bo, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ return GL_FALSE;
+
+ ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs,
+ dst_bo, 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT);
+ if (ret)
+ return GL_FALSE;
+
+ return GL_TRUE;
+}
+
+/**
+ * Calculate texcoords for given image region.
+ * Output values are [minx, maxx, miny, maxy]
+ */
+static void calc_tex_coords(float img_width, float img_height,
+ float x, float y,
+ float reg_width, float reg_height,
+ unsigned flip_y, float *buf)
+{
+ buf[0] = x / img_width;
+ buf[1] = buf[0] + reg_width / img_width;
+ buf[2] = y / img_height;
+ buf[3] = buf[2] + reg_height / img_height;
+ if (flip_y)
+ {
+ buf[2] = 1.0 - buf[2];
+ buf[3] = 1.0 - buf[3];
+ }
+}
+
+static void emit_draw_packet(struct r300_context *r300,
+ unsigned src_width, unsigned src_height,
+ unsigned src_x_offset, unsigned src_y_offset,
+ unsigned dst_x_offset, unsigned dst_y_offset,
+ unsigned reg_width, unsigned reg_height,
+ unsigned flip_y)
+{
+ float texcoords[4];
+
+ calc_tex_coords(src_width, src_height,
+ src_x_offset, src_y_offset,
+ reg_width, reg_height,
+ flip_y, texcoords);
+
+ float verts[] = { dst_x_offset, dst_y_offset,
+ texcoords[0], texcoords[2],
+ dst_x_offset, dst_y_offset + reg_height,
+ texcoords[0], texcoords[3],
+ dst_x_offset + reg_width, dst_y_offset + reg_height,
+ texcoords[1], texcoords[3],
+ dst_x_offset + reg_width, dst_y_offset,
+ texcoords[1], texcoords[2] };
+
+ BATCH_LOCALS(&r300->radeon);
+
+ BEGIN_BATCH(19);
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_IMMD_2, 16);
+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED |
+ (4 << 16) | R300_VAP_VF_CNTL__PRIM_QUADS);
+ OUT_BATCH_TABLE(verts, 16);
+ END_BATCH();
+}
+
+static void other_stuff(struct r300_context *r300)
+{
+ BATCH_LOCALS(&r300->radeon);
+
+ BEGIN_BATCH(13);
+ OUT_BATCH_REGVAL(R300_GA_POLY_MODE,
+ R300_GA_POLY_MODE_FRONT_PTYPE_TRI | R300_GA_POLY_MODE_BACK_PTYPE_TRI);
+ OUT_BATCH_REGVAL(R300_SU_CULL_MODE, R300_FRONT_FACE_CCW);
+ OUT_BATCH_REGVAL(R300_FG_FOG_BLEND, 0);
+ OUT_BATCH_REGVAL(R300_FG_ALPHA_FUNC, 0);
+ OUT_BATCH_REGSEQ(R300_RB3D_CBLEND, 2);
+ OUT_BATCH(0x0);
+ OUT_BATCH(0x0);
+ OUT_BATCH_REGVAL(R300_ZB_CNTL, 0);
+ END_BATCH();
+ if (r300->options.hw_tcl_enabled) {
+ BEGIN_BATCH(2);
+ OUT_BATCH_REGVAL(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
+ END_BATCH();
+ }
+}
+
+static void emit_cb_setup(struct r300_context *r300,
+ struct radeon_bo *bo,
+ intptr_t offset,
+ gl_format mesa_format,
+ unsigned pitch,
+ unsigned width,
+ unsigned height)
+{
+ BATCH_LOCALS(&r300->radeon);
+
+ unsigned x1, y1, x2, y2;
+ x1 = 0;
+ y1 = 0;
+ x2 = width - 1;
+ y2 = height - 1;
+
+ if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+ x1 += R300_SCISSORS_OFFSET;
+ y1 += R300_SCISSORS_OFFSET;
+ x2 += R300_SCISSORS_OFFSET;
+ y2 += R300_SCISSORS_OFFSET;
+ }
+
+ r300_emit_cb_setup(r300, bo, offset, mesa_format,
+ _mesa_get_format_bytes(mesa_format),
+ _mesa_format_row_stride(mesa_format, pitch));
+
+ BEGIN_BATCH_NO_AUTOSTATE(5);
+ OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
+ OUT_BATCH((x1 << R300_SCISSORS_X_SHIFT)|(y1 << R300_SCISSORS_Y_SHIFT));
+ OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT));
+ OUT_BATCH_REGVAL(R300_RB3D_CCTL, 0);
+ END_BATCH();
+}
+
+unsigned r300_check_blit(gl_format dst_format)
+{
+ switch (dst_format) {
+ case MESA_FORMAT_RGB565:
+ case MESA_FORMAT_ARGB1555:
+ case MESA_FORMAT_RGBA8888:
+ case MESA_FORMAT_RGBA8888_REV:
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_ARGB8888_REV:
+ case MESA_FORMAT_XRGB8888:
+ break;
+ default:
+ return 0;
+ }
+
+ if (_mesa_get_format_bits(dst_format, GL_DEPTH_BITS) > 0)
+ return 0;
+
+ return 1;
+}
+
+/**
+ * Copy a region of [@a width x @a height] pixels from source buffer
+ * to destination buffer.
+ * @param[in] r300 r300 context
+ * @param[in] src_bo source radeon buffer object
+ * @param[in] src_offset offset of the source image in the @a src_bo
+ * @param[in] src_mesaformat source image format
+ * @param[in] src_pitch aligned source image width
+ * @param[in] src_width source image width
+ * @param[in] src_height source image height
+ * @param[in] src_x_offset x offset in the source image
+ * @param[in] src_y_offset y offset in the source image
+ * @param[in] dst_bo destination radeon buffer object
+ * @param[in] dst_offset offset of the destination image in the @a dst_bo
+ * @param[in] dst_mesaformat destination image format
+ * @param[in] dst_pitch aligned destination image width
+ * @param[in] dst_width destination image width
+ * @param[in] dst_height destination image height
+ * @param[in] dst_x_offset x offset in the destination image
+ * @param[in] dst_y_offset y offset in the destination image
+ * @param[in] width region width
+ * @param[in] height region height
+ * @param[in] flip_y set if y coords of the source image need to be flipped
+ */
+unsigned r300_blit(GLcontext *ctx,
+ struct radeon_bo *src_bo,
+ intptr_t src_offset,
+ gl_format src_mesaformat,
+ unsigned src_pitch,
+ unsigned src_width,
+ unsigned src_height,
+ unsigned src_x_offset,
+ unsigned src_y_offset,
+ struct radeon_bo *dst_bo,
+ intptr_t dst_offset,
+ gl_format dst_mesaformat,
+ unsigned dst_pitch,
+ unsigned dst_width,
+ unsigned dst_height,
+ unsigned dst_x_offset,
+ unsigned dst_y_offset,
+ unsigned reg_width,
+ unsigned reg_height,
+ unsigned flip_y)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ if (!r300_check_blit(dst_mesaformat))
+ return 0;
+
+ /* Make sure that colorbuffer has even width - hw limitation */
+ if (dst_pitch % 2 > 0)
+ ++dst_pitch;
+
+ /* Need to clamp the region size to make sure
+ * we don't read outside of the source buffer
+ * or write outside of the destination buffer.
+ */
+ if (reg_width + src_x_offset > src_width)
+ reg_width = src_width - src_x_offset;
+ if (reg_height + src_y_offset > src_height)
+ reg_height = src_height - src_y_offset;
+ if (reg_width + dst_x_offset > dst_width)
+ reg_width = dst_width - dst_x_offset;
+ if (reg_height + dst_y_offset > dst_height)
+ reg_height = dst_height - dst_y_offset;
+
+ if (src_bo == dst_bo) {
+ return 0;
+ }
+
+ if (src_offset % 32 || dst_offset % 32) {
+ return GL_FALSE;
+ }
+
+ if (0) {
+ fprintf(stderr, "src: size [%d x %d], pitch %d, "
+ "offset [%d x %d], format %s, bo %p\n",
+ src_width, src_height, src_pitch,
+ src_x_offset, src_y_offset,
+ _mesa_get_format_name(src_mesaformat),
+ src_bo);
+ fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n",
+ dst_pitch, dst_x_offset, dst_y_offset,
+ _mesa_get_format_name(dst_mesaformat), dst_bo);
+ fprintf(stderr, "region: %d x %d\n", reg_width, reg_height);
+ }
+
+ /* Flush is needed to make sure that source buffer has correct data */
+ radeonFlush(r300->radeon.glCtx);
+
+ if (!validate_buffers(r300, src_bo, dst_bo))
+ return 0;
+
+ rcommonEnsureCmdBufSpace(&r300->radeon, 200, __FUNCTION__);
+
+ other_stuff(r300);
+
+ r300_emit_tx_setup(r300, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
+
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ r500_emit_fp_setup(r300, &r300->blit.fp_code.code.r500, dst_mesaformat);
+ r500_emit_rs_setup(r300);
+ } else {
+ r300_emit_fp_setup(r300, &r300->blit.fp_code.code.r300, dst_mesaformat);
+ r300_emit_rs_setup(r300);
+ }
+
+ if (r300->options.hw_tcl_enabled)
+ emit_pvs_setup(r300, r300->blit.vp_code.body.d, 2);
+
+ emit_vap_setup(r300);
+
+ emit_cb_setup(r300, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
+
+ emit_draw_packet(r300, src_width, src_height,
+ src_x_offset, src_y_offset,
+ dst_x_offset, dst_y_offset,
+ reg_width, reg_height,
+ flip_y);
+
+ r300EmitCacheFlush(r300);
+
+ radeonFlush(r300->radeon.glCtx);
+
+ return 1;
+}
diff --git a/src/mesa/drivers/dri/r300/r300_blit.h b/src/mesa/drivers/dri/r300/r300_blit.h
new file mode 100644
index 0000000000..39b157a57b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_blit.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef R300_BLIT_H
+#define R300_BLIT_H
+
+void r300_blit_init(struct r300_context *r300);
+
+unsigned r300_check_blit(gl_format mesa_format);
+
+unsigned r300_blit(GLcontext *ctx,
+ struct radeon_bo *src_bo,
+ intptr_t src_offset,
+ gl_format src_mesaformat,
+ unsigned src_pitch,
+ unsigned src_width,
+ unsigned src_height,
+ unsigned src_x_offset,
+ unsigned src_y_offset,
+ struct radeon_bo *dst_bo,
+ intptr_t dst_offset,
+ gl_format dst_mesaformat,
+ unsigned dst_pitch,
+ unsigned dst_width,
+ unsigned dst_height,
+ unsigned dst_x_offset,
+ unsigned dst_y_offset,
+ unsigned reg_width,
+ unsigned reg_height,
+ unsigned flip_y);
+
+#endif // R300_BLIT_H
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
new file mode 100644
index 0000000000..c40802aec6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
@@ -0,0 +1,907 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#include "main/glheader.h"
+#include "main/state.h"
+#include "main/imports.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+
+#include "drm.h"
+#include "radeon_drm.h"
+
+#include "r300_context.h"
+#include "r300_reg.h"
+#include "r300_cmdbuf.h"
+#include "r300_emit.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_mipmap_tree.h"
+#include "radeon_queryobj.h"
+
+/** # of dwords reserved for additional instructions that may need to be written
+ * during flushing.
+ */
+#define SPACE_FOR_FLUSHING 4
+
+static unsigned packet0_count(r300ContextPtr r300, uint32_t *pkt)
+{
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ return ((((*pkt) >> 16) & 0x3FFF) + 1);
+ } else {
+ drm_r300_cmd_header_t *t = (drm_r300_cmd_header_t*)pkt;
+ return t->packet0.count;
+ }
+}
+
+#define vpu_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->vpu.count)
+#define r500fp_count(ptr) (((drm_r300_cmd_header_t*)(ptr))->r500fp.count)
+
+static int check_vpu(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int cnt;
+ int extra = 1;
+ cnt = vpu_count(atom->cmd);
+
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ extra = 3;
+ }
+
+ return cnt ? (cnt * 4) + extra : 0;
+}
+
+static int check_vpp(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int cnt;
+ int extra = 1;
+
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ cnt = r300->selected_vp->code.constants.Count * 4;
+ extra = 3;
+ } else {
+ cnt = vpu_count(atom->cmd);
+ extra = 1;
+ }
+
+ return cnt ? (cnt * 4) + extra : 0;
+}
+
+void r300_emit_vpu(struct r300_context *r300,
+ uint32_t *data,
+ unsigned len,
+ uint32_t addr)
+{
+ BATCH_LOCALS(&r300->radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 + len);
+ OUT_BATCH_REGVAL(R300_VAP_PVS_VECTOR_INDX_REG, addr);
+ OUT_BATCH(CP_PACKET0(R300_VAP_PVS_UPLOAD_DATA, len-1) | RADEON_ONE_REG_WR);
+ OUT_BATCH_TABLE(data, len);
+ END_BATCH();
+}
+
+static void emit_vpu_state(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ drm_r300_cmd_header_t cmd;
+ uint32_t addr;
+
+ cmd.u = atom->cmd[0];
+ addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
+
+ r300_emit_vpu(r300, &atom->cmd[1], vpu_count(atom->cmd) * 4, addr);
+}
+
+static void emit_vpp_state(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ drm_r300_cmd_header_t cmd;
+ uint32_t addr;
+
+ cmd.u = atom->cmd[0];
+ addr = (cmd.vpu.adrhi << 8) | cmd.vpu.adrlo;
+
+ r300_emit_vpu(r300, &atom->cmd[1], r300->selected_vp->code.constants.Count * 4, addr);
+}
+
+void r500_emit_fp(struct r300_context *r300,
+ uint32_t *data,
+ unsigned len,
+ uint32_t addr,
+ unsigned type,
+ unsigned clamp)
+{
+ BATCH_LOCALS(&r300->radeon);
+
+ addr |= (type << 16);
+ addr |= (clamp << 17);
+
+ BEGIN_BATCH_NO_AUTOSTATE(len + 3);
+ OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_INDEX, 0));
+ OUT_BATCH(addr);
+ OUT_BATCH(CP_PACKET0(R500_GA_US_VECTOR_DATA, len-1) | RADEON_ONE_REG_WR);
+ OUT_BATCH_TABLE(data, len);
+ END_BATCH();
+}
+
+static void emit_r500fp_atom(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ drm_r300_cmd_header_t cmd;
+ uint32_t addr, count;
+ int type, clamp;
+
+ cmd.u = atom->cmd[0];
+ addr = ((cmd.r500fp.adrhi_flags & 1) << 8) | cmd.r500fp.adrlo;
+ type = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_TYPE);
+ clamp = !!(cmd.r500fp.adrhi_flags & R500FP_CONSTANT_CLAMP);
+
+ if (type) {
+ count = r500fp_count(atom->cmd) * 4;
+ } else {
+ count = r500fp_count(atom->cmd) * 6;
+ }
+
+ r500_emit_fp(r300, &atom->cmd[1], count, addr, type, clamp);
+}
+
+static int check_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd);
+ int dw = 0, i;
+ if (atom->cmd[0] == CP_PACKET2) {
+ return dw;
+ }
+ for(i = 0; i < numtmus; ++i) {
+ radeonTexObj *t = r300->hw.textures[i];
+ if (!t && !r300->radeon.radeonScreen->kernel_mm) {
+ dw += 0;
+ } else if (t && t->image_override && !t->bo) {
+ if (!r300->radeon.radeonScreen->kernel_mm)
+ dw += 2;
+ } else
+ dw += 4;
+ }
+ return dw;
+}
+
+static void emit_tex_offsets(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ BATCH_LOCALS(&r300->radeon);
+ int numtmus = packet0_count(r300, r300->hw.tex.offset.cmd);
+ int i;
+
+ for(i = 0; i < numtmus; ++i) {
+ radeonTexObj *t = r300->hw.textures[i];
+ if (t && !t->image_override) {
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
+ OUT_BATCH_RELOC(t->tile_bits, t->mt->bo, get_base_teximage_offset(t),
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ END_BATCH();
+ } else if (!t) {
+ /* Texture unit hasn't a texture bound.
+ * We assign the current color buffer as a fakery to make
+ * KIL work on KMS (without it, the CS checker will complain).
+ */
+ if (r300->radeon.radeonScreen->kernel_mm) {
+ struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&r300->radeon);
+ if (rrb && rrb->bo) {
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
+ OUT_BATCH_RELOC(0, rrb->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ END_BATCH();
+ }
+ }
+ } else { /* override cases */
+ if (t->bo) {
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
+ OUT_BATCH_RELOC(t->tile_bits, t->bo, 0,
+ RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
+ END_BATCH();
+ } else if (!r300->radeon.radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH_REGSEQ(R300_TX_OFFSET_0 + (i * 4), 1);
+ OUT_BATCH(t->override_offset);
+ END_BATCH();
+ } else {
+ /* Texture unit hasn't a texture bound nothings to do */
+ }
+ }
+ }
+}
+
+void r300_emit_scissor(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ BATCH_LOCALS(&r300->radeon);
+ unsigned x1, y1, x2, y2;
+ struct radeon_renderbuffer *rrb;
+
+ if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ return;
+ }
+ rrb = radeon_get_colorbuffer(&r300->radeon);
+ if (!rrb || !rrb->bo) {
+ fprintf(stderr, "no rrb\n");
+ return;
+ }
+ if (r300->radeon.state.scissor.enabled) {
+ x1 = r300->radeon.state.scissor.rect.x1;
+ y1 = r300->radeon.state.scissor.rect.y1;
+ x2 = r300->radeon.state.scissor.rect.x2;
+ y2 = r300->radeon.state.scissor.rect.y2;
+ } else {
+ x1 = 0;
+ y1 = 0;
+ x2 = rrb->base.Width - 1;
+ y2 = rrb->base.Height - 1;
+ }
+ if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+ x1 += R300_SCISSORS_OFFSET;
+ y1 += R300_SCISSORS_OFFSET;
+ x2 += R300_SCISSORS_OFFSET;
+ y2 += R300_SCISSORS_OFFSET;
+ }
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
+ OUT_BATCH((x1 << R300_SCISSORS_X_SHIFT)|(y1 << R300_SCISSORS_Y_SHIFT));
+ OUT_BATCH((x2 << R300_SCISSORS_X_SHIFT)|(y2 << R300_SCISSORS_Y_SHIFT));
+ END_BATCH();
+}
+static int check_cb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ uint32_t dw = 6 + 3 + 16;
+ if (r300->radeon.radeonScreen->kernel_mm)
+ dw += 2;
+ if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ dw -= 3 + 16;
+ }
+ return dw;
+}
+
+static void emit_scissor(struct r300_context *r300,
+ unsigned width,
+ unsigned height)
+{
+ int i;
+ BATCH_LOCALS(&r300->radeon);
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
+ OUT_BATCH(0);
+ OUT_BATCH(((width - 1) << R300_SCISSORS_X_SHIFT) |
+ ((height - 1) << R300_SCISSORS_Y_SHIFT));
+ END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(16);
+ for (i = 0; i < 4; i++) {
+ OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2);
+ OUT_BATCH((0 << R300_CLIPRECT_X_SHIFT) | (0 << R300_CLIPRECT_Y_SHIFT));
+ OUT_BATCH(((width - 1) << R300_CLIPRECT_X_SHIFT) | ((height - 1) << R300_CLIPRECT_Y_SHIFT));
+ }
+ OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1);
+ OUT_BATCH(0xAAAA);
+ OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1);
+ OUT_BATCH(0xffffff);
+ END_BATCH();
+ } else {
+ BEGIN_BATCH_NO_AUTOSTATE(3);
+ OUT_BATCH_REGSEQ(R300_SC_SCISSORS_TL, 2);
+ OUT_BATCH((R300_SCISSORS_OFFSET << R300_SCISSORS_X_SHIFT) |
+ (R300_SCISSORS_OFFSET << R300_SCISSORS_Y_SHIFT));
+ OUT_BATCH(((width + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_X_SHIFT) |
+ ((height + R300_SCISSORS_OFFSET - 1) << R300_SCISSORS_Y_SHIFT));
+ END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(16);
+ for (i = 0; i < 4; i++) {
+ OUT_BATCH_REGSEQ(R300_SC_CLIPRECT_TL_0 + (i * 8), 2);
+ OUT_BATCH((R300_SCISSORS_OFFSET << R300_CLIPRECT_X_SHIFT) | (R300_SCISSORS_OFFSET << R300_CLIPRECT_Y_SHIFT));
+ OUT_BATCH(((R300_SCISSORS_OFFSET + width - 1) << R300_CLIPRECT_X_SHIFT) |
+ ((R300_SCISSORS_OFFSET + height - 1) << R300_CLIPRECT_Y_SHIFT));
+ }
+ OUT_BATCH_REGSEQ(R300_SC_CLIP_RULE, 1);
+ OUT_BATCH(0xAAAA);
+ OUT_BATCH_REGSEQ(R300_SC_SCREENDOOR, 1);
+ OUT_BATCH(0xffffff);
+ END_BATCH();
+ }
+}
+
+void r300_emit_cb_setup(struct r300_context *r300,
+ struct radeon_bo *bo,
+ uint32_t offset,
+ GLuint format,
+ unsigned cpp,
+ unsigned pitch)
+{
+ BATCH_LOCALS(&r300->radeon);
+ uint32_t cbpitch = pitch / cpp;
+ uint32_t dw = 6;
+
+ assert(offset % 32 == 0);
+
+ switch (format) {
+ case MESA_FORMAT_SL8:
+ case MESA_FORMAT_A8:
+ case MESA_FORMAT_L8:
+ case MESA_FORMAT_I8:
+ cbpitch |= R300_COLOR_FORMAT_I8;
+ break;
+ case MESA_FORMAT_RGB565:
+ case MESA_FORMAT_RGB565_REV:
+ cbpitch |= R300_COLOR_FORMAT_RGB565;
+ break;
+ case MESA_FORMAT_ARGB4444:
+ case MESA_FORMAT_ARGB4444_REV:
+ cbpitch |= R300_COLOR_FORMAT_ARGB4444;
+ break;
+ case MESA_FORMAT_RGBA5551:
+ case MESA_FORMAT_ARGB1555:
+ case MESA_FORMAT_ARGB1555_REV:
+ cbpitch |= R300_COLOR_FORMAT_ARGB1555;
+ break;
+ case MESA_FORMAT_RGBA8888:
+ case MESA_FORMAT_RGBA8888_REV:
+ case MESA_FORMAT_XRGB8888:
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_XRGB8888_REV:
+ case MESA_FORMAT_ARGB8888_REV:
+ case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
+ cbpitch |= R300_COLOR_FORMAT_ARGB8888;
+ break;
+ default:
+ _mesa_problem(r300->radeon.glCtx, "unexpected format in emit_cb_offset()");
+ break;
+ }
+
+ if (bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ cbpitch |= R300_COLOR_TILE_ENABLE;
+
+ if (r300->radeon.radeonScreen->kernel_mm)
+ dw += 2;
+
+ BEGIN_BATCH_NO_AUTOSTATE(dw);
+ OUT_BATCH_REGSEQ(R300_RB3D_COLOROFFSET0, 1);
+ OUT_BATCH_RELOC(offset, bo, offset, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_BATCH_REGSEQ(R300_RB3D_COLORPITCH0, 1);
+ if (!r300->radeon.radeonScreen->kernel_mm)
+ OUT_BATCH(cbpitch);
+ else
+ OUT_BATCH_RELOC(cbpitch, bo, cbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+}
+
+static void emit_cb_offset_atom(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct radeon_renderbuffer *rrb;
+ uint32_t offset = r300->radeon.state.color.draw_offset;
+
+ rrb = radeon_get_colorbuffer(&r300->radeon);
+ if (!rrb || !rrb->bo) {
+ fprintf(stderr, "no rrb\n");
+ return;
+ }
+
+ if (RADEON_DEBUG & RADEON_STATE)
+ fprintf(stderr,"rrb is %p %d %dx%d\n", rrb, offset, rrb->base.Width, rrb->base.Height);
+
+ r300_emit_cb_setup(r300, rrb->bo, offset, rrb->base.Format, rrb->cpp, rrb->pitch);
+
+ if (r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ emit_scissor(r300, rrb->base.Width, rrb->base.Height);
+ }
+}
+
+static int check_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ uint32_t dw;
+ dw = 6;
+ if (r300->radeon.radeonScreen->kernel_mm)
+ dw += 2;
+ return dw;
+}
+
+static void emit_zb_offset(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ BATCH_LOCALS(&r300->radeon);
+ struct radeon_renderbuffer *rrb;
+ uint32_t zbpitch;
+ uint32_t dw = atom->check(ctx, atom);
+
+ rrb = radeon_get_depthbuffer(&r300->radeon);
+ if (!rrb)
+ return;
+
+ zbpitch = (rrb->pitch / rrb->cpp);
+ if (!r300->radeon.radeonScreen->kernel_mm) {
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
+ zbpitch |= R300_DEPTHMACROTILE_ENABLE;
+ }
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE){
+ zbpitch |= R300_DEPTHMICROTILE_TILED;
+ }
+ }
+
+ BEGIN_BATCH_NO_AUTOSTATE(dw);
+ OUT_BATCH_REGSEQ(R300_ZB_DEPTHOFFSET, 1);
+ OUT_BATCH_RELOC(0, rrb->bo, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ OUT_BATCH_REGSEQ(R300_ZB_DEPTHPITCH, 1);
+ if (!r300->radeon.radeonScreen->kernel_mm)
+ OUT_BATCH(zbpitch);
+ else
+ OUT_BATCH_RELOC(cbpitch, rrb->bo, zbpitch, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
+}
+
+static void emit_zstencil_format(GLcontext *ctx, struct radeon_state_atom * atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ BATCH_LOCALS(&r300->radeon);
+ struct radeon_renderbuffer *rrb;
+ uint32_t format = 0;
+
+ rrb = radeon_get_depthbuffer(&r300->radeon);
+ if (!rrb)
+ format = 0;
+ else {
+ if (rrb->cpp == 2)
+ format = R300_DEPTHFORMAT_16BIT_INT_Z;
+ else if (rrb->cpp == 4)
+ format = R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+ }
+
+ BEGIN_BATCH_NO_AUTOSTATE(atom->cmd_size);
+ OUT_BATCH(atom->cmd[0]);
+ atom->cmd[1] &= ~0xf;
+ atom->cmd[1] |= format;
+ OUT_BATCH(atom->cmd[1]);
+ OUT_BATCH(atom->cmd[2]);
+ OUT_BATCH(atom->cmd[3]);
+ OUT_BATCH(atom->cmd[4]);
+ END_BATCH();
+}
+
+static int check_never(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ return 0;
+}
+
+static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ return atom->cmd_size;
+}
+
+static int check_variable(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int cnt;
+ if (atom->cmd[0] == CP_PACKET2) {
+ return 0;
+ }
+ cnt = packet0_count(r300, atom->cmd);
+ return cnt ? cnt + 1 : 0;
+}
+
+static int check_r500fp(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ int cnt;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int extra = 1;
+ cnt = r500fp_count(atom->cmd);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ extra = 3;
+
+ return cnt ? (cnt * 6) + extra : 0;
+}
+
+static int check_r500fp_const(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+ int cnt;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int extra = 1;
+ cnt = r500fp_count(atom->cmd);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ extra = 3;
+
+ cnt = r500fp_count(atom->cmd);
+ return cnt ? (cnt * 4) + extra : 0;
+}
+
+#define ALLOC_STATE( ATOM, CHK, SZ, IDX ) \
+ do { \
+ r300->hw.ATOM.cmd_size = (SZ); \
+ r300->hw.ATOM.cmd = (uint32_t*)CALLOC((SZ) * sizeof(uint32_t)); \
+ r300->hw.ATOM.name = #ATOM; \
+ r300->hw.ATOM.idx = (IDX); \
+ r300->hw.ATOM.check = check_##CHK; \
+ r300->hw.ATOM.dirty = GL_FALSE; \
+ r300->radeon.hw.max_state_size += (SZ); \
+ insert_at_tail(&r300->radeon.hw.atomlist, &r300->hw.ATOM); \
+ } while (0)
+/**
+ * Allocate memory for the command buffer and initialize the state atom
+ * list. Note that the initial hardware state is set by r300InitState().
+ */
+void r300InitCmdBuf(r300ContextPtr r300)
+{
+ int mtu;
+ int has_tcl;
+ int is_r500 = 0;
+
+ has_tcl = r300->options.hw_tcl_enabled;
+
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ is_r500 = 1;
+
+ r300->radeon.hw.max_state_size = 2 + 2; /* reserve extra space for WAIT_IDLE and tex cache flush */
+
+ mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
+ if (RADEON_DEBUG & RADEON_TEXTURE) {
+ fprintf(stderr, "Using %d maximum texture units..\n", mtu);
+ }
+
+ /* Setup the atom linked list */
+ make_empty_list(&r300->radeon.hw.atomlist);
+ r300->radeon.hw.atomlist.name = "atom-list";
+
+ /* Initialize state atoms */
+ ALLOC_STATE(vpt, always, R300_VPT_CMDSIZE, 0);
+ r300->hw.vpt.cmd[R300_VPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VPORT_XSCALE, 6);
+ ALLOC_STATE(vap_cntl, always, R300_VAP_CNTL_SIZE, 0);
+ r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_STATE_FLUSH_REG, 1);
+ r300->hw.vap_cntl.cmd[R300_VAP_CNTL_FLUSH_1] = 0;
+ r300->hw.vap_cntl.cmd[R300_VAP_CNTL_CMD] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL, 1);
+ if (is_r500 && !r300->radeon.radeonScreen->kernel_mm) {
+ ALLOC_STATE(vap_index_offset, always, 2, 0);
+ r300->hw.vap_index_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_VAP_INDEX_OFFSET, 1);
+ r300->hw.vap_index_offset.cmd[1] = 0;
+ }
+ ALLOC_STATE(vte, always, 3, 0);
+ r300->hw.vte.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SE_VTE_CNTL, 2);
+ ALLOC_STATE(vap_vf_max_vtx_indx, always, 3, 0);
+ r300->hw.vap_vf_max_vtx_indx.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VF_MAX_VTX_INDX, 2);
+ ALLOC_STATE(vap_cntl_status, always, 2, 0);
+ r300->hw.vap_cntl_status.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CNTL_STATUS, 1);
+ ALLOC_STATE(vir[0], variable, R300_VIR_CMDSIZE, 0);
+ r300->hw.vir[0].cmd[R300_VIR_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_0, 1);
+ ALLOC_STATE(vir[1], variable, R300_VIR_CMDSIZE, 1);
+ r300->hw.vir[1].cmd[R300_VIR_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PROG_STREAM_CNTL_EXT_0, 1);
+ ALLOC_STATE(vic, always, R300_VIC_CMDSIZE, 0);
+ r300->hw.vic.cmd[R300_VIC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_VTX_STATE_CNTL, 2);
+ ALLOC_STATE(vap_psc_sgn_norm_cntl, always, 2, 0);
+ r300->hw.vap_psc_sgn_norm_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PSC_SGN_NORM_CNTL, SGN_NORM_ZERO_CLAMP_MINUS_ONE);
+
+ if (has_tcl) {
+ ALLOC_STATE(vap_clip_cntl, always, 2, 0);
+ r300->hw.vap_clip_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_CLIP_CNTL, 1);
+ ALLOC_STATE(vap_clip, always, 5, 0);
+ r300->hw.vap_clip.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_VAP_GB_VERT_CLIP_ADJ, 4);
+ ALLOC_STATE(vap_pvs_vtx_timeout_reg, always, 2, 0);
+ r300->hw.vap_pvs_vtx_timeout_reg.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, VAP_PVS_VTX_TIMEOUT_REG, 1);
+ }
+
+ ALLOC_STATE(vof, always, R300_VOF_CMDSIZE, 0);
+ r300->hw.vof.cmd[R300_VOF_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_OUTPUT_VTX_FMT_0, 2);
+
+ if (has_tcl) {
+ ALLOC_STATE(pvs, always, R300_PVS_CMDSIZE, 0);
+ r300->hw.pvs.cmd[R300_PVS_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_VAP_PVS_CODE_CNTL_0, 3);
+ }
+
+ ALLOC_STATE(gb_enable, always, 2, 0);
+ r300->hw.gb_enable.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_ENABLE, 1);
+ if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ ALLOC_STATE(gb_misc, always, R300_GB_MISC_CMDSIZE, 0);
+ } else {
+ ALLOC_STATE(gb_misc, never, R300_GB_MISC_CMDSIZE, 0);
+ }
+ r300->hw.gb_misc.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GB_MSPOS0, 3);
+ ALLOC_STATE(gb_misc2, always, R300_GB_MISC2_CMDSIZE, 0);
+ r300->hw.gb_misc2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x401C, 2);
+ ALLOC_STATE(txe, always, R300_TXE_CMDSIZE, 0);
+ r300->hw.txe.cmd[R300_TXE_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_ENABLE, 1);
+ ALLOC_STATE(ga_point_s0, always, 5, 0);
+ r300->hw.ga_point_s0.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_S0, 4);
+ ALLOC_STATE(ga_triangle_stipple, always, 2, 0);
+ r300->hw.ga_triangle_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_TRIANGLE_STIPPLE, 1);
+ ALLOC_STATE(ps, always, R300_PS_CMDSIZE, 0);
+ r300->hw.ps.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_SIZE, 1);
+ ALLOC_STATE(ga_point_minmax, always, 4, 0);
+ r300->hw.ga_point_minmax.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POINT_MINMAX, 3);
+ ALLOC_STATE(lcntl, always, 2, 0);
+ r300->hw.lcntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_CNTL, 1);
+ ALLOC_STATE(ga_line_stipple, always, 4, 0);
+ r300->hw.ga_line_stipple.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_LINE_STIPPLE_VALUE, 3);
+ if (!r300->radeon.radeonScreen->driScreen->dri2.enabled) {
+ ALLOC_STATE(shade, always, 2, 0);
+ } else {
+ ALLOC_STATE(shade, never, 2, 0);
+ }
+ r300->hw.shade.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_ENHANCE, 1);
+ ALLOC_STATE(shade2, always, 4, 0);
+ r300->hw.shade2.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, 0x4278, 3);
+ ALLOC_STATE(polygon_mode, always, 4, 0);
+ r300->hw.polygon_mode.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_POLY_MODE, 3);
+ ALLOC_STATE(fogp, always, 3, 0);
+ r300->hw.fogp.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_GA_FOG_SCALE, 2);
+ ALLOC_STATE(zbias_cntl, always, 2, 0);
+ r300->hw.zbias_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_TEX_WRAP, 1);
+ ALLOC_STATE(zbs, always, R300_ZBS_CMDSIZE, 0);
+ r300->hw.zbs.cmd[R300_ZBS_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
+ ALLOC_STATE(occlusion_cntl, always, 2, 0);
+ r300->hw.occlusion_cntl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_POLY_OFFSET_ENABLE, 1);
+ ALLOC_STATE(cul, always, R300_CUL_CMDSIZE, 0);
+ r300->hw.cul.cmd[R300_CUL_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_CULL_MODE, 1);
+ ALLOC_STATE(su_depth_scale, always, 3, 0);
+ r300->hw.su_depth_scale.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_DEPTH_SCALE, 2);
+ ALLOC_STATE(rc, always, R300_RC_CMDSIZE, 0);
+ r300->hw.rc.cmd[R300_RC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_COUNT, 2);
+ if (is_r500) {
+ ALLOC_STATE(ri, variable, R500_RI_CMDSIZE, 0);
+ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, 16);
+ ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, 1);
+ } else {
+ ALLOC_STATE(ri, variable, R300_RI_CMDSIZE, 0);
+ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, 8);
+ ALLOC_STATE(rr, variable, R300_RR_CMDSIZE, 0);
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, 1);
+ }
+ ALLOC_STATE(sc_hyperz, always, 3, 0);
+ r300->hw.sc_hyperz.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_HYPERZ, 2);
+ ALLOC_STATE(sc_screendoor, always, 2, 0);
+ r300->hw.sc_screendoor.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_SC_SCREENDOOR, 1);
+ ALLOC_STATE(us_out_fmt, always, 6, 0);
+ r300->hw.us_out_fmt.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_OUT_FMT, 5);
+
+ if (is_r500) {
+ ALLOC_STATE(fp, always, R500_FP_CMDSIZE, 0);
+ r300->hw.fp.cmd[R500_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CONFIG, 2);
+ r300->hw.fp.cmd[R500_FP_CNTL] = R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO;
+ r300->hw.fp.cmd[R500_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R500_US_CODE_ADDR, 3);
+ r300->hw.fp.cmd[R500_FP_CMD_2] = cmdpacket0(r300->radeon.radeonScreen, R500_US_FC_CTRL, 1);
+ r300->hw.fp.cmd[R500_FP_FC_CNTL] = 0; /* FIXME when we add flow control */
+
+ ALLOC_STATE(r500fp, r500fp, R500_FPI_CMDSIZE, 0);
+ r300->hw.r500fp.cmd[R300_FPI_CMD_0] =
+ cmdr500fp(r300->radeon.radeonScreen, 0, 0, 0, 0);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.r500fp.emit = emit_r500fp_atom;
+
+ ALLOC_STATE(r500fp_const, r500fp_const, R500_FPP_CMDSIZE, 0);
+ r300->hw.r500fp_const.cmd[R300_FPI_CMD_0] =
+ cmdr500fp(r300->radeon.radeonScreen, 0, 0, 1, 0);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.r500fp_const.emit = emit_r500fp_atom;
+ } else {
+ ALLOC_STATE(fp, always, R300_FP_CMDSIZE, 0);
+ r300->hw.fp.cmd[R300_FP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CONFIG, 3);
+ r300->hw.fp.cmd[R300_FP_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_US_CODE_ADDR_0, 4);
+
+ ALLOC_STATE(fpt, variable, R300_FPT_CMDSIZE, 0);
+ r300->hw.fpt.cmd[R300_FPT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_TEX_INST_0, 0);
+
+ ALLOC_STATE(fpi[0], variable, R300_FPI_CMDSIZE, 0);
+ r300->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, 1);
+ ALLOC_STATE(fpi[1], variable, R300_FPI_CMDSIZE, 1);
+ r300->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, 1);
+ ALLOC_STATE(fpi[2], variable, R300_FPI_CMDSIZE, 2);
+ r300->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, 1);
+ ALLOC_STATE(fpi[3], variable, R300_FPI_CMDSIZE, 3);
+ r300->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, 1);
+ ALLOC_STATE(fpp, variable, R300_FPP_CMDSIZE, 0);
+ r300->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_PFS_PARAM_0_X, 0);
+ }
+ ALLOC_STATE(fogs, always, R300_FOGS_CMDSIZE, 0);
+ r300->hw.fogs.cmd[R300_FOGS_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_BLEND, 1);
+ ALLOC_STATE(fogc, always, R300_FOGC_CMDSIZE, 0);
+ r300->hw.fogc.cmd[R300_FOGC_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_FOG_COLOR_R, 3);
+ ALLOC_STATE(at, always, R300_AT_CMDSIZE, 0);
+ r300->hw.at.cmd[R300_AT_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_ALPHA_FUNC, 2);
+ ALLOC_STATE(fg_depth_src, always, 2, 0);
+ r300->hw.fg_depth_src.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_FG_DEPTH_SRC, 1);
+ ALLOC_STATE(rb3d_cctl, always, 2, 0);
+ r300->hw.rb3d_cctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CCTL, 1);
+ ALLOC_STATE(bld, always, R300_BLD_CMDSIZE, 0);
+ r300->hw.bld.cmd[R300_BLD_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_CBLEND, 2);
+ ALLOC_STATE(cmk, always, R300_CMK_CMDSIZE, 0);
+ r300->hw.cmk.cmd[R300_CMK_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RB3D_COLOR_CHANNEL_MASK, 1);
+ if (is_r500) {
+ ALLOC_STATE(blend_color, always, 3, 0);
+ r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_CONSTANT_COLOR_AR, 2);
+ } else {
+ ALLOC_STATE(blend_color, always, 2, 0);
+ r300->hw.blend_color.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_BLEND_COLOR, 1);
+ }
+ ALLOC_STATE(rop, always, 2, 0);
+ r300->hw.rop.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_ROPCNTL, 1);
+ ALLOC_STATE(cb, cb_offset, R300_CB_CMDSIZE, 0);
+ r300->hw.cb.emit = &emit_cb_offset_atom;
+ ALLOC_STATE(rb3d_dither_ctl, always, 10, 0);
+ r300->hw.rb3d_dither_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_DITHER_CTL, 9);
+ ALLOC_STATE(rb3d_aaresolve_ctl, always, 2, 0);
+ r300->hw.rb3d_aaresolve_ctl.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_RB3D_AARESOLVE_CTL, 1);
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350) {
+ ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, always, 3, 0);
+ } else {
+ ALLOC_STATE(rb3d_discard_src_pixel_lte_threshold, never, 3, 0);
+ }
+ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 2);
+ ALLOC_STATE(zs, always, R300_ZS_CMDSIZE, 0);
+ r300->hw.zs.cmd[R300_ZS_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_ZB_CNTL, 3);
+ if (is_r500) {
+ if (r300->radeon.radeonScreen->kernel_mm)
+ ALLOC_STATE(zsb, always, R300_ZSB_CMDSIZE, 0);
+ else
+ ALLOC_STATE(zsb, never, R300_ZSB_CMDSIZE, 0);
+ r300->hw.zsb.cmd[R300_ZSB_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R500_ZB_STENCILREFMASK_BF, 1);
+ }
+
+ ALLOC_STATE(zstencil_format, always, 5, 0);
+ r300->hw.zstencil_format.cmd[0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_ZB_FORMAT, 4);
+ r300->hw.zstencil_format.emit = emit_zstencil_format;
+
+ ALLOC_STATE(zb, zb_offset, R300_ZB_CMDSIZE, 0);
+ r300->hw.zb.emit = emit_zb_offset;
+ ALLOC_STATE(zb_depthclearvalue, always, 2, 0);
+ r300->hw.zb_depthclearvalue.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_DEPTHCLEARVALUE, 1);
+ ALLOC_STATE(zb_zmask, always, 3, 0);
+ r300->hw.zb_zmask.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZMASK_OFFSET, 2);
+ ALLOC_STATE(zb_hiz_offset, always, 2, 0);
+ r300->hw.zb_hiz_offset.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_OFFSET, 1);
+ ALLOC_STATE(zb_hiz_pitch, always, 2, 0);
+ r300->hw.zb_hiz_pitch.cmd[0] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_HIZ_PITCH, 1);
+
+ /* VPU only on TCL */
+ if (has_tcl) {
+ int i;
+ ALLOC_STATE(vpi, vpu, R300_VPI_CMDSIZE, 0);
+ r300->hw.vpi.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R300_PVS_CODE_START, 0);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpi.emit = emit_vpu_state;
+
+ if (is_r500) {
+ ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0);
+ r300->hw.vpp.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R500_PVS_CONST_START, 0);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpp.emit = emit_vpp_state;
+
+ ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+ r300->hw.vps.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R500_POINT_VPORT_SCALE_OFFSET, 1);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vps.emit = emit_vpu_state;
+
+ for (i = 0; i < 6; i++) {
+ ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
+ r300->hw.vpucp[i].cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen,
+ R500_PVS_UCP_START + i, 1);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpucp[i].emit = emit_vpu_state;
+ }
+ } else {
+ ALLOC_STATE(vpp, vpp, R300_VPP_CMDSIZE, 0);
+ r300->hw.vpp.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R300_PVS_CONST_START, 0);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpp.emit = emit_vpp_state;
+
+ ALLOC_STATE(vps, vpu, R300_VPS_CMDSIZE, 0);
+ r300->hw.vps.cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen, R300_POINT_VPORT_SCALE_OFFSET, 1);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vps.emit = emit_vpu_state;
+
+ for (i = 0; i < 6; i++) {
+ ALLOC_STATE(vpucp[i], vpu, R300_VPUCP_CMDSIZE, 0);
+ r300->hw.vpucp[i].cmd[0] =
+ cmdvpu(r300->radeon.radeonScreen,
+ R300_PVS_UCP_START + i, 1);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ r300->hw.vpucp[i].emit = emit_vpu_state;
+ }
+ }
+ }
+
+ /* Textures */
+ ALLOC_STATE(tex.filter, variable, mtu + 1, 0);
+ r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, 0);
+
+ ALLOC_STATE(tex.filter_1, variable, mtu + 1, 0);
+ r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, 0);
+
+ ALLOC_STATE(tex.size, variable, mtu + 1, 0);
+ r300->hw.tex.size.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, 0);
+
+ ALLOC_STATE(tex.format, variable, mtu + 1, 0);
+ r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, 0);
+
+ ALLOC_STATE(tex.pitch, variable, mtu + 1, 0);
+ r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, 0);
+
+ ALLOC_STATE(tex.offset, tex_offsets, 1, 0);
+ r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, 0);
+ r300->hw.tex.offset.emit = &emit_tex_offsets;
+
+ ALLOC_STATE(tex.chroma_key, variable, mtu + 1, 0);
+ r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, 0);
+
+ ALLOC_STATE(tex.border_color, variable, mtu + 1, 0);
+ r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, 0);
+
+ radeon_init_query_stateobj(&r300->radeon, R300_QUERYOBJ_CMDSIZE);
+ if (r300->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530) {
+ r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, RV530_FG_ZBREG_DEST, 1);
+ r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL;
+ } else {
+ r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_SU_REG_DEST, 1);
+ r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_0] = R300_RASTER_PIPE_SELECT_ALL;
+ }
+ r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_CMD_1] = cmdpacket0(r300->radeon.radeonScreen, R300_ZB_ZPASS_DATA, 1);
+ r300->radeon.query.queryobj.cmd[R300_QUERYOBJ_DATA_1] = 0;
+
+ r300->radeon.hw.is_dirty = GL_TRUE;
+ r300->radeon.hw.all_dirty = GL_TRUE;
+
+ rcommonInitCmdBuf(&r300->radeon);
+}
diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.h b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
new file mode 100644
index 0000000000..0e68da928e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.h
@@ -0,0 +1,69 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R300_CMDBUF_H__
+#define __R300_CMDBUF_H__
+
+#include "r300_context.h"
+
+#define CACHE_FLUSH_BUFSZ (4*2)
+#define PRE_EMIT_STATE_BUFSZ (2+2)
+#define AOS_BUFSZ(nr) (3+(nr >>1)*3 + (nr&1)*2 + (nr*2))
+#define FIREAOS_BUFSZ (3)
+#define SCISSORS_BUFSZ (3)
+
+void r300InitCmdBuf(r300ContextPtr r300);
+void r300_emit_scissor(GLcontext *ctx);
+
+void r300_emit_vpu(struct r300_context *ctx,
+ uint32_t *data,
+ unsigned len,
+ uint32_t addr);
+
+void r500_emit_fp(struct r300_context *r300,
+ uint32_t *data,
+ unsigned len,
+ uint32_t addr,
+ unsigned type,
+ unsigned clamp);
+
+void r300_emit_cb_setup(struct r300_context *r300,
+ struct radeon_bo *bo,
+ uint32_t offset,
+ GLuint format,
+ unsigned cpp,
+ unsigned pitch);
+
+#endif /* __R300_CMDBUF_H__ */
diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
new file mode 100644
index 0000000000..6992ca59db
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -0,0 +1,570 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#include "main/glheader.h"
+#include "main/api_arrayelt.h"
+#include "main/context.h"
+#include "main/simple_list.h"
+#include "main/imports.h"
+#include "main/extensions.h"
+#include "main/bufferobj.h"
+#include "main/texobj.h"
+
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "drivers/common/driverfuncs.h"
+#include "drivers/common/meta.h"
+
+#include "r300_context.h"
+#include "radeon_span.h"
+#include "r300_blit.h"
+#include "r300_cmdbuf.h"
+#include "r300_state.h"
+#include "r300_tex.h"
+#include "r300_emit.h"
+#include "r300_render.h"
+#include "r300_swtcl.h"
+#include "radeon_bocs_wrapper.h"
+#include "radeon_buffer_objects.h"
+#include "radeon_queryobj.h"
+
+#include "utils.h"
+#include "xmlpool.h" /* for symbolic values of enum-type options */
+
+#define need_GL_VERSION_2_0
+#define need_GL_ARB_occlusion_query
+#define need_GL_ARB_point_parameters
+#define need_GL_ARB_vertex_program
+#define need_GL_EXT_blend_equation_separate
+#define need_GL_EXT_blend_func_separate
+#define need_GL_EXT_blend_minmax
+#define need_GL_EXT_framebuffer_blit
+#define need_GL_EXT_framebuffer_object
+#define need_GL_EXT_fog_coord
+#define need_GL_EXT_gpu_program_parameters
+#define need_GL_EXT_provoking_vertex
+#define need_GL_EXT_secondary_color
+#define need_GL_EXT_stencil_two_side
+#define need_GL_ATI_separate_stencil
+#define need_GL_NV_vertex_program
+
+#include "main/remap_helper.h"
+
+static const struct dri_extension card_extensions[] = {
+ /* *INDENT-OFF* */
+ {"GL_ARB_depth_texture", NULL},
+ {"GL_ARB_fragment_program", NULL},
+ {"GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions},
+ {"GL_ARB_multitexture", NULL},
+ {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
+ {"GL_ARB_shadow", NULL},
+ {"GL_ARB_shadow_ambient", NULL},
+ {"GL_ARB_texture_border_clamp", NULL},
+ {"GL_ARB_texture_cube_map", NULL},
+ {"GL_ARB_texture_env_add", NULL},
+ {"GL_ARB_texture_env_combine", NULL},
+ {"GL_ARB_texture_env_crossbar", NULL},
+ {"GL_ARB_texture_env_dot3", NULL},
+ {"GL_ARB_texture_mirrored_repeat", NULL},
+ {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions},
+ {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions},
+ {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions},
+ {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions},
+ {"GL_EXT_blend_subtract", NULL},
+ {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions },
+ {"GL_EXT_gpu_program_parameters", GL_EXT_gpu_program_parameters_functions},
+ {"GL_EXT_provoking_vertex", GL_EXT_provoking_vertex_functions },
+ {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions},
+ {"GL_EXT_shadow_funcs", NULL},
+ {"GL_EXT_stencil_two_side", GL_EXT_stencil_two_side_functions},
+ {"GL_EXT_stencil_wrap", NULL},
+ {"GL_EXT_texture_edge_clamp", NULL},
+ {"GL_EXT_texture_env_combine", NULL},
+ {"GL_EXT_texture_env_dot3", NULL},
+ {"GL_EXT_texture_filter_anisotropic", NULL},
+ {"GL_EXT_texture_lod_bias", NULL},
+ {"GL_EXT_texture_mirror_clamp", NULL},
+ {"GL_EXT_texture_rectangle", NULL},
+ {"GL_EXT_texture_sRGB", NULL},
+ {"GL_EXT_vertex_array_bgra", NULL},
+ {"GL_ATI_separate_stencil", GL_ATI_separate_stencil_functions},
+ {"GL_ATI_texture_env_combine3", NULL},
+ {"GL_ATI_texture_mirror_once", NULL},
+ {"GL_MESA_pack_invert", NULL},
+ {"GL_MESA_ycbcr_texture", NULL},
+ {"GL_MESAX_texture_float", NULL},
+ {"GL_NV_blend_square", NULL},
+ {"GL_NV_vertex_program", GL_NV_vertex_program_functions},
+ {"GL_SGIS_generate_mipmap", NULL},
+ {NULL, NULL}
+ /* *INDENT-ON* */
+};
+
+
+static const struct dri_extension mm_extensions[] = {
+ { "GL_EXT_framebuffer_blit", GL_EXT_framebuffer_blit_functions },
+ { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions },
+ { NULL, NULL }
+};
+
+/**
+ * The GL 2.0 functions are needed to make display lists work with
+ * functions added by GL_ATI_separate_stencil.
+ */
+static const struct dri_extension gl_20_extension[] = {
+ {"GL_VERSION_2_0", GL_VERSION_2_0_functions },
+};
+
+static const struct tnl_pipeline_stage *r300_pipeline[] = {
+ /* Catch any t&l fallbacks
+ */
+ &_tnl_vertex_transform_stage,
+ &_tnl_normal_transform_stage,
+ &_tnl_lighting_stage,
+ &_tnl_fog_coordinate_stage,
+ &_tnl_texgen_stage,
+ &_tnl_texture_transform_stage,
+ &_tnl_point_attenuation_stage,
+ &_tnl_vertex_program_stage,
+ &_tnl_render_stage,
+ 0,
+};
+
+static void r300_get_lock(radeonContextPtr rmesa)
+{
+ drm_radeon_sarea_t *sarea = rmesa->sarea;
+
+ if (sarea->ctx_owner != rmesa->dri.hwContext) {
+ sarea->ctx_owner = rmesa->dri.hwContext;
+ if (!rmesa->radeonScreen->kernel_mm)
+ radeon_bo_legacy_texture_age(rmesa->radeonScreen->bom);
+ }
+}
+
+static void r300_vtbl_emit_cs_header(struct radeon_cs *cs, radeonContextPtr rmesa)
+{
+ /* please flush pipe do all pending work */
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_SC_SCREENDOOR, 1));
+ radeon_cs_write_dword(cs, 0x0);
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_SC_SCREENDOOR, 1));
+ radeon_cs_write_dword(cs, 0x00FFFFFF);
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_SC_HYPERZ, 1));
+ radeon_cs_write_dword(cs, 0x0);
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_US_CONFIG, 1));
+ radeon_cs_write_dword(cs, 0x0);
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_ZB_CNTL, 1));
+ radeon_cs_write_dword(cs, 0x0);
+ radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen, R300_WAIT_3D));
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_RB3D_DSTCACHE_CTLSTAT, 1));
+ radeon_cs_write_dword(cs, R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+ radeon_cs_write_dword(cs, cmdpacket0(rmesa->radeonScreen,
+ R300_ZB_ZCACHE_CTLSTAT, 1));
+ radeon_cs_write_dword(cs, R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE);
+ radeon_cs_write_dword(cs, cmdwait(rmesa->radeonScreen,
+ R300_WAIT_3D | R300_WAIT_3D_CLEAN));
+}
+
+static void r300_vtbl_pre_emit_atoms(radeonContextPtr radeon)
+{
+ BATCH_LOCALS(radeon);
+
+ cp_wait(radeon, R300_WAIT_3D | R300_WAIT_3D_CLEAN);
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH_REGVAL(R300_TX_INVALTAGS, R300_TX_FLUSH);
+ END_BATCH();
+ end_3d(radeon);
+}
+
+static void r300_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ if (mode)
+ r300->radeon.Fallback |= bit;
+ else
+ r300->radeon.Fallback &= ~bit;
+
+ r300SwitchFallback(ctx, R300_FALLBACK_RADEON_COMMON, mode);
+}
+
+static void r300_emit_query_finish(radeonContextPtr radeon)
+{
+ r300ContextPtr r300 = (r300ContextPtr)radeon;
+ struct radeon_query_object *query = radeon->query.current;
+ BATCH_LOCALS(radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(3 * 2 *r300->radeon.radeonScreen->num_gb_pipes + 2);
+ switch (r300->radeon.radeonScreen->num_gb_pipes) {
+ case 4:
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset+3*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ case 3:
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_2);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset+2*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ case 2:
+ if (r300->radeon.radeonScreen->chip_family <= CHIP_FAMILY_RV380) {
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_3);
+ } else {
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_1);
+ }
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset+1*sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ case 1:
+ default:
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_0);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ break;
+ }
+ OUT_BATCH_REGVAL(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL);
+ END_BATCH();
+ query->curr_offset += r300->radeon.radeonScreen->num_gb_pipes * sizeof(uint32_t);
+ assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+ query->emitted_begin = GL_FALSE;
+}
+
+static void rv530_emit_query_finish_single_z(radeonContextPtr radeon)
+{
+ BATCH_LOCALS(radeon);
+ struct radeon_query_object *query = radeon->query.current;
+
+ BEGIN_BATCH_NO_AUTOSTATE(8);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+ END_BATCH();
+
+ query->curr_offset += sizeof(uint32_t);
+ assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+ query->emitted_begin = GL_FALSE;
+}
+
+static void rv530_emit_query_finish_double_z(radeonContextPtr radeon)
+{
+ BATCH_LOCALS(radeon);
+ struct radeon_query_object *query = radeon->query.current;
+
+ BEGIN_BATCH_NO_AUTOSTATE(14);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset, 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
+ OUT_BATCH_REGSEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_BATCH_RELOC(0, query->bo, query->curr_offset + sizeof(uint32_t), 0, RADEON_GEM_DOMAIN_GTT, 0);
+ OUT_BATCH_REGVAL(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+ END_BATCH();
+
+ query->curr_offset += 2 * sizeof(uint32_t);
+ assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+ query->emitted_begin = GL_FALSE;
+}
+
+static void r300_init_vtbl(radeonContextPtr radeon)
+{
+ radeon->vtbl.get_lock = r300_get_lock;
+ radeon->vtbl.update_viewport_offset = r300UpdateViewportOffset;
+ radeon->vtbl.emit_cs_header = r300_vtbl_emit_cs_header;
+ radeon->vtbl.swtcl_flush = r300_swtcl_flush;
+ radeon->vtbl.pre_emit_atoms = r300_vtbl_pre_emit_atoms;
+ radeon->vtbl.fallback = r300_fallback;
+ if (radeon->radeonScreen->chip_family == CHIP_FAMILY_RV530) {
+ if (radeon->radeonScreen->num_z_pipes == 2)
+ radeon->vtbl.emit_query_finish = rv530_emit_query_finish_double_z;
+ else
+ radeon->vtbl.emit_query_finish = rv530_emit_query_finish_single_z;
+ } else
+ radeon->vtbl.emit_query_finish = r300_emit_query_finish;
+
+ radeon->vtbl.check_blit = r300_check_blit;
+ radeon->vtbl.blit = r300_blit;
+
+ if (radeon->radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ radeon->vtbl.is_format_renderable = r500IsFormatRenderable;
+ } else {
+ radeon->vtbl.is_format_renderable = r300IsFormatRenderable;
+ }
+}
+
+static void r300InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ ctx->Const.MaxTextureImageUnits =
+ driQueryOptioni(&r300->radeon.optionCache, "texture_image_units");
+ ctx->Const.MaxTextureCoordUnits =
+ driQueryOptioni(&r300->radeon.optionCache, "texture_coord_units");
+ ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureImageUnits,
+ ctx->Const.MaxTextureCoordUnits);
+ ctx->Const.MaxCombinedTextureImageUnits =
+ ctx->Const.MaxVertexTextureImageUnits +
+ ctx->Const.MaxTextureImageUnits;
+
+
+ ctx->Const.MaxTextureMaxAnisotropy = 16.0;
+ ctx->Const.MaxTextureLodBias = 16.0;
+
+ if (screen->chip_family >= CHIP_FAMILY_RV515) {
+ ctx->Const.MaxTextureLevels = 13;
+ ctx->Const.MaxCubeTextureLevels = 13;
+ ctx->Const.MaxTextureRectSize = 4096;
+ ctx->Const.MaxRenderbufferSize = 4096;
+ }
+ else {
+ ctx->Const.MaxTextureLevels = 12;
+ ctx->Const.MaxCubeTextureLevels = 12;
+ ctx->Const.MaxTextureRectSize = 2048;
+ ctx->Const.MaxRenderbufferSize = 2048;
+ }
+
+ ctx->Const.MinPointSize = 1.0;
+ ctx->Const.MinPointSizeAA = 1.0;
+ ctx->Const.MaxPointSize = R300_POINTSIZE_MAX;
+ ctx->Const.MaxPointSizeAA = R300_POINTSIZE_MAX;
+
+ ctx->Const.MinLineWidth = 1.0;
+ ctx->Const.MinLineWidthAA = 1.0;
+ ctx->Const.MaxLineWidth = R300_LINESIZE_MAX;
+ ctx->Const.MaxLineWidthAA = R300_LINESIZE_MAX;
+
+ ctx->Const.MaxDrawBuffers = 1;
+ ctx->Const.MaxColorAttachments = 1;
+
+ /* currently bogus data */
+ if (r300->options.hw_tcl_enabled) {
+ ctx->Const.VertexProgram.MaxNativeInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
+ ctx->Const.VertexProgram.MaxNativeAluInstructions = VSF_MAX_FRAGMENT_LENGTH / 4;
+ ctx->Const.VertexProgram.MaxNativeAttribs = 16; /* r420 */
+ ctx->Const.VertexProgram.MaxNativeTemps = 32;
+ ctx->Const.VertexProgram.MaxNativeParameters = 256; /* r420 */
+ ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
+ }
+
+ if (screen->chip_family >= CHIP_FAMILY_RV515) {
+ ctx->Const.FragmentProgram.MaxNativeTemps = R500_PFS_NUM_TEMP_REGS;
+ ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */
+
+ /* The hardware limits are higher than this,
+ * but the non-KMS DRM interface artificially limits us
+ * to this many instructions.
+ *
+ * We could of course work around it in the KMS path,
+ * but it would be a mess, so it seems wiser
+ * to leave it as is. Going forward, the Gallium driver
+ * will not be subject to these limitations.
+ */
+ ctx->Const.FragmentProgram.MaxNativeParameters = 255;
+ ctx->Const.FragmentProgram.MaxNativeAluInstructions = 255;
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions = 255;
+ ctx->Const.FragmentProgram.MaxNativeInstructions = 255;
+ ctx->Const.FragmentProgram.MaxNativeTexIndirections = 255;
+ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
+ } else {
+ ctx->Const.FragmentProgram.MaxNativeTemps = R300_PFS_NUM_TEMP_REGS;
+ ctx->Const.FragmentProgram.MaxNativeAttribs = 11; /* copy i915... */
+ ctx->Const.FragmentProgram.MaxNativeParameters = R300_PFS_NUM_CONST_REGS;
+ ctx->Const.FragmentProgram.MaxNativeAluInstructions = R300_PFS_MAX_ALU_INST;
+ ctx->Const.FragmentProgram.MaxNativeTexInstructions = R300_PFS_MAX_TEX_INST;
+ ctx->Const.FragmentProgram.MaxNativeInstructions = R300_PFS_MAX_ALU_INST + R300_PFS_MAX_TEX_INST;
+ ctx->Const.FragmentProgram.MaxNativeTexIndirections = R300_PFS_MAX_TEX_INDIRECT;
+ ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
+ }
+
+}
+
+static void r300ParseOptions(r300ContextPtr r300, radeonScreenPtr screen)
+{
+ struct r300_options options = { 0 };
+
+ driParseConfigFiles(&r300->radeon.optionCache, &screen->optionCache,
+ screen->driScreen->myNum, "r300");
+
+ r300->radeon.initialMaxAnisotropy = driQueryOptionf(&r300->radeon.optionCache, "def_max_anisotropy");
+
+ options.stencil_two_side_disabled = driQueryOptionb(&r300->radeon.optionCache, "disable_stencil_two_side");
+ options.s3tc_force_enabled = driQueryOptionb(&r300->radeon.optionCache, "force_s3tc_enable");
+ options.s3tc_force_disabled = driQueryOptionb(&r300->radeon.optionCache, "disable_s3tc");
+
+ if (!(screen->chip_flags & RADEON_CHIPSET_TCL) || driQueryOptioni(&r300->radeon.optionCache, "tcl_mode") == DRI_CONF_TCL_SW)
+ options.hw_tcl_enabled = 0;
+ else
+ options.hw_tcl_enabled = 1;
+
+ options.conformance_mode = !driQueryOptionb(&r300->radeon.optionCache, "disable_lowimpact_fallback");
+
+ r300->options = options;
+}
+
+static void r300InitGLExtensions(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ driInitExtensions(ctx, card_extensions, GL_TRUE);
+ if (r300->radeon.radeonScreen->kernel_mm)
+ driInitExtensions(ctx, mm_extensions, GL_FALSE);
+
+ if (r300->options.stencil_two_side_disabled)
+ _mesa_disable_extension(ctx, "GL_EXT_stencil_two_side");
+
+ if (r300->options.s3tc_force_disabled) {
+ _mesa_disable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+ } else if (ctx->Mesa_DXTn || r300->options.s3tc_force_enabled) {
+ _mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
+ _mesa_enable_extension(ctx, "GL_S3_s3tc");
+ }
+
+ if (!r300->radeon.radeonScreen->drmSupportsOcclusionQueries) {
+ _mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
+ }
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV350)
+ _mesa_enable_extension(ctx, "GL_ARB_half_float_vertex");
+
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ _mesa_enable_extension(ctx, "GL_EXT_packed_depth_stencil");
+}
+
+static void r300InitIoctlFuncs(struct dd_function_table *functions)
+{
+ functions->Clear = _mesa_meta_Clear;
+ functions->Finish = radeonFinish;
+ functions->Flush = radeonFlush;
+}
+
+/* Create the device specific rendering context.
+ */
+GLboolean r300CreateContext(gl_api api,
+ const __GLcontextModes * glVisual,
+ __DRIcontext * driContextPriv,
+ void *sharedContextPrivate)
+{
+ __DRIscreen *sPriv = driContextPriv->driScreenPriv;
+ radeonScreenPtr screen = (radeonScreenPtr) (sPriv->private);
+ struct dd_function_table functions;
+ r300ContextPtr r300;
+ GLcontext *ctx;
+
+ assert(glVisual);
+ assert(driContextPriv);
+ assert(screen);
+
+ r300 = (r300ContextPtr) CALLOC(sizeof(*r300));
+ if (!r300)
+ return GL_FALSE;
+
+ r300ParseOptions(r300, screen);
+
+ r300->radeon.radeonScreen = screen;
+ r300_init_vtbl(&r300->radeon);
+
+ _mesa_init_driver_functions(&functions);
+ r300InitIoctlFuncs(&functions);
+ r300InitStateFuncs(&r300->radeon, &functions);
+ r300InitTextureFuncs(&r300->radeon, &functions);
+ r300InitShaderFuncs(&functions);
+ radeonInitQueryObjFunctions(&functions);
+ radeonInitBufferObjectFuncs(&functions);
+
+ if (!radeonInitContext(&r300->radeon, &functions,
+ glVisual, driContextPriv,
+ sharedContextPrivate)) {
+ FREE(r300);
+ return GL_FALSE;
+ }
+
+ ctx = r300->radeon.glCtx;
+
+ r300->fallback = 0;
+ if (r300->options.hw_tcl_enabled)
+ ctx->VertexProgram._MaintainTnlProgram = GL_TRUE;
+
+ ctx->FragmentProgram._MaintainTexEnvProgram = GL_TRUE;
+
+ r300InitConstValues(ctx, screen);
+
+ _mesa_set_mvp_with_dp4( ctx, GL_TRUE );
+
+ /* Initialize the software rasterizer and helper modules.
+ */
+ _swrast_CreateContext(ctx);
+ _vbo_CreateContext(ctx);
+ _tnl_CreateContext(ctx);
+ _swsetup_CreateContext(ctx);
+ _swsetup_Wakeup(ctx);
+
+ /* Install the customized pipeline:
+ */
+ _tnl_destroy_pipeline(ctx);
+ _tnl_install_pipeline(ctx, r300_pipeline);
+ TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
+
+ /* Configure swrast and TNL to match hardware characteristics:
+ */
+ _swrast_allow_pixel_fog(ctx, GL_FALSE);
+ _swrast_allow_vertex_fog(ctx, GL_TRUE);
+ _tnl_allow_pixel_fog(ctx, GL_FALSE);
+ _tnl_allow_vertex_fog(ctx, GL_TRUE);
+
+ if (r300->options.hw_tcl_enabled) {
+ r300InitDraw(ctx);
+ } else {
+ r300InitSwtcl(ctx);
+ }
+
+ r300_blit_init(r300);
+ radeon_fbo_init(&r300->radeon);
+ radeonInitSpanFuncs( ctx );
+ r300InitCmdBuf(r300);
+ r300InitState(r300);
+ r300InitShaderFunctions(r300);
+
+ r300InitGLExtensions(ctx);
+
+ return GL_TRUE;
+}
+
diff --git a/src/mesa/drivers/dri/r300/r300_context.h b/src/mesa/drivers/dri/r300/r300_context.h
new file mode 100644
index 0000000000..fbb609b9f6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_context.h
@@ -0,0 +1,560 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R300_CONTEXT_H__
+#define __R300_CONTEXT_H__
+
+#include "drm.h"
+#include "radeon_drm.h"
+#include "dri_util.h"
+#include "radeon_common.h"
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+#include "compiler/radeon_code.h"
+
+struct r300_context;
+typedef struct r300_context r300ContextRec;
+typedef struct r300_context *r300ContextPtr;
+
+
+#include "r300_vertprog.h"
+
+
+/* The blit width for texture uploads
+ */
+#define R300_BLIT_WIDTH_BYTES 1024
+#define R300_MAX_TEXTURE_UNITS 8
+
+
+
+#define R300_VPT_CMD_0 0
+#define R300_VPT_XSCALE 1
+#define R300_VPT_XOFFSET 2
+#define R300_VPT_YSCALE 3
+#define R300_VPT_YOFFSET 4
+#define R300_VPT_ZSCALE 5
+#define R300_VPT_ZOFFSET 6
+#define R300_VPT_CMDSIZE 7
+
+#define R300_VIR_CMD_0 0 /* vir is variable size (at least 1) */
+#define R300_VIR_CNTL_0 1
+#define R300_VIR_CNTL_1 2
+#define R300_VIR_CNTL_2 3
+#define R300_VIR_CNTL_3 4
+#define R300_VIR_CNTL_4 5
+#define R300_VIR_CNTL_5 6
+#define R300_VIR_CNTL_6 7
+#define R300_VIR_CNTL_7 8
+#define R300_VIR_CMDSIZE 9
+
+#define R300_VIC_CMD_0 0
+#define R300_VIC_CNTL_0 1
+#define R300_VIC_CNTL_1 2
+#define R300_VIC_CMDSIZE 3
+
+#define R300_VOF_CMD_0 0
+#define R300_VOF_CNTL_0 1
+#define R300_VOF_CNTL_1 2
+#define R300_VOF_CMDSIZE 3
+
+#define R300_PVS_CMD_0 0
+#define R300_PVS_CNTL_1 1
+#define R300_PVS_CNTL_2 2
+#define R300_PVS_CNTL_3 3
+#define R300_PVS_CMDSIZE 4
+
+#define R300_GB_MISC_CMD_0 0
+#define R300_GB_MISC_MSPOS_0 1
+#define R300_GB_MISC_MSPOS_1 2
+#define R300_GB_MISC_TILE_CONFIG 3
+#define R300_GB_MISC_CMDSIZE 4
+#define R300_GB_MISC2_CMD_0 0
+#define R300_GB_MISC2_SELECT 1
+#define R300_GB_MISC2_AA_CONFIG 2
+#define R300_GB_MISC2_CMDSIZE 3
+
+#define R300_TXE_CMD_0 0
+#define R300_TXE_ENABLE 1
+#define R300_TXE_CMDSIZE 2
+
+#define R300_PS_CMD_0 0
+#define R300_PS_POINTSIZE 1
+#define R300_PS_CMDSIZE 2
+
+#define R300_ZBS_CMD_0 0
+#define R300_ZBS_T_FACTOR 1
+#define R300_ZBS_T_CONSTANT 2
+#define R300_ZBS_W_FACTOR 3
+#define R300_ZBS_W_CONSTANT 4
+#define R300_ZBS_CMDSIZE 5
+
+#define R300_CUL_CMD_0 0
+#define R300_CUL_CULL 1
+#define R300_CUL_CMDSIZE 2
+
+#define R300_RC_CMD_0 0
+#define R300_RC_CNTL_0 1
+#define R300_RC_CNTL_1 2
+#define R300_RC_CMDSIZE 3
+
+#define R300_RI_CMD_0 0
+#define R300_RI_INTERP_0 1
+#define R300_RI_INTERP_1 2
+#define R300_RI_INTERP_2 3
+#define R300_RI_INTERP_3 4
+#define R300_RI_INTERP_4 5
+#define R300_RI_INTERP_5 6
+#define R300_RI_INTERP_6 7
+#define R300_RI_INTERP_7 8
+#define R300_RI_CMDSIZE 9
+
+#define R500_RI_CMDSIZE 17
+
+#define R300_RR_CMD_0 0 /* rr is variable size (at least 1) */
+#define R300_RR_INST_0 1
+#define R300_RR_INST_1 2
+#define R300_RR_INST_2 3
+#define R300_RR_INST_3 4
+#define R300_RR_INST_4 5
+#define R300_RR_INST_5 6
+#define R300_RR_INST_6 7
+#define R300_RR_INST_7 8
+#define R300_RR_CMDSIZE 9
+
+#define R300_FP_CMD_0 0
+#define R300_FP_CNTL0 1
+#define R300_FP_CNTL1 2
+#define R300_FP_CNTL2 3
+#define R300_FP_CMD_1 4
+#define R300_FP_NODE0 5
+#define R300_FP_NODE1 6
+#define R300_FP_NODE2 7
+#define R300_FP_NODE3 8
+#define R300_FP_CMDSIZE 9
+
+#define R500_FP_CMD_0 0
+#define R500_FP_CNTL 1
+#define R500_FP_PIXSIZE 2
+#define R500_FP_CMD_1 3
+#define R500_FP_CODE_ADDR 4
+#define R500_FP_CODE_RANGE 5
+#define R500_FP_CODE_OFFSET 6
+#define R500_FP_CMD_2 7
+#define R500_FP_FC_CNTL 8
+#define R500_FP_CMDSIZE 9
+
+#define R300_FPT_CMD_0 0
+#define R300_FPT_INSTR_0 1
+#define R300_FPT_CMDSIZE 65
+
+#define R300_FPI_CMD_0 0
+#define R300_FPI_INSTR_0 1
+#define R300_FPI_CMDSIZE 65
+/* R500 has space for 512 instructions - 6 dwords per instruction */
+#define R500_FPI_CMDSIZE (512*6+1)
+
+#define R300_FPP_CMD_0 0
+#define R300_FPP_PARAM_0 1
+#define R300_FPP_CMDSIZE (32*4+1)
+/* R500 has spcae for 256 constants - 4 dwords per constant */
+#define R500_FPP_CMDSIZE (256*4+1)
+
+#define R300_FOGS_CMD_0 0
+#define R300_FOGS_STATE 1
+#define R300_FOGS_CMDSIZE 2
+
+#define R300_FOGC_CMD_0 0
+#define R300_FOGC_R 1
+#define R300_FOGC_G 2
+#define R300_FOGC_B 3
+#define R300_FOGC_CMDSIZE 4
+
+#define R300_FOGP_CMD_0 0
+#define R300_FOGP_SCALE 1
+#define R300_FOGP_START 2
+#define R300_FOGP_CMDSIZE 3
+
+#define R300_AT_CMD_0 0
+#define R300_AT_ALPHA_TEST 1
+#define R300_AT_UNKNOWN 2
+#define R300_AT_CMDSIZE 3
+
+#define R300_BLD_CMD_0 0
+#define R300_BLD_CBLEND 1
+#define R300_BLD_ABLEND 2
+#define R300_BLD_CMDSIZE 3
+
+#define R300_CMK_CMD_0 0
+#define R300_CMK_COLORMASK 1
+#define R300_CMK_CMDSIZE 2
+
+#define R300_CB_CMD_0 0
+#define R300_CB_OFFSET 1
+#define R300_CB_CMD_1 2
+#define R300_CB_PITCH 3
+#define R300_CB_CMDSIZE 4
+
+#define R300_ZS_CMD_0 0
+#define R300_ZS_CNTL_0 1
+#define R300_ZS_CNTL_1 2
+#define R300_ZS_CNTL_2 3
+#define R300_ZS_CMDSIZE 4
+
+#define R300_ZSB_CMD_0 0
+#define R300_ZSB_CNTL_0 1
+#define R300_ZSB_CMDSIZE 2
+
+#define R300_ZB_CMD_0 0
+#define R300_ZB_OFFSET 1
+#define R300_ZB_PITCH 2
+#define R300_ZB_CMDSIZE 3
+
+#define R300_VAP_CNTL_FLUSH 0
+#define R300_VAP_CNTL_FLUSH_1 1
+#define R300_VAP_CNTL_CMD 2
+#define R300_VAP_CNTL_INSTR 3
+#define R300_VAP_CNTL_SIZE 4
+
+#define R300_VPI_CMD_0 0
+#define R300_VPI_INSTR_0 1
+#define R300_VPI_CMDSIZE 1025 /* 256 16 byte instructions */
+
+#define R300_VPP_CMD_0 0
+#define R300_VPP_PARAM_0 1
+#define R300_VPP_CMDSIZE 1025 /* 256 4-component parameters */
+
+#define R300_VPUCP_CMD_0 0
+#define R300_VPUCP_X 1
+#define R300_VPUCP_Y 2
+#define R300_VPUCP_Z 3
+#define R300_VPUCP_W 4
+#define R300_VPUCP_CMDSIZE 5 /* 256 4-component parameters */
+
+#define R300_VPS_CMD_0 0
+#define R300_VPS_ZERO_0 1
+#define R300_VPS_ZERO_1 2
+#define R300_VPS_POINTSIZE 3
+#define R300_VPS_ZERO_3 4
+#define R300_VPS_CMDSIZE 5
+
+ /* the layout is common for all fields inside tex */
+#define R300_TEX_CMD_0 0
+#define R300_TEX_VALUE_0 1
+/* We don't really use this, instead specify mtu+1 dynamically
+#define R300_TEX_CMDSIZE (MAX_TEXTURE_UNITS+1)
+*/
+
+#define R300_QUERYOBJ_CMD_0 0
+#define R300_QUERYOBJ_DATA_0 1
+#define R300_QUERYOBJ_CMD_1 2
+#define R300_QUERYOBJ_DATA_1 3
+#define R300_QUERYOBJ_CMDSIZE 4
+
+/**
+ * Cache for hardware register state.
+ */
+struct r300_hw_state {
+ struct radeon_state_atom vpt; /* viewport (1D98) */
+ struct radeon_state_atom vap_cntl;
+ struct radeon_state_atom vap_index_offset; /* 0x208c r5xx only */
+ struct radeon_state_atom vof; /* VAP output format register 0x2090 */
+ struct radeon_state_atom vte; /* (20B0) */
+ struct radeon_state_atom vap_vf_max_vtx_indx; /* Maximum Vertex Indx Clamp (2134) */
+ struct radeon_state_atom vap_cntl_status;
+ struct radeon_state_atom vir[2]; /* vap input route (2150/21E0) */
+ struct radeon_state_atom vic; /* vap input control (2180) */
+ struct radeon_state_atom vap_psc_sgn_norm_cntl; /* Programmable Stream Control Signed Normalize Control (21DC) */
+ struct radeon_state_atom vap_clip_cntl;
+ struct radeon_state_atom vap_clip;
+ struct radeon_state_atom vap_pvs_vtx_timeout_reg; /* Vertex timeout register (2288) */
+ struct radeon_state_atom pvs; /* pvs_cntl (22D0) */
+ struct radeon_state_atom gb_enable; /* (4008) */
+ struct radeon_state_atom gb_misc; /* Multisampling position shifts ? (4010) */
+ struct radeon_state_atom gb_misc2; /* Multisampling position shifts ? (4010) */
+ struct radeon_state_atom ga_point_s0; /* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) (4200) */
+ struct radeon_state_atom ga_triangle_stipple; /* (4214) */
+ struct radeon_state_atom ps; /* pointsize (421C) */
+ struct radeon_state_atom ga_point_minmax; /* (4230) */
+ struct radeon_state_atom lcntl; /* line control */
+ struct radeon_state_atom ga_line_stipple; /* (4260) */
+ struct radeon_state_atom shade;
+ struct radeon_state_atom shade2;
+ struct radeon_state_atom polygon_mode;
+ struct radeon_state_atom fogp; /* fog parameters (4294) */
+ struct radeon_state_atom ga_soft_reset; /* (429C) */
+ struct radeon_state_atom zbias_cntl;
+ struct radeon_state_atom zbs; /* zbias (42A4) */
+ struct radeon_state_atom occlusion_cntl;
+ struct radeon_state_atom cul; /* cull cntl (42B8) */
+ struct radeon_state_atom su_depth_scale; /* (42C0) */
+ struct radeon_state_atom rc; /* rs control (4300) */
+ struct radeon_state_atom ri; /* rs interpolators (4310) */
+ struct radeon_state_atom rr; /* rs route (4330) */
+ struct radeon_state_atom sc_hyperz; /* (43A4) */
+ struct radeon_state_atom sc_screendoor; /* (43E8) */
+ struct radeon_state_atom fp; /* fragment program cntl + nodes (4600) */
+ struct radeon_state_atom fpt; /* texi - (4620) */
+ struct radeon_state_atom us_out_fmt; /* (46A4) */
+ struct radeon_state_atom r500fp; /* r500 fp instructions */
+ struct radeon_state_atom r500fp_const; /* r500 fp constants */
+ struct radeon_state_atom fpi[4]; /* fp instructions (46C0/47C0/48C0/49C0) */
+ struct radeon_state_atom fogs; /* fog state (4BC0) */
+ struct radeon_state_atom fogc; /* fog color (4BC8) */
+ struct radeon_state_atom at; /* alpha test (4BD4) */
+ struct radeon_state_atom fg_depth_src; /* (4BD8) */
+ struct radeon_state_atom fpp; /* 0x4C00 and following */
+ struct radeon_state_atom rb3d_cctl; /* (4E00) */
+ struct radeon_state_atom bld; /* blending (4E04) */
+ struct radeon_state_atom cmk; /* colormask (4E0C) */
+ struct radeon_state_atom blend_color; /* constant blend color */
+ struct radeon_state_atom rop; /* ropcntl */
+ struct radeon_state_atom cb; /* colorbuffer (4E28) */
+ struct radeon_state_atom rb3d_dither_ctl; /* (4E50) */
+ struct radeon_state_atom rb3d_aaresolve_ctl; /* (4E88) */
+ struct radeon_state_atom rb3d_discard_src_pixel_lte_threshold; /* (4E88) I saw it only written on RV350 hardware.. */
+ struct radeon_state_atom zs; /* zstencil control (4F00) */
+ struct radeon_state_atom zsb; /* zstencil bf */
+ struct radeon_state_atom zstencil_format;
+ struct radeon_state_atom zb; /* z buffer (4F20) */
+ struct radeon_state_atom zb_depthclearvalue; /* (4F28) */
+ struct radeon_state_atom zb_zmask; /* (4F30) */
+ struct radeon_state_atom zb_hiz_offset; /* (4F44) */
+ struct radeon_state_atom zb_hiz_pitch; /* (4F54) */
+
+ struct radeon_state_atom vpi; /* vp instructions */
+ struct radeon_state_atom vpp; /* vp parameters */
+ struct radeon_state_atom vps; /* vertex point size (?) */
+ struct radeon_state_atom vpucp[6]; /* vp user clip plane - 6 */
+ /* 8 texture units */
+ /* the state is grouped by function and not by
+ texture unit. This makes single unit updates
+ really awkward - we are much better off
+ updating the whole thing at once */
+ struct {
+ struct radeon_state_atom filter;
+ struct radeon_state_atom filter_1;
+ struct radeon_state_atom size;
+ struct radeon_state_atom format;
+ struct radeon_state_atom pitch;
+ struct radeon_state_atom offset;
+ struct radeon_state_atom chroma_key;
+ struct radeon_state_atom border_color;
+ } tex;
+ struct radeon_state_atom txe; /* tex enable (4104) */
+ radeonTexObj *textures[R300_MAX_TEXTURE_UNITS];
+};
+
+/**
+ * State cache
+ */
+
+/* Vertex shader state */
+
+#define COLOR_IS_RGBA
+#define TAG(x) r300##x
+#include "tnl_dd/t_dd_vertex.h"
+#undef TAG
+
+struct r300_vertex_program_key {
+ GLbitfield FpReads;
+ GLuint FogAttr;
+ GLuint WPosAttr;
+};
+
+struct r300_vertex_program {
+ struct gl_vertex_program *Base;
+ struct r300_vertex_program *next;
+
+ struct r300_vertex_program_key key;
+ struct r300_vertex_program_code code;
+
+ GLboolean error;
+};
+
+struct r300_vertex_program_cont {
+ /* This is the unmodified vertex program mesa provided us with.
+ * We need to keep it unchanged because we may need to create another
+ * hw specific vertex program based on this.
+ */
+ struct gl_vertex_program mesa_program;
+ /* This is the list of hw specific vertex programs derived from mesa_program */
+ struct r300_vertex_program *progs;
+};
+
+
+/**
+* Store everything about a fragment program that is needed
+* to render with that program.
+*/
+struct r300_fragment_program {
+ GLboolean error;
+ struct r300_fragment_program *next;
+ struct r300_fragment_program_external_state state;
+
+ struct rX00_fragment_program_code code;
+ GLbitfield InputsRead;
+
+ /* attribute that we are sending the WPOS in */
+ gl_frag_attrib wpos_attr;
+ /* attribute that we are sending the fog coordinate in */
+ gl_frag_attrib fog_attr;
+};
+
+struct r300_fragment_program_cont {
+ /* This is the unmodified fragment program mesa provided us with.
+ * We need to keep it unchanged because we may need to create another
+ * hw specific fragment program based on this.
+ */
+ struct gl_fragment_program Base;
+ /* This is the list of hw specific fragment programs derived from Base */
+ struct r300_fragment_program *progs;
+};
+
+
+#define R300_MAX_AOS_ARRAYS 16
+
+
+/* r300_swtcl.c
+ */
+struct r300_swtcl_info {
+ /*
+ * Offset of the 4UB color data within a hardware (swtcl) vertex.
+ */
+ GLuint coloroffset;
+
+ /**
+ * Offset of the 3UB specular color data within a hardware (swtcl) vertex.
+ */
+ GLuint specoffset;
+};
+
+struct r300_vtable {
+ void (* SetupRSUnit)(GLcontext *ctx);
+ void (* SetupFragmentShaderTextures)(GLcontext *ctx, int *tmu_mappings);
+ void (* SetupPixelShader)(GLcontext *ctx);
+};
+
+struct r300_vertex_buffer {
+ struct vertex_attribute {
+ /* generic */
+ GLubyte element;
+ GLuint stride;
+ GLuint dwords;
+ GLubyte size; /* number of components */
+ GLboolean is_named_bo;
+ struct radeon_bo *bo;
+ GLint bo_offset;
+
+ /* hw specific */
+ uint32_t data_type:4;
+ uint32_t dst_loc:5;
+ uint32_t _signed:1;
+ uint32_t normalize:1;
+ uint32_t swizzle:12;
+ uint32_t write_mask:4;
+ } attribs[VERT_ATTRIB_MAX];
+
+ GLubyte num_attribs;
+};
+
+struct r300_index_buffer {
+ struct radeon_bo *bo;
+ int bo_offset;
+
+ GLboolean is_32bit;
+ GLuint count;
+};
+
+
+/**
+ * \brief R300 context structure.
+ */
+struct r300_context {
+ struct radeon_context radeon; /* parent class, must be first */
+
+ struct r300_vtable vtbl;
+
+ struct r300_hw_state hw;
+
+ struct r300_vertex_program *selected_vp;
+ struct r300_fragment_program *selected_fp;
+
+ /* Vertex buffers
+ */
+ GLvector4f dummy_attrib[_TNL_ATTRIB_MAX];
+ GLvector4f *temp_attrib[_TNL_ATTRIB_MAX];
+
+ struct r300_options {
+ uint32_t conformance_mode:1;
+ uint32_t hw_tcl_enabled:1;
+ uint32_t s3tc_force_enabled:1;
+ uint32_t s3tc_force_disabled:1;
+ uint32_t stencil_two_side_disabled:1;
+ } options;
+
+ struct r300_swtcl_info swtcl;
+ struct r300_vertex_buffer vbuf;
+ struct r300_index_buffer ind_buf;
+
+ uint32_t fallback;
+
+ struct {
+ struct r300_vertex_program_code vp_code;
+ struct rX00_fragment_program_code fp_code;
+ } blit;
+
+ DECLARE_RENDERINPUTS(render_inputs_bitset);
+};
+
+#define R300_CONTEXT(ctx) ((r300ContextPtr)(ctx->DriverCtx))
+
+extern void r300DestroyContext(__DRIcontext * driContextPriv);
+extern GLboolean r300CreateContext(gl_api api,
+ const __GLcontextModes * glVisual,
+ __DRIcontext * driContextPriv,
+ void *sharedContextPrivate);
+
+extern void r300InitShaderFuncs(struct dd_function_table *functions);
+
+extern void r300InitShaderFunctions(r300ContextPtr r300);
+
+extern void r300InitDraw(GLcontext *ctx);
+
+#define r300PackFloat32 radeonPackFloat32
+#define r300PackFloat24 radeonPackFloat24
+
+#endif /* __R300_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/r300/r300_draw.c b/src/mesa/drivers/dri/r300/r300_draw.c
new file mode 100644
index 0000000000..282c0e18bc
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_draw.c
@@ -0,0 +1,750 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Maciej Cencora
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHOR(S) AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include <stdlib.h>
+
+#include "main/glheader.h"
+#include "main/context.h"
+#include "main/state.h"
+#include "main/enums.h"
+#include "main/simple_list.h"
+
+#include "r300_reg.h"
+#include "r300_context.h"
+#include "r300_emit.h"
+#include "r300_render.h"
+#include "r300_state.h"
+#include "r300_tex.h"
+#include "r300_cmdbuf.h"
+
+#include "radeon_buffer_objects.h"
+#include "radeon_common_context.h"
+
+#include "tnl/tnl.h"
+#include "tnl/t_vp_build.h"
+#include "vbo/vbo_context.h"
+
+
+static int getTypeSize(GLenum type)
+{
+ switch (type) {
+ case GL_DOUBLE:
+ return sizeof(GLdouble);
+ case GL_HALF_FLOAT:
+ return sizeof(GLhalfARB);
+ case GL_FLOAT:
+ return sizeof(GLfloat);
+ case GL_INT:
+ return sizeof(GLint);
+ case GL_UNSIGNED_INT:
+ return sizeof(GLuint);
+ case GL_SHORT:
+ return sizeof(GLshort);
+ case GL_UNSIGNED_SHORT:
+ return sizeof(GLushort);
+ case GL_BYTE:
+ return sizeof(GLbyte);
+ case GL_UNSIGNED_BYTE:
+ return sizeof(GLubyte);
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static void r300FixupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLvoid *src_ptr;
+ GLuint *out;
+ int i;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ mapped_named_bo = GL_TRUE;
+ assert(mesa_ind_buf->obj->Pointer != NULL);
+ }
+ src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+ radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT,
+ "%s: Fixing index buffer format. type %d\n",
+ __func__, mesa_ind_buf->type);
+
+ if (mesa_ind_buf->type == GL_UNSIGNED_BYTE) {
+ GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+ GLubyte *in = (GLubyte *)src_ptr;
+
+ radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4);
+ radeon_bo_map(r300->ind_buf.bo, 1);
+ assert(r300->ind_buf.bo->ptr != NULL);
+ out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
+
+ for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) {
+ *out++ = in[i] | in[i + 1] << 16;
+ }
+
+ if (i < mesa_ind_buf->count) {
+ *out++ = in[i];
+ }
+ radeon_bo_unmap(r300->ind_buf.bo);
+#if MESA_BIG_ENDIAN
+ } else { /* if (mesa_ind_buf->type == GL_UNSIGNED_SHORT) */
+ GLushort *in = (GLushort *)src_ptr;
+ GLuint size = sizeof(GLushort) * ((mesa_ind_buf->count + 1) & ~1);
+
+ radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo,
+ &r300->ind_buf.bo_offset, size, 4);
+
+ radeon_bo_map(r300->ind_buf.bo, 1);
+ assert(r300->ind_buf.bo->ptr != NULL);
+ out = (GLuint *)ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
+
+ for (i = 0; i + 1 < mesa_ind_buf->count; i += 2) {
+ *out++ = in[i] | in[i + 1] << 16;
+ }
+
+ if (i < mesa_ind_buf->count) {
+ *out++ = in[i];
+ }
+ radeon_bo_unmap(r300->ind_buf.bo);
+#endif
+ }
+
+ r300->ind_buf.is_32bit = GL_FALSE;
+ r300->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ }
+}
+
+
+static void r300SetupIndexBuffer(GLcontext *ctx, const struct _mesa_index_buffer *mesa_ind_buf)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ if (!mesa_ind_buf) {
+ r300->ind_buf.bo = NULL;
+ return;
+ }
+ radeon_print(RADEON_RENDER, RADEON_TRACE, "%s\n", __func__);
+
+#if MESA_BIG_ENDIAN
+ if (mesa_ind_buf->type == GL_UNSIGNED_INT) {
+#else
+ if (mesa_ind_buf->type != GL_UNSIGNED_BYTE) {
+#endif
+ const GLvoid *src_ptr;
+ GLvoid *dst_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ if (mesa_ind_buf->obj->Name && !mesa_ind_buf->obj->Pointer) {
+ ctx->Driver.MapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, GL_READ_ONLY_ARB, mesa_ind_buf->obj);
+ assert(mesa_ind_buf->obj->Pointer != NULL);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ src_ptr = ADD_POINTERS(mesa_ind_buf->obj->Pointer, mesa_ind_buf->ptr);
+
+ const GLuint size = mesa_ind_buf->count * getTypeSize(mesa_ind_buf->type);
+
+ radeonAllocDmaRegion(&r300->radeon, &r300->ind_buf.bo, &r300->ind_buf.bo_offset, size, 4);
+
+ radeon_bo_map(r300->ind_buf.bo, 1);
+ assert(r300->ind_buf.bo->ptr != NULL);
+ dst_ptr = ADD_POINTERS(r300->ind_buf.bo->ptr, r300->ind_buf.bo_offset);
+ memcpy(dst_ptr, src_ptr, size);
+
+ radeon_bo_unmap(r300->ind_buf.bo);
+ r300->ind_buf.is_32bit = (mesa_ind_buf->type == GL_UNSIGNED_INT);
+ r300->ind_buf.count = mesa_ind_buf->count;
+
+ if (mapped_named_bo) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ELEMENT_ARRAY_BUFFER, mesa_ind_buf->obj);
+ }
+ } else {
+ r300FixupIndexBuffer(ctx, mesa_ind_buf);
+ }
+}
+
+#define CONVERT( TYPE, MACRO ) do { \
+ GLuint i, j, sz; \
+ sz = input->Size; \
+ if (input->Normalized) { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)src_ptr; \
+ for (j = 0; j < sz; j++) { \
+ *dst_ptr++ = MACRO(*in); \
+ in++; \
+ } \
+ src_ptr += stride; \
+ } \
+ } else { \
+ for (i = 0; i < count; i++) { \
+ const TYPE *in = (TYPE *)src_ptr; \
+ for (j = 0; j < sz; j++) { \
+ *dst_ptr++ = (GLfloat)(*in); \
+ in++; \
+ } \
+ src_ptr += stride; \
+ } \
+ } \
+} while (0)
+
+/**
+ * Convert attribute data type to float
+ * If the attribute uses named buffer object replace the bo with newly allocated bo
+ */
+static void r300ConvertAttrib(GLcontext *ctx, int count, const struct gl_client_array *input, struct vertex_attribute *attr)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ const GLvoid *src_ptr;
+ GLboolean mapped_named_bo = GL_FALSE;
+ GLfloat *dst_ptr;
+ GLuint stride;
+
+ stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
+
+ /* Convert value for first element only */
+ if (input->StrideB == 0)
+ count = 1;
+
+ if (input->BufferObj->Name) {
+ if (!input->BufferObj->Pointer) {
+ ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+ } else {
+ src_ptr = input->Ptr;
+ }
+
+ radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, sizeof(GLfloat) * input->Size * count, 32);
+ radeon_bo_map(attr->bo, 1);
+ dst_ptr = (GLfloat *)ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+
+ radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT,
+ "%s: Converting vertex attributes, attribute data format %x,"
+ "stride %d, components %d\n"
+ , __FUNCTION__, input->Type
+ , stride, input->Size);
+
+ assert(src_ptr != NULL);
+
+ switch (input->Type) {
+ case GL_DOUBLE:
+ CONVERT(GLdouble, (GLfloat));
+ break;
+ case GL_UNSIGNED_INT:
+ CONVERT(GLuint, UINT_TO_FLOAT);
+ break;
+ case GL_INT:
+ CONVERT(GLint, INT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_SHORT:
+ CONVERT(GLushort, USHORT_TO_FLOAT);
+ break;
+ case GL_SHORT:
+ CONVERT(GLshort, SHORT_TO_FLOAT);
+ break;
+ case GL_UNSIGNED_BYTE:
+ assert(input->Format != GL_BGRA);
+ CONVERT(GLubyte, UBYTE_TO_FLOAT);
+ break;
+ case GL_BYTE:
+ CONVERT(GLbyte, BYTE_TO_FLOAT);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ radeon_bo_unmap(attr->bo);
+ if (mapped_named_bo) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ }
+}
+
+static void r300AlignDataToDword(GLcontext *ctx, const struct gl_client_array *input, int count, struct vertex_attribute *attr)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ const int dst_stride = (input->StrideB + 3) & ~3;
+ const int size = getTypeSize(input->Type) * input->Size * count;
+ GLboolean mapped_named_bo = GL_FALSE;
+
+ radeonAllocDmaRegion(&r300->radeon, &attr->bo, &attr->bo_offset, size, 32);
+
+ radeon_bo_map(attr->bo, 1);
+
+ if (!input->BufferObj->Pointer) {
+ ctx->Driver.MapBuffer(ctx, GL_ARRAY_BUFFER, GL_READ_ONLY_ARB, input->BufferObj);
+ mapped_named_bo = GL_TRUE;
+ }
+
+ radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT, "%s. Vertex alignment doesn't match hw requirements.\n", __func__);
+
+ {
+ GLvoid *src_ptr = ADD_POINTERS(input->BufferObj->Pointer, input->Ptr);
+ GLvoid *dst_ptr = ADD_POINTERS(attr->bo->ptr, attr->bo_offset);
+ int i;
+
+ for (i = 0; i < count; ++i) {
+ memcpy(dst_ptr, src_ptr, input->StrideB);
+ src_ptr += input->StrideB;
+ dst_ptr += dst_stride;
+ }
+ }
+
+ if (mapped_named_bo) {
+ ctx->Driver.UnmapBuffer(ctx, GL_ARRAY_BUFFER, input->BufferObj);
+ }
+
+ radeon_bo_unmap(attr->bo);
+ attr->stride = dst_stride;
+}
+
+static void r300TranslateAttrib(GLcontext *ctx, GLuint attr, int count, const struct gl_client_array *input)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_buffer *vbuf = &r300->vbuf;
+ struct vertex_attribute r300_attr = { 0 };
+ GLenum type;
+ GLuint stride;
+
+ radeon_print(RADEON_RENDER, RADEON_TRACE, "%s\n", __func__);
+ stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB;
+
+ if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+ getTypeSize(input->Type) != 4 ||
+#endif
+ stride < 4) {
+
+ type = GL_FLOAT;
+
+ if (input->StrideB == 0) {
+ r300_attr.stride = 0;
+ } else {
+ r300_attr.stride = sizeof(GLfloat) * input->Size;
+ }
+ r300_attr.dwords = input->Size;
+ r300_attr.is_named_bo = GL_FALSE;
+ } else {
+ type = input->Type;
+ r300_attr.dwords = (getTypeSize(type) * input->Size + 3)/ 4;
+ if (!input->BufferObj->Name) {
+
+ if (input->StrideB == 0) {
+ r300_attr.stride = 0;
+ } else {
+ r300_attr.stride = (getTypeSize(type) * input->Size + 3) & ~3;
+ }
+
+ r300_attr.is_named_bo = GL_FALSE;
+ }
+ }
+
+ r300_attr.size = input->Size;
+ r300_attr.element = attr;
+ r300_attr.dst_loc = vbuf->num_attribs;
+
+ switch (type) {
+ case GL_FLOAT:
+ switch (input->Size) {
+ case 1: r300_attr.data_type = R300_DATA_TYPE_FLOAT_1; break;
+ case 2: r300_attr.data_type = R300_DATA_TYPE_FLOAT_2; break;
+ case 3: r300_attr.data_type = R300_DATA_TYPE_FLOAT_3; break;
+ case 4: r300_attr.data_type = R300_DATA_TYPE_FLOAT_4; break;
+ }
+ r300_attr._signed = 0;
+ r300_attr.normalize = 0;
+ break;
+ case GL_HALF_FLOAT:
+ switch (input->Size) {
+ case 1:
+ case 2:
+ r300_attr.data_type = R300_DATA_TYPE_FLT16_2;
+ break;
+ case 3:
+ case 4:
+ r300_attr.data_type = R300_DATA_TYPE_FLT16_4;
+ break;
+ }
+ break;
+ case GL_SHORT:
+ r300_attr._signed = 1;
+ r300_attr.normalize = input->Normalized;
+ switch (input->Size) {
+ case 1:
+ case 2:
+ r300_attr.data_type = R300_DATA_TYPE_SHORT_2;
+ break;
+ case 3:
+ case 4:
+ r300_attr.data_type = R300_DATA_TYPE_SHORT_4;
+ break;
+ }
+ break;
+ case GL_BYTE:
+ r300_attr._signed = 1;
+ r300_attr.normalize = input->Normalized;
+ r300_attr.data_type = R300_DATA_TYPE_BYTE;
+ break;
+ case GL_UNSIGNED_SHORT:
+ r300_attr._signed = 0;
+ r300_attr.normalize = input->Normalized;
+ switch (input->Size) {
+ case 1:
+ case 2:
+ r300_attr.data_type = R300_DATA_TYPE_SHORT_2;
+ break;
+ case 3:
+ case 4:
+ r300_attr.data_type = R300_DATA_TYPE_SHORT_4;
+ break;
+ }
+ break;
+ case GL_UNSIGNED_BYTE:
+ r300_attr._signed = 0;
+ r300_attr.normalize = input->Normalized;
+ if (input->Format == GL_BGRA)
+ r300_attr.data_type = R300_DATA_TYPE_D3DCOLOR;
+ else
+ r300_attr.data_type = R300_DATA_TYPE_BYTE;
+ break;
+
+ default:
+ case GL_DOUBLE:
+ case GL_INT:
+ case GL_UNSIGNED_INT:
+ assert(0);
+ break;
+ }
+
+ switch (input->Size) {
+ case 4:
+ r300_attr.swizzle = SWIZZLE_XYZW;
+ break;
+ case 3:
+ r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+ break;
+ case 2:
+ r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE);
+ break;
+ case 1:
+ r300_attr.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+ break;
+ }
+
+ r300_attr.write_mask = MASK_XYZW;
+
+ vbuf->attribs[vbuf->num_attribs] = r300_attr;
+ ++vbuf->num_attribs;
+}
+
+static void r300SetVertexFormat(GLcontext *ctx, const struct gl_client_array *arrays[], int count)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_buffer *vbuf = &r300->vbuf;
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s\n", __func__);
+ {
+ int i, tmp;
+
+ tmp = r300->selected_vp->code.InputsRead;
+ i = 0;
+ vbuf->num_attribs = 0;
+ while (tmp) {
+ /* find first enabled bit */
+ while (!(tmp & 1)) {
+ tmp >>= 1;
+ ++i;
+ }
+
+ r300TranslateAttrib(ctx, i, count, arrays[i]);
+
+ tmp >>= 1;
+ ++i;
+ }
+ }
+
+ r300SwitchFallback(ctx, R300_FALLBACK_AOS_LIMIT, vbuf->num_attribs > R300_MAX_AOS_ARRAYS);
+ if (r300->fallback)
+ return;
+}
+
+static void r300AllocDmaRegions(GLcontext *ctx, const struct gl_client_array *input[], int count)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_buffer *vbuf = &r300->vbuf;
+ GLuint stride;
+ int ret;
+ int i, index;
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE,
+ "%s: count %d num_attribs %d\n",
+ __func__, count, vbuf->num_attribs);
+
+ for (index = 0; index < vbuf->num_attribs; index++) {
+ struct radeon_aos *aos = &r300->radeon.tcl.aos[index];
+ i = vbuf->attribs[index].element;
+
+ stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
+
+ if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
+#if MESA_BIG_ENDIAN
+ getTypeSize(input[i]->Type) != 4 ||
+#endif
+ stride < 4) {
+
+ r300ConvertAttrib(ctx, count, input[i], &vbuf->attribs[index]);
+ } else {
+ if (input[i]->BufferObj->Name) {
+ if (stride % 4 != 0) {
+ assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
+ r300AlignDataToDword(ctx, input[i], count, &vbuf->attribs[index]);
+ vbuf->attribs[index].is_named_bo = GL_FALSE;
+ } else {
+ vbuf->attribs[index].stride = input[i]->StrideB;
+ vbuf->attribs[index].bo_offset = (intptr_t) input[i]->Ptr;
+ vbuf->attribs[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
+ vbuf->attribs[index].is_named_bo = GL_TRUE;
+ }
+ } else {
+
+ int size;
+ int local_count = count;
+ uint32_t *dst;
+
+ if (input[i]->StrideB == 0) {
+ size = getTypeSize(input[i]->Type) * input[i]->Size;
+ local_count = 1;
+ } else {
+ size = getTypeSize(input[i]->Type) * input[i]->Size * local_count;
+ }
+
+ radeonAllocDmaRegion(&r300->radeon, &vbuf->attribs[index].bo, &vbuf->attribs[index].bo_offset, size, 32);
+ radeon_bo_map(vbuf->attribs[index].bo, 1);
+ assert(vbuf->attribs[index].bo->ptr != NULL);
+ dst = (uint32_t *)ADD_POINTERS(vbuf->attribs[index].bo->ptr, vbuf->attribs[index].bo_offset);
+ switch (vbuf->attribs[index].dwords) {
+ case 1: radeonEmitVec4(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+ case 2: radeonEmitVec8(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+ case 3: radeonEmitVec12(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+ case 4: radeonEmitVec16(dst, input[i]->Ptr, input[i]->StrideB, local_count); break;
+ default: assert(0); break;
+ }
+ radeon_bo_unmap(vbuf->attribs[index].bo);
+
+ }
+ }
+
+ aos->count = vbuf->attribs[index].stride == 0 ? 1 : count;
+ aos->stride = vbuf->attribs[index].stride / sizeof(float);
+ aos->components = vbuf->attribs[index].dwords;
+ aos->bo = vbuf->attribs[index].bo;
+ aos->offset = vbuf->attribs[index].bo_offset;
+
+ if (vbuf->attribs[index].is_named_bo) {
+ radeon_cs_space_add_persistent_bo(r300->radeon.cmdbuf.cs, r300->vbuf.attribs[index].bo, RADEON_GEM_DOMAIN_GTT, 0);
+ }
+ }
+
+ r300->radeon.tcl.aos_count = vbuf->num_attribs;
+ ret = radeon_cs_space_check_with_bo(r300->radeon.cmdbuf.cs, first_elem(&r300->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
+ r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, ret);
+
+}
+
+static void r300FreeData(GLcontext *ctx)
+{
+ /* Need to zero tcl.aos[n].bo and tcl.elt_dma_bo
+ * to prevent double unref in radeonReleaseArrays
+ * called during context destroy
+ */
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s\n", __func__);
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ {
+ int i;
+
+ for (i = 0; i < r300->vbuf.num_attribs; i++) {
+ if (!r300->vbuf.attribs[i].is_named_bo) {
+ radeon_bo_unref(r300->vbuf.attribs[i].bo);
+ }
+ r300->radeon.tcl.aos[i].bo = NULL;
+ }
+ }
+
+ {
+ if (r300->ind_buf.bo != NULL) {
+ radeon_bo_unref(r300->ind_buf.bo);
+ }
+ }
+}
+
+static GLuint r300PredictTryDrawPrimsSize(GLcontext *ctx,
+ GLuint nr_prims, const struct _mesa_prim *prim)
+{
+ struct r300_context *r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_buffer *vbuf = &r300->vbuf;
+ GLboolean flushed;
+ GLuint dwords;
+ GLuint state_size;
+ int i;
+ GLuint extra_prims = 0;
+
+ /* Check for primitive splitting. */
+ for (i = 0; i < nr_prims; ++i) {
+ const GLuint num_verts = r300NumVerts(r300, prim[i].count, prim[i].mode);
+ extra_prims += num_verts/(65535 - 32);
+ }
+ nr_prims += extra_prims;
+
+ dwords = 2*CACHE_FLUSH_BUFSZ;
+ dwords += PRE_EMIT_STATE_BUFSZ;
+ dwords += (AOS_BUFSZ(vbuf->num_attribs)
+ + SCISSORS_BUFSZ*2
+ + FIREAOS_BUFSZ )*nr_prims;
+
+ state_size = radeonCountStateEmitSize(&r300->radeon);
+ flushed = rcommonEnsureCmdBufSpace(&r300->radeon,
+ dwords + state_size,
+ __FUNCTION__);
+ if (flushed)
+ dwords += radeonCountStateEmitSize(&r300->radeon);
+ else
+ dwords += state_size;
+
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: total prediction size is %d.\n", __FUNCTION__, dwords);
+ return dwords;
+}
+
+static GLboolean r300TryDrawPrims(GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLuint min_index,
+ GLuint max_index )
+{
+ struct r300_context *r300 = R300_CONTEXT(ctx);
+ GLuint i;
+
+ radeon_print(RADEON_RENDER, RADEON_NORMAL, "%s: %u (%d-%d) cs begin at %d\n",
+ __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw );
+
+ if (ctx->NewState)
+ _mesa_update_state( ctx );
+
+ if (r300->options.hw_tcl_enabled)
+ _tnl_UpdateFixedFunctionProgram(ctx);
+
+ r300UpdateShaders(r300);
+
+ r300SwitchFallback(ctx, R300_FALLBACK_INVALID_BUFFERS, !r300ValidateBuffers(ctx));
+
+ r300SetVertexFormat(ctx, arrays, max_index + 1);
+
+ if (r300->fallback)
+ return GL_FALSE;
+
+ r300SetupVAP(ctx, r300->selected_vp->code.InputsRead, r300->selected_vp->code.OutputsWritten);
+
+ r300UpdateShaderStates(r300);
+
+ /* ensure we have the cmd buf space in advance to cover
+ * the state + DMA AOS pointers */
+ GLuint emit_end = r300PredictTryDrawPrimsSize(ctx, nr_prims, prim)
+ + r300->radeon.cmdbuf.cs->cdw;
+
+ r300SetupIndexBuffer(ctx, ib);
+
+ r300AllocDmaRegions(ctx, arrays, max_index + 1);
+
+ if (r300->fallback)
+ return GL_FALSE;
+
+ r300EmitCacheFlush(r300);
+ radeonEmitState(&r300->radeon);
+
+ for (i = 0; i < nr_prims; ++i) {
+ r300RunRenderPrimitive(ctx, prim[i].start, prim[i].start + prim[i].count, prim[i].mode);
+ }
+
+ r300EmitCacheFlush(r300);
+
+ r300FreeData(ctx);
+
+ radeon_print(RADEON_RENDER, RADEON_VERBOSE, "%s: %u (%d-%d) cs ending at %d\n",
+ __FUNCTION__, nr_prims, min_index, max_index, r300->radeon.cmdbuf.cs->cdw );
+
+ if (emit_end < r300->radeon.cmdbuf.cs->cdw)
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n", r300->radeon.cmdbuf.cs->cdw - emit_end);
+
+ return GL_TRUE;
+}
+
+static void r300DrawPrims(GLcontext *ctx,
+ const struct gl_client_array *arrays[],
+ const struct _mesa_prim *prim,
+ GLuint nr_prims,
+ const struct _mesa_index_buffer *ib,
+ GLboolean index_bounds_valid,
+ GLuint min_index,
+ GLuint max_index)
+{
+ GLboolean retval;
+
+ /* This check should get folded into just the places that
+ * min/max index are really needed.
+ */
+ if (!index_bounds_valid) {
+ vbo_get_minmax_index(ctx, prim, ib, &min_index, &max_index);
+ }
+
+ if (min_index) {
+ radeon_print(RADEON_FALLBACKS, RADEON_IMPORTANT,
+ "%s: Rebasing primitives. %p nr_prims %d min_index %u max_index %u\n",
+ __func__, prim, nr_prims, min_index, max_index);
+ vbo_rebase_prims( ctx, arrays, prim, nr_prims, ib, min_index, max_index, r300DrawPrims );
+ return;
+ }
+
+ /* Make an attempt at drawing */
+ retval = r300TryDrawPrims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+
+ /* If failed run tnl pipeline - it should take care of fallbacks */
+ if (!retval)
+ _tnl_draw_prims(ctx, arrays, prim, nr_prims, ib, min_index, max_index);
+}
+
+void r300InitDraw(GLcontext *ctx)
+{
+ struct vbo_context *vbo = vbo_context(ctx);
+
+ vbo->draw_prims = r300DrawPrims;
+}
diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
new file mode 100644
index 0000000000..a24d431611
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_emit.c
@@ -0,0 +1,135 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ * \author Maciej Cencora <m.cencora@gmail.com>
+ */
+
+#include "main/glheader.h"
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "main/imports.h"
+#include "main/macros.h"
+
+#include "swrast_setup/swrast_setup.h"
+#include "tnl/tnl.h"
+#include "tnl/t_context.h"
+
+#include "r300_context.h"
+#include "r300_emit.h"
+
+
+GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead)
+{
+ /* No idea what this value means. I have seen other values written to
+ * this register... */
+ return 0x5555;
+}
+
+GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead)
+{
+ GLuint i, vic_1 = 0;
+
+ if (InputsRead & (1 << VERT_ATTRIB_POS))
+ vic_1 |= R300_INPUT_CNTL_POS;
+
+ if (InputsRead & (1 << VERT_ATTRIB_NORMAL))
+ vic_1 |= R300_INPUT_CNTL_NORMAL;
+
+ if (InputsRead & (1 << VERT_ATTRIB_COLOR0))
+ vic_1 |= R300_INPUT_CNTL_COLOR;
+
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
+ if (InputsRead & (1 << (VERT_ATTRIB_TEX0 + i))) {
+ vic_1 |= R300_INPUT_CNTL_TC0 << i;
+ }
+
+ return vic_1;
+}
+
+GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes)
+{
+ GLuint ret = 0;
+
+ if (vp_writes & (1 << VERT_RESULT_HPOS))
+ ret |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
+
+ if (vp_writes & (1 << VERT_RESULT_COL0))
+ ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT;
+
+ if (vp_writes & (1 << VERT_RESULT_COL1))
+ ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT;
+
+ /* Two sided lighting works only if all 4 colors are written */
+ if (vp_writes & (1 << VERT_RESULT_BFC0) || vp_writes & (1 << VERT_RESULT_BFC1))
+ ret |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT |
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT | R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
+
+ if (vp_writes & (1 << VERT_RESULT_PSIZ))
+ ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+
+ return ret;
+}
+
+GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes)
+{
+ GLuint i, ret = 0, first_free_texcoord = 0;
+
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ if (vp_writes & (1 << (VERT_RESULT_TEX0 + i))) {
+ ret |= (4 << (3 * first_free_texcoord));
+ ++first_free_texcoord;
+ }
+ }
+
+ if (first_free_texcoord > 8) {
+ fprintf(stderr, "\tout of free texcoords\n");
+ exit(-1);
+ }
+
+ return ret;
+}
+
+void r300EmitCacheFlush(r300ContextPtr rmesa)
+{
+ BATCH_LOCALS(&rmesa->radeon);
+
+ BEGIN_BATCH_NO_AUTOSTATE(4);
+ OUT_BATCH_REGVAL(R300_RB3D_DSTCACHE_CTLSTAT,
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+ OUT_BATCH_REGVAL(R300_ZB_ZCACHE_CTLSTAT,
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+ END_BATCH();
+ COMMIT_BATCH();
+}
diff --git a/src/mesa/drivers/dri/r300/r300_emit.h b/src/mesa/drivers/dri/r300/r300_emit.h
new file mode 100644
index 0000000000..a456d8867c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_emit.h
@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2005 Vladimir Dergachev.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ * Vladimir Dergachev <volodya@mindspring.com>
+ * Nicolai Haehnle <prefect_@gmx.net>
+ * Aapo Tahkola <aet@rasterburn.org>
+ * Ben Skeggs <darktama@iinet.net.au>
+ * Jerome Glisse <j.glisse@gmail.com>
+ */
+
+/* This files defines functions for accessing R300 hardware.
+ */
+#ifndef __R300_EMIT_H__
+#define __R300_EMIT_H__
+
+#include "main/glheader.h"
+#include "r300_context.h"
+#include "r300_cmdbuf.h"
+
+static INLINE uint32_t cmdpacket0(struct radeon_screen *rscrn,
+ int reg, int count)
+{
+ if (!rscrn->kernel_mm) {
+ drm_r300_cmd_header_t cmd;
+
+ cmd.u = 0;
+ cmd.packet0.cmd_type = R300_CMD_PACKET0;
+ cmd.packet0.count = count;
+ cmd.packet0.reghi = ((unsigned int)reg & 0xFF00) >> 8;
+ cmd.packet0.reglo = ((unsigned int)reg & 0x00FF);
+
+ return cmd.u;
+ }
+ if (count) {
+ return CP_PACKET0(reg, count - 1);
+ }
+ return CP_PACKET2;
+}
+
+static INLINE uint32_t cmdvpu(struct radeon_screen *rscrn, int addr, int count)
+{
+ drm_r300_cmd_header_t cmd;
+
+ cmd.u = 0;
+ cmd.vpu.cmd_type = R300_CMD_VPU;
+ cmd.vpu.count = count;
+ cmd.vpu.adrhi = ((unsigned int)addr & 0xFF00) >> 8;
+ cmd.vpu.adrlo = ((unsigned int)addr & 0x00FF);
+
+ return cmd.u;
+}
+
+static INLINE uint32_t cmdr500fp(struct radeon_screen *rscrn,
+ int addr, int count, int type, int clamp)
+{
+ drm_r300_cmd_header_t cmd;
+
+ cmd.u = 0;
+ cmd.r500fp.cmd_type = R300_CMD_R500FP;
+ cmd.r500fp.count = count;
+ cmd.r500fp.adrhi_flags = ((unsigned int)addr & 0x100) >> 8;
+ cmd.r500fp.adrhi_flags |= type ? R500FP_CONSTANT_TYPE : 0;
+ cmd.r500fp.adrhi_flags |= clamp ? R500FP_CONSTANT_CLAMP : 0;
+ cmd.r500fp.adrlo = ((unsigned int)addr & 0x00FF);
+
+ return cmd.u;
+}
+
+static INLINE uint32_t cmdpacket3(struct radeon_screen *rscrn, int packet)
+{
+ drm_r300_cmd_header_t cmd;
+
+ cmd.u = 0;
+ cmd.packet3.cmd_type = R300_CMD_PACKET3;
+ cmd.packet3.packet = packet;
+
+ return cmd.u;
+}
+
+static INLINE uint32_t cmdcpdelay(struct radeon_screen *rscrn,
+ unsigned short count)
+{
+ drm_r300_cmd_header_t cmd;
+
+ cmd.u = 0;
+
+ cmd.delay.cmd_type = R300_CMD_CP_DELAY;
+ cmd.delay.count = count;
+
+ return cmd.u;
+}
+
+static INLINE uint32_t cmdwait(struct radeon_screen *rscrn,
+ unsigned char flags)
+{
+ drm_r300_cmd_header_t cmd;
+
+ cmd.u = 0;
+ cmd.wait.cmd_type = R300_CMD_WAIT;
+ cmd.wait.flags = flags;
+
+ return cmd.u;
+}
+
+static INLINE uint32_t cmdpacify(struct radeon_screen *rscrn)
+{
+ drm_r300_cmd_header_t cmd;
+
+ cmd.u = 0;
+ cmd.header.cmd_type = R300_CMD_END3D;
+
+ return cmd.u;
+}
+
+/**
+ * Write the header of a packet3 to the command buffer.
+ * Outputs 2 dwords and expects (num_extra+1) additional dwords afterwards.
+ */
+#define OUT_BATCH_PACKET3(packet, num_extra) do {\
+ if (!b_l_rmesa->radeonScreen->kernel_mm) { \
+ OUT_BATCH(cmdpacket3(b_l_rmesa->radeonScreen,\
+ R300_CMD_PACKET3_RAW)); \
+ } else b_l_rmesa->cmdbuf.cs->section_cdw++;\
+ OUT_BATCH(CP_PACKET3((packet), (num_extra))); \
+ } while(0)
+
+/**
+ * Must be sent to switch to 2d commands
+ */
+void static INLINE end_3d(radeonContextPtr radeon)
+{
+ BATCH_LOCALS(radeon);
+
+ if (!radeon->radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(1);
+ OUT_BATCH(cmdpacify(radeon->radeonScreen));
+ END_BATCH();
+ }
+}
+
+void static INLINE cp_delay(r300ContextPtr rmesa, unsigned short count)
+{
+ BATCH_LOCALS(&rmesa->radeon);
+
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(1);
+ OUT_BATCH(cmdcpdelay(rmesa->radeon.radeonScreen, count));
+ END_BATCH();
+ }
+}
+
+void static INLINE cp_wait(radeonContextPtr radeon, unsigned char flags)
+{
+ BATCH_LOCALS(radeon);
+ uint32_t wait_until;
+
+ if (!radeon->radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(1);
+ OUT_BATCH(cmdwait(radeon->radeonScreen, flags));
+ END_BATCH();
+ } else {
+ switch(flags) {
+ case R300_WAIT_2D:
+ wait_until = (1 << 14);
+ break;
+ case R300_WAIT_3D:
+ wait_until = (1 << 15);
+ break;
+ case R300_NEW_WAIT_2D_3D:
+ wait_until = (1 << 14) | (1 << 15);
+ break;
+ case R300_NEW_WAIT_2D_2D_CLEAN:
+ wait_until = (1 << 14) | (1 << 16) | (1 << 18);
+ break;
+ case R300_NEW_WAIT_3D_3D_CLEAN:
+ wait_until = (1 << 15) | (1 << 17) | (1 << 18);
+ break;
+ case R300_NEW_WAIT_2D_2D_CLEAN_3D_3D_CLEAN:
+ wait_until = (1 << 14) | (1 << 16) | (1 << 18);
+ wait_until |= (1 << 15) | (1 << 17) | (1 << 18);
+ break;
+ default:
+ return;
+ }
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+ OUT_BATCH(wait_until);
+ END_BATCH();
+ }
+}
+
+extern int r300PrimitiveType(r300ContextPtr rmesa, int prim);
+extern int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim);
+
+extern void r300EmitCacheFlush(r300ContextPtr rmesa);
+
+extern GLuint r300VAPInputCntl0(GLcontext * ctx, GLuint InputsRead);
+extern GLuint r300VAPInputCntl1(GLcontext * ctx, GLuint InputsRead);
+extern GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint vp_writes);
+extern GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint vp_writes);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.c b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
new file mode 100644
index 0000000000..7be2f74b5b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.c
@@ -0,0 +1,305 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/**
+ * \file
+ *
+ * Fragment program compiler. Perform transformations on the intermediate
+ * representation until the program is in a form where we can translate
+ * it more or less directly into machine-readable form.
+ *
+ * \author Ben Skeggs <darktama@iinet.net.au>
+ * \author Jerome Glisse <j.glisse@gmail.com>
+ */
+
+#include "r300_fragprog_common.h"
+
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+
+#include "compiler/radeon_compiler.h"
+
+#include "radeon_mesa_to_rc.h"
+
+
+static GLuint build_dts(GLuint depthmode)
+{
+ switch(depthmode) {
+ default:
+ case GL_LUMINANCE: return RC_SWIZZLE_XYZZ;
+ case GL_INTENSITY: return RC_SWIZZLE_XYZW;
+ case GL_ALPHA: return RC_SWIZZLE_WWWW;
+ }
+}
+
+static GLuint build_func(GLuint comparefunc)
+{
+ return comparefunc - GL_NEVER;
+}
+
+/**
+ * Collect all external state that is relevant for compiling the given
+ * fragment program.
+ */
+static void build_state(
+ r300ContextPtr r300,
+ struct gl_fragment_program *fp,
+ struct r300_fragment_program_external_state *state)
+{
+ int unit;
+
+ memset(state, 0, sizeof(*state));
+
+ for(unit = 0; unit < 16; ++unit) {
+ if (fp->Base.ShadowSamplers & (1 << unit)) {
+ struct gl_texture_object* tex = r300->radeon.glCtx->Texture.Unit[unit]._Current;
+
+ state->unit[unit].depth_texture_swizzle = build_dts(tex->DepthMode);
+ state->unit[unit].texture_compare_func = build_func(tex->CompareFunc);
+ }
+ }
+}
+
+
+/**
+ * Transform the program to support fragment.position.
+ *
+ * Introduce a small fragment at the start of the program that will be
+ * the only code that directly reads the FRAG_ATTRIB_WPOS input.
+ * All other code pieces that reference that input will be rewritten
+ * to read from a newly allocated temporary.
+ *
+ */
+static void insert_WPOS_trailer(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp)
+{
+ int i;
+
+ fp->wpos_attr = FRAG_ATTRIB_MAX;
+ if (!(compiler->Base.Program.InputsRead & FRAG_BIT_WPOS)) {
+ return;
+ }
+
+ for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
+ {
+ if (!(compiler->Base.Program.InputsRead & (1 << i))) {
+ fp->wpos_attr = i;
+ break;
+ }
+ }
+
+ /* No free texcoord found, fall-back to software rendering */
+ if (fp->wpos_attr == FRAG_ATTRIB_MAX)
+ {
+ compiler->Base.Error = 1;
+ return;
+ }
+
+ rc_transform_fragment_wpos(&compiler->Base, FRAG_ATTRIB_WPOS, fp->wpos_attr, GL_FALSE);
+}
+
+/**
+ * Rewrite fragment.fogcoord to use a texture coordinate slot.
+ * Note that fogcoord is forced into an X001 pattern, and this enforcement
+ * is done here.
+ *
+ * See also the counterpart rewriting for vertex programs.
+ */
+static void rewriteFog(struct r300_fragment_program_compiler *compiler, struct r300_fragment_program * fp)
+{
+ struct rc_src_register src;
+ int i;
+
+ fp->fog_attr = FRAG_ATTRIB_MAX;
+ if (!(compiler->Base.Program.InputsRead & FRAG_BIT_FOGC)) {
+ return;
+ }
+
+ for (i = FRAG_ATTRIB_TEX0; i <= FRAG_ATTRIB_TEX7; ++i)
+ {
+ if (!(compiler->Base.Program.InputsRead & (1 << i))) {
+ fp->fog_attr = i;
+ break;
+ }
+ }
+
+ /* No free texcoord found, fall-back to software rendering */
+ if (fp->fog_attr == FRAG_ATTRIB_MAX)
+ {
+ compiler->Base.Error = 1;
+ return;
+ }
+
+ memset(&src, 0, sizeof(src));
+ src.File = RC_FILE_INPUT;
+ src.Index = fp->fog_attr;
+ src.Swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+ rc_move_input(&compiler->Base, FRAG_ATTRIB_FOGC, src);
+}
+
+
+/**
+ * Reserve hardware temporary registers for the program inputs.
+ *
+ * @note This allocation is performed explicitly, because the order of inputs
+ * is determined by the RS hardware.
+ */
+static void allocate_hw_inputs(
+ struct r300_fragment_program_compiler * c,
+ void (*allocate)(void * data, unsigned input, unsigned hwreg),
+ void * mydata)
+{
+ GLuint InputsRead = c->Base.Program.InputsRead;
+ int i;
+ GLuint hwindex = 0;
+
+ /* Primary colour */
+ if (InputsRead & FRAG_BIT_COL0)
+ allocate(mydata, FRAG_ATTRIB_COL0, hwindex++);
+ InputsRead &= ~FRAG_BIT_COL0;
+
+ /* Secondary color */
+ if (InputsRead & FRAG_BIT_COL1)
+ allocate(mydata, FRAG_ATTRIB_COL1, hwindex++);
+ InputsRead &= ~FRAG_BIT_COL1;
+
+ /* Texcoords */
+ for (i = 0; i < 8; i++) {
+ if (InputsRead & (FRAG_BIT_TEX0 << i))
+ allocate(mydata, FRAG_ATTRIB_TEX0+i, hwindex++);
+ }
+ InputsRead &= ~FRAG_BITS_TEX_ANY;
+
+ /* Fogcoords treated as a texcoord */
+ if (InputsRead & FRAG_BIT_FOGC)
+ allocate(mydata, FRAG_ATTRIB_FOGC, hwindex++);
+ InputsRead &= ~FRAG_BIT_FOGC;
+
+ /* fragment position treated as a texcoord */
+ if (InputsRead & FRAG_BIT_WPOS)
+ allocate(mydata, FRAG_ATTRIB_WPOS, hwindex++);
+ InputsRead &= ~FRAG_BIT_WPOS;
+
+ /* Anything else */
+ if (InputsRead)
+ rc_error(&c->Base, "Don't know how to handle inputs 0x%x\n", InputsRead);
+}
+
+
+static void translate_fragment_program(GLcontext *ctx, struct r300_fragment_program_cont *cont, struct r300_fragment_program *fp)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_fragment_program_compiler compiler;
+
+ rc_init(&compiler.Base);
+ compiler.Base.Debug = (RADEON_DEBUG & RADEON_PIXEL) ? GL_TRUE : GL_FALSE;
+
+ compiler.code = &fp->code;
+ compiler.state = fp->state;
+ compiler.enable_shadow_ambient = GL_TRUE;
+ compiler.Base.is_r500 = (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) ? GL_TRUE : GL_FALSE;
+ compiler.Base.max_temp_regs = (compiler.Base.is_r500) ? 128 : 32;
+ compiler.OutputDepth = FRAG_RESULT_DEPTH;
+ memset(compiler.OutputColor, 0, 4 * sizeof(unsigned));
+ compiler.OutputColor[0] = FRAG_RESULT_COLOR;
+ compiler.AllocateHwInputs = &allocate_hw_inputs;
+
+ if (compiler.Base.Debug) {
+ fflush(stderr);
+ printf("Fragment Program: Initial program:\n");
+ _mesa_print_program(&cont->Base.Base);
+ fflush(stderr);
+ }
+
+ radeon_mesa_to_rc_program(&compiler.Base, &cont->Base.Base);
+
+ insert_WPOS_trailer(&compiler, fp);
+
+ rewriteFog(&compiler, fp);
+
+ r3xx_compile_fragment_program(&compiler);
+
+ if (compiler.Base.is_r500) {
+ /* We need to support the non-KMS DRM interface, which
+ * artificially limits the number of instructions and
+ * constants which are available to us.
+ *
+ * See also the comment in r300_context.c where we
+ * set the MAX_NATIVE_xxx values.
+ */
+ if (fp->code.code.r500.inst_end >= 255 || fp->code.constants.Count > 255)
+ rc_error(&compiler.Base, "Program is too big (upgrade to r300g to avoid this limitation).\n");
+ }
+
+ fp->error = compiler.Base.Error;
+
+ fp->InputsRead = compiler.Base.Program.InputsRead;
+
+ /* Clear the fog/wpos_attr if code accessing these
+ * attributes has been removed during compilation
+ */
+ if (fp->fog_attr != FRAG_ATTRIB_MAX) {
+ if (!(fp->InputsRead & (1 << fp->fog_attr)))
+ fp->fog_attr = FRAG_ATTRIB_MAX;
+ }
+
+ if (fp->wpos_attr != FRAG_ATTRIB_MAX) {
+ if (!(fp->InputsRead & (1 << fp->wpos_attr)))
+ fp->wpos_attr = FRAG_ATTRIB_MAX;
+ }
+
+ rc_destroy(&compiler.Base);
+}
+
+struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_fragment_program_cont *fp_list;
+ struct r300_fragment_program *fp;
+ struct r300_fragment_program_external_state state;
+
+ fp_list = (struct r300_fragment_program_cont *)ctx->FragmentProgram._Current;
+ build_state(r300, ctx->FragmentProgram._Current, &state);
+
+ fp = fp_list->progs;
+ while (fp) {
+ if (memcmp(&fp->state, &state, sizeof(state)) == 0) {
+ return r300->selected_fp = fp;
+ }
+ fp = fp->next;
+ }
+
+ fp = calloc(1, sizeof(struct r300_fragment_program));
+
+ fp->state = state;
+
+ fp->next = fp_list->progs;
+ fp_list->progs = fp;
+
+ translate_fragment_program(ctx, fp_list, fp);
+
+ return r300->selected_fp = fp;
+}
diff --git a/src/mesa/drivers/dri/r300/r300_fragprog_common.h b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
new file mode 100644
index 0000000000..3d64c08cee
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_fragprog_common.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_FRAGPROG_COMMON_H_
+#define __R300_FRAGPROG_COMMON_H_
+
+#include "main/mtypes.h"
+
+#include "r300_context.h"
+
+struct r300_fragment_program *r300SelectAndTranslateFragmentShader(GLcontext *ctx);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
new file mode 100644
index 0000000000..ac93563ed9
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -0,0 +1,3313 @@
+/**************************************************************************
+
+Copyright (C) 2004-2005 Nicolai Haehnle et al.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/* *INDENT-OFF* */
+
+#ifndef _R300_REG_H
+#define _R300_REG_H
+
+#define R300_MC_INIT_MISC_LAT_TIMER 0x180
+# define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT 0
+# define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT 4
+# define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT 8
+# define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT 12
+# define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT 16
+# define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT 20
+# define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT 24
+# define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT 28
+
+
+#define R300_MC_INIT_GFX_LAT_TIMER 0x154
+# define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT 0
+# define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT 4
+# define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT 8
+# define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT 12
+# define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT 16
+# define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT 20
+# define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT 24
+# define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT 28
+
+/*
+ * This file contains registers and constants for the R300. They have been
+ * found mostly by examining command buffers captured using glxtest, as well
+ * as by extrapolating some known registers and constants from the R200.
+ * I am fairly certain that they are correct unless stated otherwise
+ * in comments.
+ */
+
+#define R300_SE_VPORT_XSCALE 0x1D98
+#define R300_SE_VPORT_XOFFSET 0x1D9C
+#define R300_SE_VPORT_YSCALE 0x1DA0
+#define R300_SE_VPORT_YOFFSET 0x1DA4
+#define R300_SE_VPORT_ZSCALE 0x1DA8
+#define R300_SE_VPORT_ZOFFSET 0x1DAC
+
+#define R300_VAP_PORT_IDX0 0x2040
+/*
+ * Vertex Array Processing (VAP) Control
+ */
+#define R300_VAP_CNTL 0x2080
+# define R300_PVS_NUM_SLOTS_SHIFT 0
+# define R300_PVS_NUM_CNTLRS_SHIFT 4
+# define R300_PVS_NUM_FPUS_SHIFT 8
+# define R300_VF_MAX_VTX_NUM_SHIFT 18
+# define R300_GL_CLIP_SPACE_DEF (0 << 22)
+# define R300_DX_CLIP_SPACE_DEF (1 << 22)
+# define R500_TCL_STATE_OPTIMIZATION (1 << 23)
+
+/* This register is written directly and also starts data section
+ * in many 3d CP_PACKET3's
+ */
+#define R300_VAP_VF_CNTL 0x2084
+# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0
+# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0)
+# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0)
+# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0)
+# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0)
+# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0)
+# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0)
+# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0)
+# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0)
+# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0)
+# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0)
+# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0)
+
+# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4
+ /* State based - direct writes to registers trigger vertex
+ generation */
+# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4)
+# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4)
+# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4)
+# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4)
+
+ /* I don't think I saw these three used.. */
+# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6
+# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9
+# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10
+
+ /* index size - when not set the indices are assumed to be 16 bit */
+# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11)
+ /* number of vertices */
+# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16
+
+#define R500_VAP_INDEX_OFFSET 0x208c
+
+#define R300_VAP_OUTPUT_VTX_FMT_0 0x2090
+# define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4)
+# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16)
+
+#define R300_VAP_OUTPUT_VTX_FMT_1 0x2094
+ /* each of the following is 3 bits wide, specifies number
+ of components */
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21
+# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT 0
+# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT 1
+# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2
+# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3
+# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4
+
+#define R300_SE_VTE_CNTL 0x20b0
+# define R300_VPORT_X_SCALE_ENA (1 << 0)
+# define R300_VPORT_X_OFFSET_ENA (1 << 1)
+# define R300_VPORT_Y_SCALE_ENA (1 << 2)
+# define R300_VPORT_Y_OFFSET_ENA (1 << 3)
+# define R300_VPORT_Z_SCALE_ENA (1 << 4)
+# define R300_VPORT_Z_OFFSET_ENA (1 << 5)
+# define R300_VTX_XY_FMT (1 << 8)
+# define R300_VTX_Z_FMT (1 << 9)
+# define R300_VTX_W0_FMT (1 << 10)
+# define R300_SERIAL_PROC_ENA (1 << 11)
+
+/* BEGIN: Vertex data assembly - lots of uncertainties */
+
+/* gap */
+
+/* Maximum Vertex Indx Clamp */
+#define R300_VAP_VF_MAX_VTX_INDX 0x2134
+/* Minimum Vertex Indx Clamp */
+#define R300_VAP_VF_MIN_VTX_INDX 0x2138
+
+/** Vertex assembler/processor control status */
+#define R300_VAP_CNTL_STATUS 0x2140
+/* No swap at all (default) */
+# define R300_VC_NO_SWAP (0 << 0)
+/* 16-bit swap: 0xAABBCCDD becomes 0xBBAADDCC */
+# define R300_VC_16BIT_SWAP (1 << 0)
+/* 32-bit swap: 0xAABBCCDD becomes 0xDDCCBBAA */
+# define R300_VC_32BIT_SWAP (2 << 0)
+/* Half-dword swap: 0xAABBCCDD becomes 0xCCDDAABB */
+# define R300_VC_HALF_DWORD_SWAP (3 << 0)
+/* The TCL engine will not be used (as it is logically or even physically removed) */
+# define R300_VAP_TCL_BYPASS (1 << 8)
+/* Read only flag if TCL engine is busy. */
+# define R300_VAP_PVS_BUSY (1 << 11)
+/* TODO: gap for MAX_MPS */
+/* Read only flag if the vertex store is busy. */
+# define R300_VAP_VS_BUSY (1 << 24)
+/* Read only flag if the reciprocal engine is busy. */
+# define R300_VAP_RCP_BUSY (1 << 25)
+/* Read only flag if the viewport transform engine is busy. */
+# define R300_VAP_VTE_BUSY (1 << 26)
+/* Read only flag if the memory interface unit is busy. */
+# define R300_VAP_MUI_BUSY (1 << 27)
+/* Read only flag if the vertex cache is busy. */
+# define R300_VAP_VC_BUSY (1 << 28)
+/* Read only flag if the vertex fetcher is busy. */
+# define R300_VAP_VF_BUSY (1 << 29)
+/* Read only flag if the register pipeline is busy. */
+# define R300_VAP_REGPIPE_BUSY (1 << 30)
+/* Read only flag if the VAP engine is busy. */
+# define R300_VAP_VAP_BUSY (1 << 31)
+
+/* gap */
+
+/* Where do we get our vertex data?
+ *
+ * Vertex data either comes either from immediate mode registers or from
+ * vertex arrays.
+ * There appears to be no mixed mode (though we can force the pitch of
+ * vertex arrays to 0, effectively reusing the same element over and over
+ * again).
+ *
+ * Immediate mode is controlled by the INPUT_CNTL registers. I am not sure
+ * if these registers influence vertex array processing.
+ *
+ * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3.
+ *
+ * In both cases, vertex attributes are then passed through INPUT_ROUTE.
+ *
+ * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data
+ * into the vertex processor's input registers.
+ * The first word routes the first input, the second word the second, etc.
+ * The corresponding input is routed into the register with the given index.
+ * The list is ended by a word with INPUT_ROUTE_END set.
+ *
+ * Always set COMPONENTS_4 in immediate mode.
+ */
+
+#define R300_VAP_PROG_STREAM_CNTL_0 0x2150
+# define R300_DATA_TYPE_0_SHIFT 0
+# define R300_DATA_TYPE_FLOAT_1 0
+# define R300_DATA_TYPE_FLOAT_2 1
+# define R300_DATA_TYPE_FLOAT_3 2
+# define R300_DATA_TYPE_FLOAT_4 3
+# define R300_DATA_TYPE_BYTE 4
+# define R300_DATA_TYPE_D3DCOLOR 5
+# define R300_DATA_TYPE_SHORT_2 6
+# define R300_DATA_TYPE_SHORT_4 7
+# define R300_DATA_TYPE_VECTOR_3_TTT 8
+# define R300_DATA_TYPE_VECTOR_3_EET 9
+# define R300_DATA_TYPE_FLT16_2 11
+# define R300_DATA_TYPE_FLT16_4 12
+
+# define R300_SKIP_DWORDS_SHIFT 4
+# define R300_DST_VEC_LOC_SHIFT 8
+# define R300_LAST_VEC (1 << 13)
+# define R300_SIGNED (1 << 14)
+# define R300_NORMALIZE (1 << 15)
+# define R300_DATA_TYPE_1_SHIFT 16
+#define R300_VAP_PROG_STREAM_CNTL_1 0x2154
+#define R300_VAP_PROG_STREAM_CNTL_2 0x2158
+#define R300_VAP_PROG_STREAM_CNTL_3 0x215C
+#define R300_VAP_PROG_STREAM_CNTL_4 0x2160
+#define R300_VAP_PROG_STREAM_CNTL_5 0x2164
+#define R300_VAP_PROG_STREAM_CNTL_6 0x2168
+#define R300_VAP_PROG_STREAM_CNTL_7 0x216C
+/* gap */
+
+/* Notes:
+ * - always set up to produce at least two attributes:
+ * if vertex program uses only position, fglrx will set normal, too
+ * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal.
+ */
+#define R300_VAP_VTX_STATE_CNTL 0x2180
+# define R300_COLOR_0_ASSEMBLY_SHIFT 0
+# define R300_SEL_COLOR 0
+# define R300_SEL_USER_COLOR_0 1
+# define R300_SEL_USER_COLOR_1 2
+# define R300_COLOR_1_ASSEMBLY_SHIFT 2
+# define R300_COLOR_2_ASSEMBLY_SHIFT 4
+# define R300_COLOR_3_ASSEMBLY_SHIFT 6
+# define R300_COLOR_4_ASSEMBLY_SHIFT 8
+# define R300_COLOR_5_ASSEMBLY_SHIFT 10
+# define R300_COLOR_6_ASSEMBLY_SHIFT 12
+# define R300_COLOR_7_ASSEMBLY_SHIFT 14
+# define R300_UPDATE_USER_COLOR_0_ENA (1 << 16)
+
+/*
+ * Each bit in this field applies to the corresponding vector in the VSM
+ * memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit
+ * is set, then the corresponding 4-Dword Vector is output into the Vertex Stream.
+ */
+#define R300_VAP_VSM_VTX_ASSM 0x2184
+# define R300_INPUT_CNTL_POS 0x00000001
+# define R300_INPUT_CNTL_NORMAL 0x00000002
+# define R300_INPUT_CNTL_COLOR 0x00000004
+# define R300_INPUT_CNTL_TC0 0x00000400
+# define R300_INPUT_CNTL_TC1 0x00000800
+# define R300_INPUT_CNTL_TC2 0x00001000 /* GUESS */
+# define R300_INPUT_CNTL_TC3 0x00002000 /* GUESS */
+# define R300_INPUT_CNTL_TC4 0x00004000 /* GUESS */
+# define R300_INPUT_CNTL_TC5 0x00008000 /* GUESS */
+# define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */
+# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */
+
+/* Programmable Stream Control Signed Normalize Control */
+#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc
+# define SGN_NORM_ZERO 0
+# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1
+# define SGN_NORM_NO_ZERO 2
+
+/* gap */
+
+/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0
+ * are set to a swizzling bit pattern, other words are 0.
+ *
+ * In immediate mode, the pattern is always set to xyzw. In vertex array
+ * mode, the swizzling pattern is e.g. used to set zw components in texture
+ * coordinates with only tweo components.
+ */
+#define R300_VAP_PROG_STREAM_CNTL_EXT_0 0x21e0
+# define R300_SWIZZLE0_SHIFT 0
+# define R300_SWIZZLE_SELECT_X_SHIFT 0
+# define R300_SWIZZLE_SELECT_Y_SHIFT 3
+# define R300_SWIZZLE_SELECT_Z_SHIFT 6
+# define R300_SWIZZLE_SELECT_W_SHIFT 9
+
+# define R300_SWIZZLE_SELECT_X 0
+# define R300_SWIZZLE_SELECT_Y 1
+# define R300_SWIZZLE_SELECT_Z 2
+# define R300_SWIZZLE_SELECT_W 3
+# define R300_SWIZZLE_SELECT_FP_ZERO 4
+# define R300_SWIZZLE_SELECT_FP_ONE 5
+/* alternate forms for r300_emit.c */
+# define R300_INPUT_ROUTE_SELECT_X 0
+# define R300_INPUT_ROUTE_SELECT_Y 1
+# define R300_INPUT_ROUTE_SELECT_Z 2
+# define R300_INPUT_ROUTE_SELECT_W 3
+# define R300_INPUT_ROUTE_SELECT_ZERO 4
+# define R300_INPUT_ROUTE_SELECT_ONE 5
+
+# define R300_WRITE_ENA_SHIFT 12
+# define R300_WRITE_ENA_X 1
+# define R300_WRITE_ENA_Y 2
+# define R300_WRITE_ENA_Z 4
+# define R300_WRITE_ENA_W 8
+# define R300_SWIZZLE1_SHIFT 16
+#define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4
+#define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8
+#define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec
+#define R300_VAP_PROG_STREAM_CNTL_EXT_4 0x21f0
+#define R300_VAP_PROG_STREAM_CNTL_EXT_5 0x21f4
+#define R300_VAP_PROG_STREAM_CNTL_EXT_6 0x21f8
+#define R300_VAP_PROG_STREAM_CNTL_EXT_7 0x21fc
+
+/* END: Vertex data assembly */
+
+/* gap */
+
+/* BEGIN: Upload vertex program and data */
+
+/*
+ * The programmable vertex shader unit has a memory bank of unknown size
+ * that can be written to in 16 byte units by writing the address into
+ * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs).
+ *
+ * Pointers into the memory bank are always in multiples of 16 bytes.
+ *
+ * The memory bank is divided into areas with fixed meaning.
+ *
+ * Starting at address UPLOAD_PROGRAM: Vertex program instructions.
+ * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB),
+ * whereas the difference between known addresses suggests size 512.
+ *
+ * Starting at address UPLOAD_PARAMETERS: Vertex program parameters.
+ * Native reported limits and the VPI layout suggest size 256, whereas
+ * difference between known addresses suggests size 512.
+ *
+ * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the
+ * floating point pointsize. The exact purpose of this state is uncertain,
+ * as there is also the R300_RE_POINTSIZE register.
+ *
+ * Multiple vertex programs and parameter sets can be loaded at once,
+ * which could explain the size discrepancy.
+ */
+#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200
+# define R300_PVS_CODE_START 0
+# define R300_MAX_PVS_CODE_LINES 256
+# define R500_MAX_PVS_CODE_LINES 1024
+# define R300_PVS_CONST_START 512
+# define R500_PVS_CONST_START 1024
+# define R300_MAX_PVS_CONST_VECS 256
+# define R500_MAX_PVS_CONST_VECS 1024
+# define R300_PVS_UCP_START 1024
+# define R500_PVS_UCP_START 1536
+# define R300_POINT_VPORT_SCALE_OFFSET 1030
+# define R500_POINT_VPORT_SCALE_OFFSET 1542
+# define R300_POINT_GEN_TEX_OFFSET 1031
+# define R500_POINT_GEN_TEX_OFFSET 1543
+
+/*
+ * These are obsolete defines form r300_context.h, but they might give some
+ * clues when investigating the addresses further...
+ */
+#if 0
+#define VSF_DEST_PROGRAM 0x0
+#define VSF_DEST_MATRIX0 0x200
+#define VSF_DEST_MATRIX1 0x204
+#define VSF_DEST_MATRIX2 0x208
+#define VSF_DEST_VECTOR0 0x20c
+#define VSF_DEST_VECTOR1 0x20d
+#define VSF_DEST_UNKNOWN1 0x400
+#define VSF_DEST_UNKNOWN2 0x406
+#endif
+
+/* gap */
+
+#define R300_VAP_PVS_UPLOAD_DATA 0x2208
+
+/* END: Upload vertex program and data */
+
+/* gap */
+
+/* I do not know the purpose of this register. However, I do know that
+ * it is set to 221C_CLEAR for clear operations and to 221C_NORMAL
+ * for normal rendering.
+ *
+ * 2007-11-05: This register is the user clip plane control register, but there
+ * also seems to be a rendering mode control; the NORMAL/CLEAR defines.
+ *
+ * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view
+ */
+#define R300_VAP_CLIP_CNTL 0x221C
+# define R300_VAP_UCP_ENABLE_0 (1 << 0)
+# define R300_VAP_UCP_ENABLE_1 (1 << 1)
+# define R300_VAP_UCP_ENABLE_2 (1 << 2)
+# define R300_VAP_UCP_ENABLE_3 (1 << 3)
+# define R300_VAP_UCP_ENABLE_4 (1 << 4)
+# define R300_VAP_UCP_ENABLE_5 (1 << 5)
+# define R300_PS_UCP_MODE_DIST_COP (0 << 14)
+# define R300_PS_UCP_MODE_RADIUS_COP (1 << 14)
+# define R300_PS_UCP_MODE_RADIUS_COP_CLIP (2 << 14)
+# define R300_PS_UCP_MODE_CLIP_AS_TRIFAN (3 << 14)
+# define R300_CLIP_DISABLE (1 << 16)
+# define R300_UCP_CULL_ONLY_ENABLE (1 << 17)
+# define R300_BOUNDARY_EDGE_FLAG_ENABLE (1 << 18)
+# define R500_COLOR2_IS_TEXTURE (1 << 20)
+# define R500_COLOR3_IS_TEXTURE (1 << 21)
+
+/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first
+ * plane is per-pixel and the second plane is per-vertex.
+ *
+ * This was determined by experimentation alone but I believe it is correct.
+ *
+ * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest.
+ */
+#define R300_VAP_GB_VERT_CLIP_ADJ 0x2220
+#define R300_VAP_GB_VERT_DISC_ADJ 0x2224
+#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228
+#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c
+
+/* gap */
+
+/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
+ * rendering commands and overwriting vertex program parameters.
+ * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and
+ * avoids bugs caused by still running shaders reading bad data from memory.
+ */
+#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284
+
+/* This register is used to define the number of core clocks to wait for a
+ * vertex to be received by the VAP input controller (while the primitive
+ * path is backed up) before forcing any accumulated vertices to be submitted
+ * to the vertex processing path.
+ */
+#define VAP_PVS_VTX_TIMEOUT_REG 0x2288
+# define R300_2288_R300 0x00750000 /* -- nh */
+# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */
+
+/* gap */
+
+/* Addresses are relative to the vertex program instruction area of the
+ * memory bank. PROGRAM_END points to the last instruction of the active
+ * program
+ *
+ * The meaning of the two UNKNOWN fields is obviously not known. However,
+ * experiments so far have shown that both *must* point to an instruction
+ * inside the vertex program, otherwise the GPU locks up.
+ *
+ * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and
+ * R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to
+ * position takes place.
+ *
+ * Most likely this is used to ignore rest of the program in cases
+ * where group of verts arent visible. For some reason this "section"
+ * is sometimes accepted other instruction that have no relationship with
+ * position calculations.
+ */
+#define R300_VAP_PVS_CODE_CNTL_0 0x22D0
+# define R300_PVS_FIRST_INST_SHIFT 0
+# define R300_PVS_XYZW_VALID_INST_SHIFT 10
+# define R300_PVS_LAST_INST_SHIFT 20
+/* Addresses are relative to the vertex program parameters area. */
+#define R300_VAP_PVS_CONST_CNTL 0x22D4
+# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0
+# define R300_PVS_MAX_CONST_ADDR_SHIFT 16
+#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
+# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
+#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
+
+/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for
+ * immediate vertices
+ */
+#define R300_VAP_VTX_COLOR_R 0x2464
+#define R300_VAP_VTX_COLOR_G 0x2468
+#define R300_VAP_VTX_COLOR_B 0x246C
+#define R300_VAP_VTX_POS_0_X_1 0x2490 /* used for glVertex2*() */
+#define R300_VAP_VTX_POS_0_Y_1 0x2494
+#define R300_VAP_VTX_COLOR_PKD 0x249C /* RGBA */
+#define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */
+#define R300_VAP_VTX_POS_0_Y_2 0x24A4
+#define R300_VAP_VTX_POS_0_Z_2 0x24A8
+/* write 0 to indicate end of packet? */
+#define R300_VAP_VTX_END_OF_PKT 0x24AC
+
+/* gap */
+
+/* These are values from r300_reg/r300_reg.h - they are known to be correct
+ * and are here so we can use one register file instead of several
+ * - Vladimir
+ */
+#define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000
+# define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT (1<<2)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT (1<<3)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT (1<<4)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE (0xf<<5)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT (0x1<<16)
+
+#define R300_GB_VAP_RASTER_VTX_FMT_1 0x4004
+ /* each of the following is 3 bits wide, specifies number
+ of components */
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21
+
+/* UNK30 seems to enables point to quad transformation on textures
+ * (or something closely related to that).
+ * This bit is rather fatal at the time being due to lackings at pixel
+ * shader side
+ * Specifies top of Raster pipe specific enable controls.
+ */
+#define R300_GB_ENABLE 0x4008
+# define R300_GB_POINT_STUFF_DISABLE (0 << 0)
+# define R300_GB_POINT_STUFF_ENABLE (1 << 0) /* Specifies if points will have stuffed texture coordinates. */
+# define R300_GB_LINE_STUFF_DISABLE (0 << 1)
+# define R300_GB_LINE_STUFF_ENABLE (1 << 1) /* Specifies if lines will have stuffed texture coordinates. */
+# define R300_GB_TRIANGLE_STUFF_DISABLE (0 << 2)
+# define R300_GB_TRIANGLE_STUFF_ENABLE (1 << 2) /* Specifies if triangles will have stuffed texture coordinates. */
+# define R300_GB_STENCIL_AUTO_DISABLE (0 << 4)
+# define R300_GB_STENCIL_AUTO_ENABLE (1 << 4) /* Enable stencil auto inc/dec based on triangle cw/ccw, force into dzy low bit. */
+# define R300_GB_STENCIL_AUTO_FORCE (2 << 4) /* Force 0 into dzy low bit. */
+
+ /* each of the following is 2 bits wide */
+#define R300_GB_TEX_REPLICATE 0 /* Replicate VAP source texture coordinates (S,T,[R,Q]). */
+#define R300_GB_TEX_ST 1 /* Stuff with source texture coordinates (S,T). */
+#define R300_GB_TEX_STR 2 /* Stuff with source texture coordinates (S,T,R). */
+# define R300_GB_TEX0_SOURCE_SHIFT 16
+# define R300_GB_TEX1_SOURCE_SHIFT 18
+# define R300_GB_TEX2_SOURCE_SHIFT 20
+# define R300_GB_TEX3_SOURCE_SHIFT 22
+# define R300_GB_TEX4_SOURCE_SHIFT 24
+# define R300_GB_TEX5_SOURCE_SHIFT 26
+# define R300_GB_TEX6_SOURCE_SHIFT 28
+# define R300_GB_TEX7_SOURCE_SHIFT 30
+
+/* MSPOS - positions for multisample antialiasing (?) */
+#define R300_GB_MSPOS0 0x4010
+ /* shifts - each of the fields is 4 bits */
+# define R300_GB_MSPOS0__MS_X0_SHIFT 0
+# define R300_GB_MSPOS0__MS_Y0_SHIFT 4
+# define R300_GB_MSPOS0__MS_X1_SHIFT 8
+# define R300_GB_MSPOS0__MS_Y1_SHIFT 12
+# define R300_GB_MSPOS0__MS_X2_SHIFT 16
+# define R300_GB_MSPOS0__MS_Y2_SHIFT 20
+# define R300_GB_MSPOS0__MSBD0_Y 24
+# define R300_GB_MSPOS0__MSBD0_X 28
+
+#define R300_GB_MSPOS1 0x4014
+# define R300_GB_MSPOS1__MS_X3_SHIFT 0
+# define R300_GB_MSPOS1__MS_Y3_SHIFT 4
+# define R300_GB_MSPOS1__MS_X4_SHIFT 8
+# define R300_GB_MSPOS1__MS_Y4_SHIFT 12
+# define R300_GB_MSPOS1__MS_X5_SHIFT 16
+# define R300_GB_MSPOS1__MS_Y5_SHIFT 20
+# define R300_GB_MSPOS1__MSBD1 24
+
+/* Specifies the graphics pipeline configuration for rasterization. */
+#define R300_GB_TILE_CONFIG 0x4018
+# define R300_GB_TILE_DISABLE (0 << 0)
+# define R300_GB_TILE_ENABLE (1 << 0)
+# define R300_GB_TILE_PIPE_COUNT_RV300 (0 << 1) /* RV350 (1 pipe, 1 ctx) */
+# define R300_GB_TILE_PIPE_COUNT_R300 (3 << 1) /* R300 (2 pipes, 1 ctx) */
+# define R300_GB_TILE_PIPE_COUNT_R420_3P (6 << 1) /* R420-3P (3 pipes, 1 ctx) */
+# define R300_GB_TILE_PIPE_COUNT_R420 (7 << 1) /* R420 (4 pipes, 1 ctx) */
+# define R300_GB_TILE_SIZE_8 (0 << 4)
+# define R300_GB_TILE_SIZE_16 (1 << 4)
+# define R300_GB_TILE_SIZE_32 (2 << 4)
+# define R300_GB_SUPER_SIZE_1 (0 << 6)
+# define R300_GB_SUPER_SIZE_2 (1 << 6)
+# define R300_GB_SUPER_SIZE_4 (2 << 6)
+# define R300_GB_SUPER_SIZE_8 (3 << 6)
+# define R300_GB_SUPER_SIZE_16 (4 << 6)
+# define R300_GB_SUPER_SIZE_32 (5 << 6)
+# define R300_GB_SUPER_SIZE_64 (6 << 6)
+# define R300_GB_SUPER_SIZE_128 (7 << 6)
+# define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */
+# define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */
+# define R300_GB_SUPER_TILE_A (0 << 15)
+# define R300_GB_SUPER_TILE_B (1 << 15)
+# define R300_GB_SUBPIXEL_1_12 (0 << 16)
+# define R300_GB_SUBPIXEL_1_16 (1 << 16)
+# define GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17)
+# define GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17)
+# define GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17)
+# define GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17)
+# define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19)
+# define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19)
+# define GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20)
+# define GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20)
+# define GB_TILE_CONFIG_ALT_OFFSET (0 << 21)
+# define GB_TILE_CONFIG_SUBPRECISION (0 << 22)
+# define GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23)
+# define GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23)
+# define GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24)
+# define GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24)
+
+/* Specifies the sizes of the various FIFO`s in the sc/rs/us. This register must be the first one written */
+#define R300_GB_FIFO_SIZE 0x4024
+ /* each of the following is 2 bits wide */
+#define R300_GB_FIFO_SIZE_32 0
+#define R300_GB_FIFO_SIZE_64 1
+#define R300_GB_FIFO_SIZE_128 2
+#define R300_GB_FIFO_SIZE_256 3
+# define R300_SC_IFIFO_SIZE_SHIFT 0
+# define R300_SC_TZFIFO_SIZE_SHIFT 2
+# define R300_SC_BFIFO_SIZE_SHIFT 4
+
+# define R300_US_OFIFO_SIZE_SHIFT 12
+# define R300_US_WFIFO_SIZE_SHIFT 14
+ /* the following use the same constants as above, but meaning is
+ is times 2 (i.e. instead of 32 words it means 64 */
+# define R300_RS_TFIFO_SIZE_SHIFT 6
+# define R300_RS_CFIFO_SIZE_SHIFT 8
+# define R300_US_RAM_SIZE_SHIFT 10
+ /* watermarks, 3 bits wide */
+# define R300_RS_HIGHWATER_COL_SHIFT 16
+# define R300_RS_HIGHWATER_TEX_SHIFT 19
+# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */
+# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24
+
+#define GB_Z_PEQ_CONFIG 0x4028
+# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0)
+# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0)
+
+/* Specifies various polygon specific selects (fog, depth, perspective). */
+#define R300_GB_SELECT 0x401c
+# define R300_GB_FOG_SELECT_C0A (0 << 0)
+# define R300_GB_FOG_SELECT_C1A (1 << 0)
+# define R300_GB_FOG_SELECT_C2A (2 << 0)
+# define R300_GB_FOG_SELECT_C3A (3 << 0)
+# define R300_GB_FOG_SELECT_1_1_W (4 << 0)
+# define R300_GB_FOG_SELECT_Z (5 << 0)
+# define R300_GB_DEPTH_SELECT_Z (0 << 3)
+# define R300_GB_DEPTH_SELECT_1_1_W (1 << 3)
+# define R300_GB_W_SELECT_1_W (0 << 4)
+# define R300_GB_W_SELECT_1 (1 << 4)
+# define R300_GB_FOG_STUFF_DISABLE (0 << 5)
+# define R300_GB_FOG_STUFF_ENABLE (1 << 5)
+# define R300_GB_FOG_STUFF_TEX_SHIFT 6
+# define R300_GB_FOG_STUFF_TEX_MASK 0x000003c0
+# define R300_GB_FOG_STUFF_COMP_SHIFT 10
+# define R300_GB_FOG_STUFF_COMP_MASK 0x00000c00
+
+/* Specifies the graphics pipeline configuration for antialiasing. */
+#define GB_AA_CONFIG 0x4020
+# define GB_AA_CONFIG_AA_DISABLE (0 << 0)
+# define GB_AA_CONFIG_AA_ENABLE (1 << 0)
+# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1)
+# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1)
+# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1)
+# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1)
+
+/* Selects which of 4 pipes are active. */
+#define GB_PIPE_SELECT 0x402c
+# define GB_PIPE_SELECT_PIPE0_ID_SHIFT 0
+# define GB_PIPE_SELECT_PIPE1_ID_SHIFT 2
+# define GB_PIPE_SELECT_PIPE2_ID_SHIFT 4
+# define GB_PIPE_SELECT_PIPE3_ID_SHIFT 6
+# define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8
+# define GB_PIPE_SELECT_MAX_PIPE 12
+# define GB_PIPE_SELECT_BAD_PIPES 14
+# define GB_PIPE_SELECT_CONFIG_PIPES 18
+
+
+/* Specifies the sizes of the various FIFO`s in the sc/rs. */
+#define GB_FIFO_SIZE1 0x4070
+/* High water mark for SC input fifo */
+# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0
+# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f
+/* High water mark for SC input fifo (B) */
+# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6
+# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0
+/* High water mark for RS colors' fifo */
+# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12
+# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000
+/* High water mark for RS textures' fifo */
+# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18
+# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000
+
+/* This table specifies the source location and format for up to 16 texture
+ * addresses (i[0]:i[15]) and four colors (c[0]:c[3])
+ */
+#define R500_RS_IP_0 0x4074
+#define R500_RS_IP_1 0x4078
+#define R500_RS_IP_2 0x407C
+#define R500_RS_IP_3 0x4080
+#define R500_RS_IP_4 0x4084
+#define R500_RS_IP_5 0x4088
+#define R500_RS_IP_6 0x408C
+#define R500_RS_IP_7 0x4090
+#define R500_RS_IP_8 0x4094
+#define R500_RS_IP_9 0x4098
+#define R500_RS_IP_10 0x409C
+#define R500_RS_IP_11 0x40A0
+#define R500_RS_IP_12 0x40A4
+#define R500_RS_IP_13 0x40A8
+#define R500_RS_IP_14 0x40AC
+#define R500_RS_IP_15 0x40B0
+#define R500_RS_IP_PTR_K0 62
+#define R500_RS_IP_PTR_K1 63
+#define R500_RS_IP_TEX_PTR_S_SHIFT 0
+#define R500_RS_IP_TEX_PTR_T_SHIFT 6
+#define R500_RS_IP_TEX_PTR_R_SHIFT 12
+#define R500_RS_IP_TEX_PTR_Q_SHIFT 18
+#define R500_RS_IP_COL_PTR_SHIFT 24
+#define R500_RS_IP_COL_FMT_SHIFT 27
+# define R500_RS_COL_PTR(x) ((x) << 24)
+# define R500_RS_COL_FMT(x) ((x) << 27)
+/* gap */
+#define R500_RS_IP_OFFSET_DIS (0 << 31)
+#define R500_RS_IP_OFFSET_EN (1 << 31)
+
+/* gap */
+
+/* Zero to flush caches. */
+#define R300_TX_INVALTAGS 0x4100
+#define R300_TX_FLUSH 0x0
+
+/* The upper enable bits are guessed, based on fglrx reported limits. */
+#define R300_TX_ENABLE 0x4104
+# define R300_TX_ENABLE_0 (1 << 0)
+# define R300_TX_ENABLE_1 (1 << 1)
+# define R300_TX_ENABLE_2 (1 << 2)
+# define R300_TX_ENABLE_3 (1 << 3)
+# define R300_TX_ENABLE_4 (1 << 4)
+# define R300_TX_ENABLE_5 (1 << 5)
+# define R300_TX_ENABLE_6 (1 << 6)
+# define R300_TX_ENABLE_7 (1 << 7)
+# define R300_TX_ENABLE_8 (1 << 8)
+# define R300_TX_ENABLE_9 (1 << 9)
+# define R300_TX_ENABLE_10 (1 << 10)
+# define R300_TX_ENABLE_11 (1 << 11)
+# define R300_TX_ENABLE_12 (1 << 12)
+# define R300_TX_ENABLE_13 (1 << 13)
+# define R300_TX_ENABLE_14 (1 << 14)
+# define R300_TX_ENABLE_15 (1 << 15)
+
+#define R500_TX_FILTER_4 0x4110
+# define R500_TX_WEIGHT_1_SHIFT (0)
+# define R500_TX_WEIGHT_0_SHIFT (11)
+# define R500_TX_WEIGHT_PAIR (1<<22)
+# define R500_TX_PHASE_SHIFT (23)
+# define R500_TX_DIRECTION_HORIZONTAL (0<<27)
+# define R500_TX_DIRECTION_VERITCAL (1<<27)
+
+/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
+#define R300_GA_POINT_S0 0x4200
+
+/* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
+#define R300_GA_POINT_T0 0x4204
+
+/* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */
+#define R300_GA_POINT_S1 0x4208
+
+/* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */
+#define R300_GA_POINT_T1 0x420c
+
+/* Specifies amount to shift integer position of vertex (screen space) before
+ * converting to float for triangle stipple.
+ */
+#define R300_GA_TRIANGLE_STIPPLE 0x4214
+# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0
+# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f
+# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16
+# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000
+
+/* The pointsize is given in multiples of 6. The pointsize can be enormous:
+ * Clear() renders a single point that fills the entire framebuffer.
+ * 1/2 Height of point; fixed (16.0), subpixel format (1/12 or 1/16, even if in
+ * 8b precision).
+ */
+#define R300_GA_POINT_SIZE 0x421C
+# define R300_POINTSIZE_Y_SHIFT 0
+# define R300_POINTSIZE_Y_MASK 0x0000ffff
+# define R300_POINTSIZE_X_SHIFT 16
+# define R300_POINTSIZE_X_MASK 0xffff0000
+# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6)
+
+/* Blue fill color */
+#define R500_GA_FILL_R 0x4220
+
+/* Blue fill color */
+#define R500_GA_FILL_G 0x4224
+
+/* Blue fill color */
+#define R500_GA_FILL_B 0x4228
+
+/* Alpha fill color */
+#define R500_GA_FILL_A 0x422c
+
+
+/* Specifies maximum and minimum point & sprite sizes for per vertex size
+ * specification. The lower part (15:0) is MIN and (31:16) is max.
+ */
+#define R300_GA_POINT_MINMAX 0x4230
+# define R300_GA_POINT_MINMAX_MIN_SHIFT 0
+# define R300_GA_POINT_MINMAX_MIN_MASK (0xFFFF << 0)
+# define R300_GA_POINT_MINMAX_MAX_SHIFT 16
+# define R300_GA_POINT_MINMAX_MAX_MASK (0xFFFF << 16)
+
+/* 1/2 width of line, in subpixels (1/12 or 1/16 only, even in 8b
+ * subprecision); (16.0) fixed format.
+ *
+ * The line width is given in multiples of 6.
+ * In default mode lines are classified as vertical lines.
+ * HO: horizontal
+ * VE: vertical or horizontal
+ * HO & VE: no classification
+ */
+#define R300_GA_LINE_CNTL 0x4234
+# define R300_GA_LINE_CNTL_WIDTH_SHIFT 0
+# define R300_GA_LINE_CNTL_WIDTH_MASK 0x0000ffff
+# define R300_GA_LINE_CNTL_END_TYPE_HOR (0 << 16)
+# define R300_GA_LINE_CNTL_END_TYPE_VER (1 << 16)
+# define R300_GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */
+# define R300_GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */
+# define R500_GA_LINE_CNTL_SORT_NO (0 << 18)
+# define R500_GA_LINE_CNTL_SORT_MINX_MINY (1 << 18)
+/** TODO: looks wrong */
+# define R300_LINESIZE_MAX (R300_GA_LINE_CNTL_WIDTH_MASK / 6)
+/** TODO: looks wrong */
+# define R300_LINE_CNT_HO (1 << 16)
+/** TODO: looks wrong */
+# define R300_LINE_CNT_VE (1 << 17)
+
+/* Line Stipple configuration information. */
+#define R300_GA_LINE_STIPPLE_CONFIG 0x4238
+# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0)
+# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0)
+# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0)
+# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2
+# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc
+
+/* Used to load US instructions and constants */
+#define R500_GA_US_VECTOR_INDEX 0x4250
+# define R500_GA_US_VECTOR_INDEX_SHIFT 0
+# define R500_GA_US_VECTOR_INDEX_MASK 0x000000ff
+# define R500_GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16)
+# define R500_GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16)
+# define R500_GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17)
+# define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17)
+
+/* Data register for loading US instructions and constants */
+#define R500_GA_US_VECTOR_DATA 0x4254
+
+/* Specifies color properties and mappings of textures. */
+#define R500_GA_COLOR_CONTROL_PS3 0x4258
+# define R500_TEX0_SHADING_PS3_SOLID (0 << 0)
+# define R500_TEX0_SHADING_PS3_FLAT (1 << 0)
+# define R500_TEX0_SHADING_PS3_GOURAUD (2 << 0)
+# define R500_TEX1_SHADING_PS3_SOLID (0 << 2)
+# define R500_TEX1_SHADING_PS3_FLAT (1 << 2)
+# define R500_TEX1_SHADING_PS3_GOURAUD (2 << 2)
+# define R500_TEX2_SHADING_PS3_SOLID (0 << 4)
+# define R500_TEX2_SHADING_PS3_FLAT (1 << 4)
+# define R500_TEX2_SHADING_PS3_GOURAUD (2 << 4)
+# define R500_TEX3_SHADING_PS3_SOLID (0 << 6)
+# define R500_TEX3_SHADING_PS3_FLAT (1 << 6)
+# define R500_TEX3_SHADING_PS3_GOURAUD (2 << 6)
+# define R500_TEX4_SHADING_PS3_SOLID (0 << 8)
+# define R500_TEX4_SHADING_PS3_FLAT (1 << 8)
+# define R500_TEX4_SHADING_PS3_GOURAUD (2 << 8)
+# define R500_TEX5_SHADING_PS3_SOLID (0 << 10)
+# define R500_TEX5_SHADING_PS3_FLAT (1 << 10)
+# define R500_TEX5_SHADING_PS3_GOURAUD (2 << 10)
+# define R500_TEX6_SHADING_PS3_SOLID (0 << 12)
+# define R500_TEX6_SHADING_PS3_FLAT (1 << 12)
+# define R500_TEX6_SHADING_PS3_GOURAUD (2 << 12)
+# define R500_TEX7_SHADING_PS3_SOLID (0 << 14)
+# define R500_TEX7_SHADING_PS3_FLAT (1 << 14)
+# define R500_TEX7_SHADING_PS3_GOURAUD (2 << 14)
+# define R500_TEX8_SHADING_PS3_SOLID (0 << 16)
+# define R500_TEX8_SHADING_PS3_FLAT (1 << 16)
+# define R500_TEX8_SHADING_PS3_GOURAUD (2 << 16)
+# define R500_TEX9_SHADING_PS3_SOLID (0 << 18)
+# define R500_TEX9_SHADING_PS3_FLAT (1 << 18)
+# define R500_TEX9_SHADING_PS3_GOURAUD (2 << 18)
+# define R500_TEX10_SHADING_PS3_SOLID (0 << 20)
+# define R500_TEX10_SHADING_PS3_FLAT (1 << 20)
+# define R500_TEX10_SHADING_PS3_GOURAUD (2 << 20)
+# define R500_COLOR0_TEX_OVERRIDE_NO (0 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22)
+# define R500_COLOR1_TEX_OVERRIDE_NO (0 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26)
+
+/* Returns idle status of various G3D block, captured when GA_IDLE written or
+ * when hard or soft reset asserted.
+ */
+#define R500_GA_IDLE 0x425c
+# define R500_GA_IDLE_PIPE3_Z_IDLE (0 << 0)
+# define R500_GA_IDLE_PIPE2_Z_IDLE (0 << 1)
+# define R500_GA_IDLE_PIPE3_CD_IDLE (0 << 2)
+# define R500_GA_IDLE_PIPE2_CD_IDLE (0 << 3)
+# define R500_GA_IDLE_PIPE3_FG_IDLE (0 << 4)
+# define R500_GA_IDLE_PIPE2_FG_IDLE (0 << 5)
+# define R500_GA_IDLE_PIPE3_US_IDLE (0 << 6)
+# define R500_GA_IDLE_PIPE2_US_IDLE (0 << 7)
+# define R500_GA_IDLE_PIPE3_SC_IDLE (0 << 8)
+# define R500_GA_IDLE_PIPE2_SC_IDLE (0 << 9)
+# define R500_GA_IDLE_PIPE3_RS_IDLE (0 << 10)
+# define R500_GA_IDLE_PIPE2_RS_IDLE (0 << 11)
+# define R500_GA_IDLE_PIPE1_Z_IDLE (0 << 12)
+# define R500_GA_IDLE_PIPE0_Z_IDLE (0 << 13)
+# define R500_GA_IDLE_PIPE1_CD_IDLE (0 << 14)
+# define R500_GA_IDLE_PIPE0_CD_IDLE (0 << 15)
+# define R500_GA_IDLE_PIPE1_FG_IDLE (0 << 16)
+# define R500_GA_IDLE_PIPE0_FG_IDLE (0 << 17)
+# define R500_GA_IDLE_PIPE1_US_IDLE (0 << 18)
+# define R500_GA_IDLE_PIPE0_US_IDLE (0 << 19)
+# define R500_GA_IDLE_PIPE1_SC_IDLE (0 << 20)
+# define R500_GA_IDLE_PIPE0_SC_IDLE (0 << 21)
+# define R500_GA_IDLE_PIPE1_RS_IDLE (0 << 22)
+# define R500_GA_IDLE_PIPE0_RS_IDLE (0 << 23)
+# define R500_GA_IDLE_SU_IDLE (0 << 24)
+# define R500_GA_IDLE_GA_IDLE (0 << 25)
+# define R500_GA_IDLE_GA_UNIT2_IDLE (0 << 26)
+
+/* Current value of stipple accumulator. */
+#define R300_GA_LINE_STIPPLE_VALUE 0x4260
+
+/* S Texture Coordinate Value for Vertex 0 of Line (stuff textures -- i.e. AA) */
+#define R300_GA_LINE_S0 0x4264
+/* S Texture Coordinate Value for Vertex 1 of Lines (V2 of parallelogram -- stuff textures -- i.e. AA) */
+#define R300_GA_LINE_S1 0x4268
+
+/* GA Input fifo high water marks */
+#define R500_GA_FIFO_CNTL 0x4270
+# define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007
+# define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0
+# define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038
+# define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3
+# define R500_GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0
+# define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT 6
+
+/* GA enhance/tweaks */
+#define R300_GA_ENHANCE 0x4274
+# define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0)
+# define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */
+# define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1)
+# define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */
+# define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */
+# define R500_GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */
+# define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3)
+# define R500_GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. */
+
+#define R300_GA_COLOR_CONTROL 0x4278
+# define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID (0 << 0)
+# define R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT (1 << 0)
+# define R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD (2 << 0)
+# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_SOLID (0 << 2)
+# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT (1 << 2)
+# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD (2 << 2)
+# define R300_GA_COLOR_CONTROL_RGB1_SHADING_SOLID (0 << 4)
+# define R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT (1 << 4)
+# define R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD (2 << 4)
+# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_SOLID (0 << 6)
+# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT (1 << 6)
+# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD (2 << 6)
+# define R300_GA_COLOR_CONTROL_RGB2_SHADING_SOLID (0 << 8)
+# define R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT (1 << 8)
+# define R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD (2 << 8)
+# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_SOLID (0 << 10)
+# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT (1 << 10)
+# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD (2 << 10)
+# define R300_GA_COLOR_CONTROL_RGB3_SHADING_SOLID (0 << 12)
+# define R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT (1 << 12)
+# define R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD (2 << 12)
+# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_SOLID (0 << 14)
+# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT (1 << 14)
+# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD (2 << 14)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST (0 << 16)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND (1 << 16)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_THIRD (2 << 16)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST (3 << 16)
+
+/** TODO: might be candidate for removal */
+# define R300_RE_SHADE_MODEL_SMOOTH ( \
+ R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD)
+/** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */
+# define R300_RE_SHADE_MODEL_FLAT ( \
+ R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \
+ R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \
+ R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD)
+
+/* Specifies red & green components of fill color -- S312 format -- Backwards comp. */
+#define R300_GA_SOLID_RG 0x427c
+# define GA_SOLID_RG_COLOR_GREEN_SHIFT 0
+# define GA_SOLID_RG_COLOR_GREEN_MASK 0x0000ffff
+# define GA_SOLID_RG_COLOR_RED_SHIFT 16
+# define GA_SOLID_RG_COLOR_RED_MASK 0xffff0000
+/* Specifies blue & alpha components of fill color -- S312 format -- Backwards comp. */
+#define R300_GA_SOLID_BA 0x4280
+# define GA_SOLID_BA_COLOR_ALPHA_SHIFT 0
+# define GA_SOLID_BA_COLOR_ALPHA_MASK 0x0000ffff
+# define GA_SOLID_BA_COLOR_BLUE_SHIFT 16
+# define GA_SOLID_BA_COLOR_BLUE_MASK 0xffff0000
+
+/* Polygon Mode
+ * Dangerous
+ */
+#define R300_GA_POLY_MODE 0x4288
+# define R300_GA_POLY_MODE_DISABLE (0 << 0)
+# define R300_GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */
+/* reserved */
+# define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4)
+# define R300_GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4)
+# define R300_GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4)
+/* reserved */
+# define R300_GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7)
+# define R300_GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7)
+# define R300_GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7)
+/* reserved */
+
+/* Specifies the rouding mode for geometry & color SPFP to FP conversions. */
+#define R300_GA_ROUND_MODE 0x428c
+# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0)
+# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0)
+# define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2)
+# define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2)
+# define R300_GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4)
+# define R300_GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4)
+# define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5)
+# define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5)
+# define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6
+# define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0
+
+/* Specifies x & y offsets for vertex data after conversion to FP.
+ * Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b
+ * subprecision).
+ */
+#define R300_GA_OFFSET 0x4290
+# define R300_GA_OFFSET_X_OFFSET_SHIFT 0
+# define R300_GA_OFFSET_X_OFFSET_MASK 0x0000ffff
+# define R300_GA_OFFSET_Y_OFFSET_SHIFT 16
+# define R300_GA_OFFSET_Y_OFFSET_MASK 0xffff0000
+
+/* Specifies the scale to apply to fog. */
+#define R300_GA_FOG_SCALE 0x4294
+/* Specifies the offset to apply to fog. */
+#define R300_GA_FOG_OFFSET 0x4298
+/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */
+#define R300_GA_SOFT_RESET 0x429c
+
+/* Not sure why there are duplicate of factor and constant values.
+ * My best guess so far is that there are seperate zbiases for test and write.
+ * Ordering might be wrong.
+ * Some of the tests indicate that fgl has a fallback implementation of zbias
+ * via pixel shaders.
+ */
+#define R300_SU_TEX_WRAP 0x42A0
+#define R300_SU_POLY_OFFSET_FRONT_SCALE 0x42A4
+#define R300_SU_POLY_OFFSET_FRONT_OFFSET 0x42A8
+#define R300_SU_POLY_OFFSET_BACK_SCALE 0x42AC
+#define R300_SU_POLY_OFFSET_BACK_OFFSET 0x42B0
+
+/* This register needs to be set to (1<<1) for RV350 to correctly
+ * perform depth test (see --vb-triangles in r300_demo)
+ * Don't know about other chips. - Vladimir
+ * This is set to 3 when GL_POLYGON_OFFSET_FILL is on.
+ * My guess is that there are two bits for each zbias primitive
+ * (FILL, LINE, POINT).
+ * One to enable depth test and one for depth write.
+ * Yet this doesnt explain why depth writes work ...
+ */
+#define R300_SU_POLY_OFFSET_ENABLE 0x42B4
+# define R300_FRONT_ENABLE (1 << 0)
+# define R300_BACK_ENABLE (1 << 1)
+# define R300_PARA_ENABLE (1 << 2)
+
+#define R300_SU_CULL_MODE 0x42B8
+# define R300_CULL_FRONT (1 << 0)
+# define R300_CULL_BACK (1 << 1)
+# define R300_FRONT_FACE_CCW (0 << 2)
+# define R300_FRONT_FACE_CW (1 << 2)
+
+/* SU Depth Scale value */
+#define R300_SU_DEPTH_SCALE 0x42c0
+/* SU Depth Offset value */
+#define R300_SU_DEPTH_OFFSET 0x42c4
+
+#define R300_SU_REG_DEST 0x42c8
+# define R300_RASTER_PIPE_SELECT_0 (1 << 0)
+# define R300_RASTER_PIPE_SELECT_1 (1 << 1)
+# define R300_RASTER_PIPE_SELECT_2 (1 << 2)
+# define R300_RASTER_PIPE_SELECT_3 (1 << 3)
+# define R300_RASTER_PIPE_SELECT_ALL 0xf
+
+
+/* BEGIN: Rasterization / Interpolators - many guesses */
+
+/*
+ * TC_CNT is the number of incoming texture coordinate sets (i.e. it depends
+ * on the vertex program, *not* the fragment program)
+ */
+#define R300_RS_COUNT 0x4300
+# define R300_IT_COUNT_SHIFT 0
+# define R300_IT_COUNT_MASK 0x0000007f
+# define R300_IC_COUNT_SHIFT 7
+# define R300_IC_COUNT_MASK 0x00000780
+# define R300_W_ADDR_SHIFT 12
+# define R300_W_ADDR_MASK 0x0003f000
+# define R300_HIRES_DIS (0 << 18)
+# define R300_HIRES_EN (1 << 18)
+
+#define R300_RS_INST_COUNT 0x4304
+# define R300_RS_INST_COUNT_SHIFT 0
+# define R300_RS_INST_COUNT_MASK 0x0000000f
+# define R300_RS_TX_OFFSET_SHIFT 5
+# define R300_RS_TX_OFFSET_MASK 0x000000e0
+
+/* gap */
+
+/* Only used for texture coordinates.
+ * Use the source field to route texture coordinate input from the
+ * vertex program to the desired interpolator. Note that the source
+ * field is relative to the outputs the vertex program *actually*
+ * writes. If a vertex program only writes texcoord[1], this will
+ * be source index 0.
+ * Set INTERP_USED on all interpolators that produce data used by
+ * the fragment program. INTERP_USED looks like a swizzling mask,
+ * but I haven't seen it used that way.
+ *
+ * Note: The _UNKNOWN constants are always set in their respective
+ * register. I don't know if this is necessary.
+ */
+#define R300_RS_IP_0 0x4310
+#define R300_RS_IP_1 0x4314
+#define R300_RS_IP_2 0x4318
+#define R300_RS_IP_3 0x431C
+# define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */
+# define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */
+# define R300_RS_TEX_PTR(x) ((x) << 0)
+# define R300_RS_COL_PTR(x) ((x) << 6)
+# define R300_RS_COL_FMT(x) ((x) << 9)
+# define R300_RS_COL_FMT_RGBA 0
+# define R300_RS_COL_FMT_RGB0 1
+# define R300_RS_COL_FMT_RGB1 2
+# define R300_RS_COL_FMT_000A 4
+# define R300_RS_COL_FMT_0000 5
+# define R300_RS_COL_FMT_0001 6
+# define R300_RS_COL_FMT_111A 8
+# define R300_RS_COL_FMT_1110 9
+# define R300_RS_COL_FMT_1111 10
+# define R300_RS_SEL_S(x) ((x) << 13)
+# define R300_RS_SEL_T(x) ((x) << 16)
+# define R300_RS_SEL_R(x) ((x) << 19)
+# define R300_RS_SEL_Q(x) ((x) << 22)
+# define R300_RS_SEL_C0 0
+# define R300_RS_SEL_C1 1
+# define R300_RS_SEL_C2 2
+# define R300_RS_SEL_C3 3
+# define R300_RS_SEL_K0 4
+# define R300_RS_SEL_K1 5
+
+
+/* */
+#define R500_RS_INST_0 0x4320
+#define R500_RS_INST_1 0x4324
+#define R500_RS_INST_2 0x4328
+#define R500_RS_INST_3 0x432c
+#define R500_RS_INST_4 0x4330
+#define R500_RS_INST_5 0x4334
+#define R500_RS_INST_6 0x4338
+#define R500_RS_INST_7 0x433c
+#define R500_RS_INST_8 0x4340
+#define R500_RS_INST_9 0x4344
+#define R500_RS_INST_10 0x4348
+#define R500_RS_INST_11 0x434c
+#define R500_RS_INST_12 0x4350
+#define R500_RS_INST_13 0x4354
+#define R500_RS_INST_14 0x4358
+#define R500_RS_INST_15 0x435c
+#define R500_RS_INST_TEX_ID_SHIFT 0
+#define R500_RS_INST_TEX_CN_WRITE (1 << 4)
+#define R500_RS_INST_TEX_ADDR_SHIFT 5
+#define R500_RS_INST_COL_ID_SHIFT 12
+#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16)
+#define R500_RS_INST_COL_CN_WRITE (1 << 16)
+#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16)
+#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16)
+#define R500_RS_INST_COL_ADDR_SHIFT 18
+#define R500_RS_INST_TEX_ADJ (1 << 25)
+#define R500_RS_INST_W_CN (1 << 26)
+#define R500_RS_INST_TEX_ID(x) ((x) << R500_RS_INST_TEX_ID_SHIFT)
+#define R500_RS_INST_TEX_ADDR(x) ((x) << R500_RS_INST_TEX_ADDR_SHIFT)
+#define R500_RS_INST_COL_ID(x) ((x) << R500_RS_INST_COL_ID_SHIFT)
+#define R500_RS_INST_COL_ADDR(x) ((x) << R500_RS_INST_COL_ADDR_SHIFT)
+
+/* These DWORDs control how vertex data is routed into fragment program
+ * registers, after interpolators.
+ */
+#define R300_RS_INST_0 0x4330
+#define R300_RS_INST_1 0x4334
+#define R300_RS_INST_2 0x4338
+#define R300_RS_INST_3 0x433C /* GUESS */
+#define R300_RS_INST_4 0x4340 /* GUESS */
+#define R300_RS_INST_5 0x4344 /* GUESS */
+#define R300_RS_INST_6 0x4348 /* GUESS */
+#define R300_RS_INST_7 0x434C /* GUESS */
+# define R300_RS_INST_TEX_ID(x) ((x) << 0)
+# define R300_RS_INST_TEX_CN_WRITE (1 << 3)
+# define R300_RS_INST_TEX_ADDR_SHIFT 6
+# define R300_RS_INST_TEX_ADDR(x) ((x) << R300_RS_INST_TEX_ADDR_SHIFT)
+# define R300_RS_INST_COL_ID(x) ((x) << 11)
+# define R300_RS_INST_COL_CN_WRITE (1 << 14)
+# define R300_RS_INST_COL_ADDR_SHIFT 17
+# define R300_RS_INST_COL_ADDR(x) ((x) << R300_RS_INST_COL_ADDR_SHIFT)
+# define R300_RS_INST_TEX_ADJ (1 << 22)
+# define R300_RS_COL_BIAS_UNUSED_SHIFT 23
+
+/* END: Rasterization / Interpolators - many guesses */
+
+/* Hierarchical Z Enable */
+#define R300_SC_HYPERZ 0x43a4
+# define R300_SC_HYPERZ_DISABLE (0 << 0)
+# define R300_SC_HYPERZ_ENABLE (1 << 0)
+# define R300_SC_HYPERZ_MIN (0 << 1)
+# define R300_SC_HYPERZ_MAX (1 << 1)
+# define R300_SC_HYPERZ_ADJ_256 (0 << 2)
+# define R300_SC_HYPERZ_ADJ_128 (1 << 2)
+# define R300_SC_HYPERZ_ADJ_64 (2 << 2)
+# define R300_SC_HYPERZ_ADJ_32 (3 << 2)
+# define R300_SC_HYPERZ_ADJ_16 (4 << 2)
+# define R300_SC_HYPERZ_ADJ_8 (5 << 2)
+# define R300_SC_HYPERZ_ADJ_4 (6 << 2)
+# define R300_SC_HYPERZ_ADJ_2 (7 << 2)
+# define R300_SC_HYPERZ_HZ_Z0MIN_NO (0 << 5)
+# define R300_SC_HYPERZ_HZ_Z0MIN (1 << 5)
+# define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6)
+# define R300_SC_HYPERZ_HZ_Z0MAX (1 << 6)
+
+#define R300_SC_EDGERULE 0x43a8
+
+/* BEGIN: Scissors and cliprects */
+
+/* There are four clipping rectangles. Their corner coordinates are inclusive.
+ * Every pixel is assigned a number from 0 and 15 by setting bits 0-3 depending
+ * on whether the pixel is inside cliprects 0-3, respectively. For example,
+ * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned
+ * the number 3 (binary 0011).
+ * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set,
+ * the pixel is rasterized.
+ *
+ * In addition to this, there is a scissors rectangle. Only pixels inside the
+ * scissors rectangle are drawn. (coordinates are inclusive)
+ *
+ * For some reason, the top-left corner of the framebuffer is at (1440, 1440)
+ * for the purpose of clipping and scissors.
+ */
+#define R300_SC_CLIPRECT_TL_0 0x43B0
+#define R300_SC_CLIPRECT_BR_0 0x43B4
+#define R300_SC_CLIPRECT_TL_1 0x43B8
+#define R300_SC_CLIPRECT_BR_1 0x43BC
+#define R300_SC_CLIPRECT_TL_2 0x43C0
+#define R300_SC_CLIPRECT_BR_2 0x43C4
+#define R300_SC_CLIPRECT_TL_3 0x43C8
+#define R300_SC_CLIPRECT_BR_3 0x43CC
+# define R300_CLIPRECT_OFFSET 1440
+# define R300_CLIPRECT_MASK 0x1FFF
+# define R300_CLIPRECT_X_SHIFT 0
+# define R300_CLIPRECT_X_MASK (0x1FFF << 0)
+# define R300_CLIPRECT_Y_SHIFT 13
+# define R300_CLIPRECT_Y_MASK (0x1FFF << 13)
+#define R300_SC_CLIP_RULE 0x43D0
+# define R300_CLIP_OUT (1 << 0)
+# define R300_CLIP_0 (1 << 1)
+# define R300_CLIP_1 (1 << 2)
+# define R300_CLIP_10 (1 << 3)
+# define R300_CLIP_2 (1 << 4)
+# define R300_CLIP_20 (1 << 5)
+# define R300_CLIP_21 (1 << 6)
+# define R300_CLIP_210 (1 << 7)
+# define R300_CLIP_3 (1 << 8)
+# define R300_CLIP_30 (1 << 9)
+# define R300_CLIP_31 (1 << 10)
+# define R300_CLIP_310 (1 << 11)
+# define R300_CLIP_32 (1 << 12)
+# define R300_CLIP_320 (1 << 13)
+# define R300_CLIP_321 (1 << 14)
+# define R300_CLIP_3210 (1 << 15)
+
+/* gap */
+
+#define R300_SC_SCISSORS_TL 0x43E0
+#define R300_SC_SCISSORS_BR 0x43E4
+# define R300_SCISSORS_OFFSET 1440
+# define R300_SCISSORS_X_SHIFT 0
+# define R300_SCISSORS_X_MASK (0x1FFF << 0)
+# define R300_SCISSORS_Y_SHIFT 13
+# define R300_SCISSORS_Y_MASK (0x1FFF << 13)
+
+/* Screen door sample mask */
+#define R300_SC_SCREENDOOR 0x43e8
+
+/* END: Scissors and cliprects */
+
+/* BEGIN: Texture specification */
+
+/*
+ * The texture specification dwords are grouped by meaning and not by texture
+ * unit. This means that e.g. the offset for texture image unit N is found in
+ * register TX_OFFSET_0 + (4*N)
+ */
+#define R300_TX_FILTER0_0 0x4400
+#define R300_TX_FILTER0_1 0x4404
+#define R300_TX_FILTER0_2 0x4408
+#define R300_TX_FILTER0_3 0x440c
+#define R300_TX_FILTER0_4 0x4410
+#define R300_TX_FILTER0_5 0x4414
+#define R300_TX_FILTER0_6 0x4418
+#define R300_TX_FILTER0_7 0x441c
+#define R300_TX_FILTER0_8 0x4420
+#define R300_TX_FILTER0_9 0x4424
+#define R300_TX_FILTER0_10 0x4428
+#define R300_TX_FILTER0_11 0x442c
+#define R300_TX_FILTER0_12 0x4430
+#define R300_TX_FILTER0_13 0x4434
+#define R300_TX_FILTER0_14 0x4438
+#define R300_TX_FILTER0_15 0x443c
+# define R300_TX_REPEAT 0
+# define R300_TX_MIRRORED 1
+# define R300_TX_CLAMP_TO_EDGE 2
+# define R300_TX_MIRROR_ONCE_TO_EDGE 3
+# define R300_TX_CLAMP 4
+# define R300_TX_MIRROR_ONCE 5
+# define R300_TX_CLAMP_TO_BORDER 6
+# define R300_TX_MIRROR_ONCE_TO_BORDER 7
+# define R300_TX_WRAP_S_SHIFT 0
+# define R300_TX_WRAP_S_MASK (7 << 0)
+# define R300_TX_WRAP_T_SHIFT 3
+# define R300_TX_WRAP_T_MASK (7 << 3)
+# define R300_TX_WRAP_R_SHIFT 6
+# define R300_TX_WRAP_R_MASK (7 << 6)
+# define R300_TX_MAG_FILTER_4 (0 << 9)
+# define R300_TX_MAG_FILTER_NEAREST (1 << 9)
+# define R300_TX_MAG_FILTER_LINEAR (2 << 9)
+# define R300_TX_MAG_FILTER_ANISO (3 << 9)
+# define R300_TX_MAG_FILTER_MASK (3 << 9)
+# define R300_TX_MIN_FILTER_NEAREST (1 << 11)
+# define R300_TX_MIN_FILTER_LINEAR (2 << 11)
+# define R300_TX_MIN_FILTER_ANISO (3 << 11)
+# define R300_TX_MIN_FILTER_MASK (3 << 11)
+# define R300_TX_MIN_FILTER_MIP_NONE (0 << 13)
+# define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13)
+# define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13)
+# define R300_TX_MIN_FILTER_MIP_MASK (3 << 13)
+# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21)
+# define R300_TX_MAX_ANISO_2_TO_1 (1 << 21)
+# define R300_TX_MAX_ANISO_4_TO_1 (2 << 21)
+# define R300_TX_MAX_ANISO_8_TO_1 (3 << 21)
+# define R300_TX_MAX_ANISO_16_TO_1 (4 << 21)
+# define R300_TX_MAX_ANISO_MASK (7 << 21)
+
+#define R300_TX_FILTER1_0 0x4440
+# define R300_CHROMA_KEY_MODE_DISABLE 0
+# define R300_CHROMA_KEY_FORCE 1
+# define R300_CHROMA_KEY_BLEND 2
+# define R300_MC_ROUND_NORMAL (0<<2)
+# define R300_MC_ROUND_MPEG4 (1<<2)
+# define R300_LOD_BIAS_SHIFT 3
+# define R300_LOD_BIAS_MASK 0x1ff8
+# define R300_EDGE_ANISO_EDGE_DIAG (0<<13)
+# define R300_EDGE_ANISO_EDGE_ONLY (1<<13)
+# define R300_MC_COORD_TRUNCATE_DISABLE (0<<14)
+# define R300_MC_COORD_TRUNCATE_MPEG (1<<14)
+# define R300_TX_TRI_PERF_0_8 (0<<15)
+# define R300_TX_TRI_PERF_1_8 (1<<15)
+# define R300_TX_TRI_PERF_1_4 (2<<15)
+# define R300_TX_TRI_PERF_3_8 (3<<15)
+# define R300_ANISO_THRESHOLD_MASK (7<<17)
+
+# define R500_MACRO_SWITCH (1<<22)
+# define R500_BORDER_FIX (1<<31)
+
+#define R300_TX_SIZE_0 0x4480
+# define R300_TX_WIDTHMASK_SHIFT 0
+# define R300_TX_WIDTHMASK_MASK (2047 << 0)
+# define R300_TX_HEIGHTMASK_SHIFT 11
+# define R300_TX_HEIGHTMASK_MASK (2047 << 11)
+# define R300_TX_DEPTHMASK_SHIFT 22
+# define R300_TX_DEPTHMASK_MASK (0xf << 22)
+# define R300_TX_MAX_MIP_LEVEL_SHIFT 26
+# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26)
+# define R300_TX_SIZE_PROJECTED (1<<30)
+# define R300_TX_SIZE_TXPITCH_EN (1<<31)
+#define R300_TX_FORMAT_0 0x44C0
+ /* The interpretation of the format word by Wladimir van der Laan */
+ /* The X, Y, Z and W refer to the layout of the components.
+ They are given meanings as R, G, B and Alpha by the swizzle
+ specification */
+# define R300_TX_FORMAT_X8 0x0
+# define R500_TX_FORMAT_X1 0x0 // bit set in format 2
+# define R300_TX_FORMAT_X16 0x1
+# define R500_TX_FORMAT_X1_REV 0x0 // bit set in format 2
+# define R300_TX_FORMAT_Y4X4 0x2
+# define R300_TX_FORMAT_Y8X8 0x3
+# define R300_TX_FORMAT_Y16X16 0x4
+# define R300_TX_FORMAT_Z3Y3X2 0x5
+# define R300_TX_FORMAT_Z5Y6X5 0x6
+# define R300_TX_FORMAT_Z6Y5X5 0x7
+# define R300_TX_FORMAT_Z11Y11X10 0x8
+# define R300_TX_FORMAT_Z10Y11X11 0x9
+# define R300_TX_FORMAT_W4Z4Y4X4 0xA
+# define R300_TX_FORMAT_W1Z5Y5X5 0xB
+# define R300_TX_FORMAT_W8Z8Y8X8 0xC
+# define R300_TX_FORMAT_W2Z10Y10X10 0xD
+# define R300_TX_FORMAT_W16Z16Y16X16 0xE
+# define R300_TX_FORMAT_DXT1 0xF
+# define R300_TX_FORMAT_DXT3 0x10
+# define R300_TX_FORMAT_DXT5 0x11
+# define R300_TX_FORMAT_D3DMFT_CxV8U8 0x12 /* no swizzle */
+# define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */
+# define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */
+# define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */
+
+ /* These two values are wrong, but they're the only values that
+ * produce any even vaguely correct results. Can r300 only do 16-bit
+ * depth textures?
+ */
+# define R300_TX_FORMAT_X24_Y8 0x1e
+# define R300_TX_FORMAT_X32 0x1e
+
+ /* 0x16 - some 16 bit green format.. ?? */
+# define R300_TX_FORMAT_3D (1 << 25)
+# define R300_TX_FORMAT_CUBIC_MAP (2 << 25)
+
+# define R300_TX_FORMAT_GAMMA (1 << 21)
+
+ /* gap */
+ /* Floating point formats */
+ /* Note - hardware supports both 16 and 32 bit floating point */
+# define R300_TX_FORMAT_FL_I16 0x18
+# define R300_TX_FORMAT_FL_I16A16 0x19
+# define R300_TX_FORMAT_FL_R16G16B16A16 0x1A
+# define R300_TX_FORMAT_FL_I32 0x1B
+# define R300_TX_FORMAT_FL_I32A32 0x1C
+# define R300_TX_FORMAT_FL_R32G32B32A32 0x1D
+ /* alpha modes, convenience mostly */
+ /* if you have alpha, pick constant appropriate to the
+ number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc */
+# define R300_TX_FORMAT_ALPHA_1CH 0x000
+# define R300_TX_FORMAT_ALPHA_2CH 0x200
+# define R300_TX_FORMAT_ALPHA_4CH 0x600
+# define R300_TX_FORMAT_ALPHA_NONE 0xA00
+ /* Swizzling */
+ /* constants */
+# define R300_TX_FORMAT_X 0
+# define R300_TX_FORMAT_Y 1
+# define R300_TX_FORMAT_Z 2
+# define R300_TX_FORMAT_W 3
+# define R300_TX_FORMAT_ZERO 4
+# define R300_TX_FORMAT_ONE 5
+ /* 2.0*Z, everything above 1.0 is set to 0.0 */
+# define R300_TX_FORMAT_CUT_Z 6
+ /* 2.0*W, everything above 1.0 is set to 0.0 */
+# define R300_TX_FORMAT_CUT_W 7
+
+# define R300_TX_FORMAT_B_SHIFT 18
+# define R300_TX_FORMAT_G_SHIFT 15
+# define R300_TX_FORMAT_R_SHIFT 12
+# define R300_TX_FORMAT_A_SHIFT 9
+ /* Convenience macro to take care of layout and swizzling */
+# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) ( \
+ ((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \
+ | ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \
+ | ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \
+ | ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \
+ | (R300_TX_FORMAT_##FMT) \
+ )
+ /* These can be ORed with result of R300_EASY_TX_FORMAT()
+ We don't really know what they do. Take values from a
+ constant color ? */
+# define R300_TX_FORMAT_CONST_X (1<<5)
+# define R300_TX_FORMAT_CONST_Y (2<<5)
+# define R300_TX_FORMAT_CONST_Z (4<<5)
+# define R300_TX_FORMAT_CONST_W (8<<5)
+
+# define R300_TX_FORMAT_YUV_MODE 0x00800000
+
+#define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */
+# define R300_TX_PITCHMASK_SHIFT 0
+# define R300_TX_PITCHMASK_MASK (2047 << 0)
+# define R500_TXFORMAT_MSB (1 << 14)
+# define R500_TXWIDTH_BIT11 (1 << 15)
+# define R500_TXHEIGHT_BIT11 (1 << 16)
+# define R500_POW2FIX2FLT (1 << 17)
+# define R500_SEL_FILTER4_TC0 (0 << 18)
+# define R500_SEL_FILTER4_TC1 (1 << 18)
+# define R500_SEL_FILTER4_TC2 (2 << 18)
+# define R500_SEL_FILTER4_TC3 (3 << 18)
+
+#define R300_TX_OFFSET_0 0x4540
+#define R300_TX_OFFSET_1 0x4544
+#define R300_TX_OFFSET_2 0x4548
+#define R300_TX_OFFSET_3 0x454C
+#define R300_TX_OFFSET_4 0x4550
+#define R300_TX_OFFSET_5 0x4554
+#define R300_TX_OFFSET_6 0x4558
+#define R300_TX_OFFSET_7 0x455C
+ /* BEGIN: Guess from R200 */
+# define R300_TXO_ENDIAN_NO_SWAP (0 << 0)
+# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0)
+# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0)
+# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
+# define R300_TXO_MACRO_TILE (1 << 2)
+# define R300_TXO_MICRO_TILE_LINEAR (0 << 3)
+# define R300_TXO_MICRO_TILE (1 << 3)
+# define R300_TXO_MICRO_TILE_SQUARE (2 << 3)
+# define R300_TXO_OFFSET_MASK 0xffffffe0
+# define R300_TXO_OFFSET_SHIFT 5
+ /* END: Guess from R200 */
+
+/* 32 bit chroma key */
+#define R300_TX_CHROMA_KEY_0 0x4580
+#define R300_TX_CHROMA_KEY_1 0x4584
+#define R300_TX_CHROMA_KEY_2 0x4588
+#define R300_TX_CHROMA_KEY_3 0x458c
+#define R300_TX_CHROMA_KEY_4 0x4590
+#define R300_TX_CHROMA_KEY_5 0x4594
+#define R300_TX_CHROMA_KEY_6 0x4598
+#define R300_TX_CHROMA_KEY_7 0x459c
+#define R300_TX_CHROMA_KEY_8 0x45a0
+#define R300_TX_CHROMA_KEY_9 0x45a4
+#define R300_TX_CHROMA_KEY_10 0x45a8
+#define R300_TX_CHROMA_KEY_11 0x45ac
+#define R300_TX_CHROMA_KEY_12 0x45b0
+#define R300_TX_CHROMA_KEY_13 0x45b4
+#define R300_TX_CHROMA_KEY_14 0x45b8
+#define R300_TX_CHROMA_KEY_15 0x45bc
+/* ff00ff00 == { 0, 1.0, 0, 1.0 } */
+
+/* Border Color */
+#define R300_TX_BORDER_COLOR_0 0x45c0
+#define R300_TX_BORDER_COLOR_1 0x45c4
+#define R300_TX_BORDER_COLOR_2 0x45c8
+#define R300_TX_BORDER_COLOR_3 0x45cc
+#define R300_TX_BORDER_COLOR_4 0x45d0
+#define R300_TX_BORDER_COLOR_5 0x45d4
+#define R300_TX_BORDER_COLOR_6 0x45d8
+#define R300_TX_BORDER_COLOR_7 0x45dc
+#define R300_TX_BORDER_COLOR_8 0x45e0
+#define R300_TX_BORDER_COLOR_9 0x45e4
+#define R300_TX_BORDER_COLOR_10 0x45e8
+#define R300_TX_BORDER_COLOR_11 0x45ec
+#define R300_TX_BORDER_COLOR_12 0x45f0
+#define R300_TX_BORDER_COLOR_13 0x45f4
+#define R300_TX_BORDER_COLOR_14 0x45f8
+#define R300_TX_BORDER_COLOR_15 0x45fc
+
+
+/* END: Texture specification */
+
+/* BEGIN: Fragment program instruction set */
+
+/* Fragment programs are written directly into register space.
+ * There are separate instruction streams for texture instructions and ALU
+ * instructions.
+ * In order to synchronize these streams, the program is divided into up
+ * to 4 nodes. Each node begins with a number of TEX operations, followed
+ * by a number of ALU operations.
+ * The first node can have zero TEX ops, all subsequent nodes must have at
+ * least
+ * one TEX ops.
+ * All nodes must have at least one ALU op.
+ *
+ * The index of the last node is stored in PFS_CNTL_0: A value of 0 means
+ * 1 node, a value of 3 means 4 nodes.
+ * The total amount of instructions is defined in PFS_CNTL_2. The offsets are
+ * offsets into the respective instruction streams, while *_END points to the
+ * last instruction relative to this offset.
+ */
+#define R300_US_CONFIG 0x4600
+# define R300_PFS_CNTL_LAST_NODES_SHIFT 0
+# define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0)
+# define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3)
+#define R300_US_PIXSIZE 0x4604
+/* There is an unshifted value here which has so far always been equal to the
+ * index of the highest used temporary register.
+ */
+#define R300_US_CODE_OFFSET 0x4608
+# define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0
+# define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0)
+# define R300_PFS_CNTL_ALU_END_SHIFT 6
+# define R300_PFS_CNTL_ALU_END_MASK (63 << 6)
+# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 13
+# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13)
+# define R300_PFS_CNTL_TEX_END_SHIFT 18
+# define R300_PFS_CNTL_TEX_END_MASK (31 << 18)
+
+/* gap */
+
+/* Nodes are stored backwards. The last active node is always stored in
+ * PFS_NODE_3.
+ * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The
+ * first node is stored in NODE_2, the second node is stored in NODE_3.
+ *
+ * Offsets are relative to the master offset from PFS_CNTL_2.
+ */
+#define R300_US_CODE_ADDR_0 0x4610
+#define R300_US_CODE_ADDR_1 0x4614
+#define R300_US_CODE_ADDR_2 0x4618
+#define R300_US_CODE_ADDR_3 0x461C
+# define R300_ALU_START_SHIFT 0
+# define R300_ALU_START_MASK (63 << 0)
+# define R300_ALU_SIZE_SHIFT 6
+# define R300_ALU_SIZE_MASK (63 << 6)
+# define R300_TEX_START_SHIFT 12
+# define R300_TEX_START_MASK (31 << 12)
+# define R300_TEX_SIZE_SHIFT 17
+# define R300_TEX_SIZE_MASK (31 << 17)
+# define R300_RGBA_OUT (1 << 22)
+# define R300_W_OUT (1 << 23)
+
+/* TEX
+ * As far as I can tell, texture instructions cannot write into output
+ * registers directly. A subsequent ALU instruction is always necessary,
+ * even if it's just MAD o0, r0, 1, 0
+ */
+#define R300_US_TEX_INST_0 0x4620
+# define R300_SRC_ADDR_SHIFT 0
+# define R300_SRC_ADDR_MASK (31 << 0)
+# define R300_DST_ADDR_SHIFT 6
+# define R300_DST_ADDR_MASK (31 << 6)
+# define R300_TEX_ID_SHIFT 11
+# define R300_TEX_ID_MASK (15 << 11)
+# define R300_TEX_INST_SHIFT 15
+# define R300_TEX_OP_NOP 0
+# define R300_TEX_OP_LD 1
+# define R300_TEX_OP_KIL 2
+# define R300_TEX_OP_TXP 3
+# define R300_TEX_OP_TXB 4
+# define R300_TEX_INST_MASK (7 << 15)
+
+/* Output format from the unfied shader */
+#define R300_US_OUT_FMT 0x46A4
+# define R300_US_OUT_FMT_C4_8 (0 << 0)
+# define R300_US_OUT_FMT_C4_10 (1 << 0)
+# define R300_US_OUT_FMT_C4_10_GAMMA (2 << 0)
+# define R300_US_OUT_FMT_C_16 (3 << 0)
+# define R300_US_OUT_FMT_C2_16 (4 << 0)
+# define R300_US_OUT_FMT_C4_16 (5 << 0)
+# define R300_US_OUT_FMT_C_16_MPEG (6 << 0)
+# define R300_US_OUT_FMT_C2_16_MPEG (7 << 0)
+# define R300_US_OUT_FMT_C2_4 (8 << 0)
+# define R300_US_OUT_FMT_C_3_3_2 (9 << 0)
+# define R300_US_OUT_FMT_C_6_5_6 (10 << 0)
+# define R300_US_OUT_FMT_C_11_11_10 (11 << 0)
+# define R300_US_OUT_FMT_C_10_11_11 (12 << 0)
+# define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0)
+/* reserved */
+# define R300_US_OUT_FMT_UNUSED (15 << 0)
+# define R300_US_OUT_FMT_C_16_FP (16 << 0)
+# define R300_US_OUT_FMT_C2_16_FP (17 << 0)
+# define R300_US_OUT_FMT_C4_16_FP (18 << 0)
+# define R300_US_OUT_FMT_C_32_FP (19 << 0)
+# define R300_US_OUT_FMT_C2_32_FP (20 << 0)
+# define R300_US_OUT_FMT_C4_32_FP (20 << 0)
+
+/* ALU
+ * The ALU instructions register blocks are enumerated according to the order
+ * in which fglrx. I assume there is space for 64 instructions, since
+ * each block has space for a maximum of 64 DWORDs, and this matches reported
+ * native limits.
+ *
+ * The basic functional block seems to be one MAD for each color and alpha,
+ * and an adder that adds all components after the MUL.
+ * - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands
+ * - DP4: Use OUTC_DP4, OUTA_DP4
+ * - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands
+ * - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands
+ * - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1
+ * - CMP: If ARG2 < 0, return ARG1, else return ARG0
+ * - FLR: use FRC+MAD
+ * - XPD: use MAD+MAD
+ * - SGE, SLT: use MAD+CMP
+ * - RSQ: use ABS modifier for argument
+ * - Use OUTC_REPL_ALPHA to write results of an alpha-only operation
+ * (e.g. RCP) into color register
+ * - apparently, there's no quick DST operation
+ * - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2"
+ * - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0"
+ * - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1"
+ *
+ * Operand selection
+ * First stage selects three sources from the available registers and
+ * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha).
+ * fglrx sorts the three source fields: Registers before constants,
+ * lower indices before higher indices; I do not know whether this is
+ * necessary.
+ *
+ * fglrx fills unused sources with "read constant 0"
+ * According to specs, you cannot select more than two different constants.
+ *
+ * Second stage selects the operands from the sources. This is defined in
+ * INSTR0 (color) and INSTR2 (alpha). You can also select the special constants
+ * zero and one.
+ * Swizzling and negation happens in this stage, as well.
+ *
+ * Important: Color and alpha seem to be mostly separate, i.e. their sources
+ * selection appears to be fully independent (the register storage is probably
+ * physically split into a color and an alpha section).
+ * However (because of the apparent physical split), there is some interaction
+ * WRT swizzling. If, for example, you want to load an R component into an
+ * Alpha operand, this R component is taken from a *color* source, not from
+ * an alpha source. The corresponding register doesn't even have to appear in
+ * the alpha sources list. (I hope this all makes sense to you)
+ *
+ * Destination selection
+ * The destination register index is in FPI1 (color) and FPI3 (alpha)
+ * together with enable bits.
+ * There are separate enable bits for writing into temporary registers
+ * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_*
+ * /DSTA_OUTPUT). You can write to both at once, or not write at all (the
+ * same index must be used for both).
+ *
+ * Note: There is a special form for LRP
+ * - Argument order is the same as in ARB_fragment_program.
+ * - Operation is MAD
+ * - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP
+ * - Set FPI0/FPI2_SPECIAL_LRP
+ * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD
+ */
+#define R300_US_ALU_RGB_ADDR_0 0x46C0
+# define R300_ALU_SRC0C_SHIFT 0
+# define R300_ALU_SRC0C_MASK (31 << 0)
+# define R300_ALU_SRC0C_CONST (1 << 5)
+# define R300_ALU_SRC1C_SHIFT 6
+# define R300_ALU_SRC1C_MASK (31 << 6)
+# define R300_ALU_SRC1C_CONST (1 << 11)
+# define R300_ALU_SRC2C_SHIFT 12
+# define R300_ALU_SRC2C_MASK (31 << 12)
+# define R300_ALU_SRC2C_CONST (1 << 17)
+# define R300_ALU_SRC_MASK 0x0003ffff
+# define R300_ALU_DSTC_SHIFT 18
+# define R300_ALU_DSTC_MASK (31 << 18)
+# define R300_ALU_DSTC_REG_MASK_SHIFT 23
+# define R300_ALU_DSTC_REG_X (1 << 23)
+# define R300_ALU_DSTC_REG_Y (1 << 24)
+# define R300_ALU_DSTC_REG_Z (1 << 25)
+# define R300_ALU_DSTC_OUTPUT_MASK_SHIFT 26
+# define R300_ALU_DSTC_OUTPUT_X (1 << 26)
+# define R300_ALU_DSTC_OUTPUT_Y (1 << 27)
+# define R300_ALU_DSTC_OUTPUT_Z (1 << 28)
+# define R300_RGB_TARGET(x) ((x) << 29)
+
+#define R300_US_ALU_ALPHA_ADDR_0 0x47C0
+# define R300_ALU_SRC0A_SHIFT 0
+# define R300_ALU_SRC0A_MASK (31 << 0)
+# define R300_ALU_SRC0A_CONST (1 << 5)
+# define R300_ALU_SRC1A_SHIFT 6
+# define R300_ALU_SRC1A_MASK (31 << 6)
+# define R300_ALU_SRC1A_CONST (1 << 11)
+# define R300_ALU_SRC2A_SHIFT 12
+# define R300_ALU_SRC2A_MASK (31 << 12)
+# define R300_ALU_SRC2A_CONST (1 << 17)
+# define R300_ALU_SRC_MASK 0x0003ffff
+# define R300_ALU_DSTA_SHIFT 18
+# define R300_ALU_DSTA_MASK (31 << 18)
+# define R300_ALU_DSTA_REG (1 << 23)
+# define R300_ALU_DSTA_OUTPUT (1 << 24)
+# define R300_ALU_DSTA_DEPTH (1 << 27)
+# define R300_ALPHA_TARGET(x) ((x) << 25)
+
+#define R300_US_ALU_RGB_INST_0 0x48C0
+# define R300_ALU_ARGC_SRC0C_XYZ 0
+# define R300_ALU_ARGC_SRC0C_XXX 1
+# define R300_ALU_ARGC_SRC0C_YYY 2
+# define R300_ALU_ARGC_SRC0C_ZZZ 3
+# define R300_ALU_ARGC_SRC1C_XYZ 4
+# define R300_ALU_ARGC_SRC1C_XXX 5
+# define R300_ALU_ARGC_SRC1C_YYY 6
+# define R300_ALU_ARGC_SRC1C_ZZZ 7
+# define R300_ALU_ARGC_SRC2C_XYZ 8
+# define R300_ALU_ARGC_SRC2C_XXX 9
+# define R300_ALU_ARGC_SRC2C_YYY 10
+# define R300_ALU_ARGC_SRC2C_ZZZ 11
+# define R300_ALU_ARGC_SRC0A 12
+# define R300_ALU_ARGC_SRC1A 13
+# define R300_ALU_ARGC_SRC2A 14
+# define R300_ALU_ARGC_SRCP_XYZ 15
+# define R300_ALU_ARGC_SRCP_XXX 16
+# define R300_ALU_ARGC_SRCP_YYY 17
+# define R300_ALU_ARGC_SRCP_ZZZ 18
+# define R300_ALU_ARGC_SRCP_WWW 19
+# define R300_ALU_ARGC_ZERO 20
+# define R300_ALU_ARGC_ONE 21
+# define R300_ALU_ARGC_HALF 22
+# define R300_ALU_ARGC_SRC0C_YZX 23
+# define R300_ALU_ARGC_SRC1C_YZX 24
+# define R300_ALU_ARGC_SRC2C_YZX 25
+# define R300_ALU_ARGC_SRC0C_ZXY 26
+# define R300_ALU_ARGC_SRC1C_ZXY 27
+# define R300_ALU_ARGC_SRC2C_ZXY 28
+# define R300_ALU_ARGC_SRC0CA_WZY 29
+# define R300_ALU_ARGC_SRC1CA_WZY 30
+# define R300_ALU_ARGC_SRC2CA_WZY 31
+
+# define R300_ALU_ARG0C_SHIFT 0
+# define R300_ALU_ARG0C_MASK (31 << 0)
+# define R300_ALU_ARG0C_NOP (0 << 5)
+# define R300_ALU_ARG0C_NEG (1 << 5)
+# define R300_ALU_ARG0C_ABS (2 << 5)
+# define R300_ALU_ARG0C_NAB (3 << 5)
+# define R300_ALU_ARG1C_SHIFT 7
+# define R300_ALU_ARG1C_MASK (31 << 7)
+# define R300_ALU_ARG1C_NOP (0 << 12)
+# define R300_ALU_ARG1C_NEG (1 << 12)
+# define R300_ALU_ARG1C_ABS (2 << 12)
+# define R300_ALU_ARG1C_NAB (3 << 12)
+# define R300_ALU_ARG2C_SHIFT 14
+# define R300_ALU_ARG2C_MASK (31 << 14)
+# define R300_ALU_ARG2C_NOP (0 << 19)
+# define R300_ALU_ARG2C_NEG (1 << 19)
+# define R300_ALU_ARG2C_ABS (2 << 19)
+# define R300_ALU_ARG2C_NAB (3 << 19)
+# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21)
+# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21)
+# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21)
+# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21)
+
+# define R300_ALU_OUTC_MAD (0 << 23)
+# define R300_ALU_OUTC_DP3 (1 << 23)
+# define R300_ALU_OUTC_DP4 (2 << 23)
+# define R300_ALU_OUTC_D2A (3 << 23)
+# define R300_ALU_OUTC_MIN (4 << 23)
+# define R300_ALU_OUTC_MAX (5 << 23)
+# define R300_ALU_OUTC_CMPH (7 << 23)
+# define R300_ALU_OUTC_CMP (8 << 23)
+# define R300_ALU_OUTC_FRC (9 << 23)
+# define R300_ALU_OUTC_REPL_ALPHA (10 << 23)
+
+# define R300_ALU_OUTC_MOD_NOP (0 << 27)
+# define R300_ALU_OUTC_MOD_MUL2 (1 << 27)
+# define R300_ALU_OUTC_MOD_MUL4 (2 << 27)
+# define R300_ALU_OUTC_MOD_MUL8 (3 << 27)
+# define R300_ALU_OUTC_MOD_DIV2 (4 << 27)
+# define R300_ALU_OUTC_MOD_DIV4 (5 << 27)
+# define R300_ALU_OUTC_MOD_DIV8 (6 << 27)
+
+# define R300_ALU_OUTC_CLAMP (1 << 30)
+# define R300_ALU_INSERT_NOP (1 << 31)
+
+#define R300_US_ALU_ALPHA_INST_0 0x49C0
+# define R300_ALU_ARGA_SRC0C_X 0
+# define R300_ALU_ARGA_SRC0C_Y 1
+# define R300_ALU_ARGA_SRC0C_Z 2
+# define R300_ALU_ARGA_SRC1C_X 3
+# define R300_ALU_ARGA_SRC1C_Y 4
+# define R300_ALU_ARGA_SRC1C_Z 5
+# define R300_ALU_ARGA_SRC2C_X 6
+# define R300_ALU_ARGA_SRC2C_Y 7
+# define R300_ALU_ARGA_SRC2C_Z 8
+# define R300_ALU_ARGA_SRC0A 9
+# define R300_ALU_ARGA_SRC1A 10
+# define R300_ALU_ARGA_SRC2A 11
+# define R300_ALU_ARGA_SRCP_X 12
+# define R300_ALU_ARGA_SRCP_Y 13
+# define R300_ALU_ARGA_SRCP_Z 14
+# define R300_ALU_ARGA_SRCP_W 15
+
+# define R300_ALU_ARGA_ZERO 16
+# define R300_ALU_ARGA_ONE 17
+# define R300_ALU_ARGA_HALF 18
+# define R300_ALU_ARG0A_SHIFT 0
+# define R300_ALU_ARG0A_MASK (31 << 0)
+# define R300_ALU_ARG0A_NOP (0 << 5)
+# define R300_ALU_ARG0A_NEG (1 << 5)
+# define R300_ALU_ARG0A_ABS (2 << 5)
+# define R300_ALU_ARG0A_NAB (3 << 5)
+# define R300_ALU_ARG1A_SHIFT 7
+# define R300_ALU_ARG1A_MASK (31 << 7)
+# define R300_ALU_ARG1A_NOP (0 << 12)
+# define R300_ALU_ARG1A_NEG (1 << 12)
+# define R300_ALU_ARG1A_ABS (2 << 12)
+# define R300_ALU_ARG1A_NAB (3 << 12)
+# define R300_ALU_ARG2A_SHIFT 14
+# define R300_ALU_ARG2A_MASK (31 << 14)
+# define R300_ALU_ARG2A_NOP (0 << 19)
+# define R300_ALU_ARG2A_NEG (1 << 19)
+# define R300_ALU_ARG2A_ABS (2 << 19)
+# define R300_ALU_ARG2A_NAB (3 << 19)
+# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21)
+# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21)
+# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21)
+# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21)
+
+# define R300_ALU_OUTA_MAD (0 << 23)
+# define R300_ALU_OUTA_DP4 (1 << 23)
+# define R300_ALU_OUTA_MIN (2 << 23)
+# define R300_ALU_OUTA_MAX (3 << 23)
+# define R300_ALU_OUTA_CND (5 << 23)
+# define R300_ALU_OUTA_CMP (6 << 23)
+# define R300_ALU_OUTA_FRC (7 << 23)
+# define R300_ALU_OUTA_EX2 (8 << 23)
+# define R300_ALU_OUTA_LG2 (9 << 23)
+# define R300_ALU_OUTA_RCP (10 << 23)
+# define R300_ALU_OUTA_RSQ (11 << 23)
+
+# define R300_ALU_OUTA_MOD_NOP (0 << 27)
+# define R300_ALU_OUTA_MOD_MUL2 (1 << 27)
+# define R300_ALU_OUTA_MOD_MUL4 (2 << 27)
+# define R300_ALU_OUTA_MOD_MUL8 (3 << 27)
+# define R300_ALU_OUTA_MOD_DIV2 (4 << 27)
+# define R300_ALU_OUTA_MOD_DIV4 (5 << 27)
+# define R300_ALU_OUTA_MOD_DIV8 (6 << 27)
+
+# define R300_ALU_OUTA_CLAMP (1 << 30)
+/* END: Fragment program instruction set */
+
+/* Fog: Fog Blending Enable */
+#define R300_FG_FOG_BLEND 0x4bc0
+# define R300_FG_FOG_BLEND_DISABLE (0 << 0)
+# define R300_FG_FOG_BLEND_ENABLE (1 << 0)
+# define R300_FG_FOG_BLEND_FN_LINEAR (0 << 1)
+# define R300_FG_FOG_BLEND_FN_EXP (1 << 1)
+# define R300_FG_FOG_BLEND_FN_EXP2 (2 << 1)
+# define R300_FG_FOG_BLEND_FN_CONSTANT (3 << 1)
+# define R300_FG_FOG_BLEND_FN_MASK (3 << 1)
+
+/* Fog: Red Component of Fog Color */
+#define R300_FG_FOG_COLOR_R 0x4bc8
+/* Fog: Green Component of Fog Color */
+#define R300_FG_FOG_COLOR_G 0x4bcc
+/* Fog: Blue Component of Fog Color */
+#define R300_FG_FOG_COLOR_B 0x4bd0
+# define R300_FG_FOG_COLOR_MASK 0x000003ff
+
+/* Fog: Constant Factor for Fog Blending */
+#define R300_FG_FOG_FACTOR 0x4bc4
+# define FG_FOG_FACTOR_MASK 0x000003ff
+
+/* Fog: Alpha function */
+#define R300_FG_ALPHA_FUNC 0x4bd4
+# define R300_FG_ALPHA_FUNC_VAL_MASK 0x000000ff
+# define R300_FG_ALPHA_FUNC_NEVER (0 << 8)
+# define R300_FG_ALPHA_FUNC_LESS (1 << 8)
+# define R300_FG_ALPHA_FUNC_EQUAL (2 << 8)
+# define R300_FG_ALPHA_FUNC_LE (3 << 8)
+# define R300_FG_ALPHA_FUNC_GREATER (4 << 8)
+# define R300_FG_ALPHA_FUNC_NOTEQUAL (5 << 8)
+# define R300_FG_ALPHA_FUNC_GE (6 << 8)
+# define R300_FG_ALPHA_FUNC_ALWAYS (7 << 8)
+# define R300_ALPHA_TEST_OP_MASK (7 << 8)
+# define R300_FG_ALPHA_FUNC_DISABLE (0 << 11)
+# define R300_FG_ALPHA_FUNC_ENABLE (1 << 11)
+
+# define R500_FG_ALPHA_FUNC_10BIT (0 << 12)
+# define R500_FG_ALPHA_FUNC_8BIT (1 << 12)
+
+# define R300_FG_ALPHA_FUNC_MASK_DISABLE (0 << 16)
+# define R300_FG_ALPHA_FUNC_MASK_ENABLE (1 << 16)
+# define R300_FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17)
+# define R300_FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17)
+
+# define R300_FG_ALPHA_FUNC_DITH_DISABLE (0 << 20)
+# define R300_FG_ALPHA_FUNC_DITH_ENABLE (1 << 20)
+
+# define R500_FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24)
+# define R500_FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */
+# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25)
+# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25)
+
+# define R500_FG_ALPHA_FUNC_FP16_DISABLE (0 << 28)
+# define R500_FG_ALPHA_FUNC_FP16_ENABLE (1 << 28)
+
+
+/* Fog: Where does the depth come from? */
+#define R300_FG_DEPTH_SRC 0x4bd8
+# define R300_FG_DEPTH_SRC_SCAN (0 << 0)
+# define R300_FG_DEPTH_SRC_SHADER (1 << 0)
+
+/* Fog: Alpha Compare Value */
+#define R500_FG_ALPHA_VALUE 0x4be0
+# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff
+
+#define RV530_FG_ZBREG_DEST 0x4be8
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0)
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1)
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0)
+
+/* gap */
+
+/* Fragment program parameters in 7.16 floating point */
+#define R300_PFS_PARAM_0_X 0x4C00
+#define R300_PFS_PARAM_0_Y 0x4C04
+#define R300_PFS_PARAM_0_Z 0x4C08
+#define R300_PFS_PARAM_0_W 0x4C0C
+/* last consts */
+#define R300_PFS_PARAM_31_X 0x4DF0
+#define R300_PFS_PARAM_31_Y 0x4DF4
+#define R300_PFS_PARAM_31_Z 0x4DF8
+#define R300_PFS_PARAM_31_W 0x4DFC
+
+/* Unpipelined. */
+#define R300_RB3D_CCTL 0x4e00
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5)
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5)
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5)
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS (3 << 5)
+# define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE (0 << 7)
+# define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE (1 << 7)
+# define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE (0 << 9)
+# define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE (1 << 9)
+# define R300_RB3D_CCTL_CMASK_DISABLE (0 << 10)
+# define R300_RB3D_CCTL_CMASK_ENABLE (1 << 10)
+/* reserved */
+# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_DISABLE (0 << 12)
+# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE (1 << 12)
+# define R300_RB3D_CCTL_WRITE_COMPRESSION_ENABLE (0 << 13)
+# define R300_RB3D_CCTL_WRITE_COMPRESSION_DISABLE (1 << 13)
+# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_DISABLE (0 << 14)
+# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE (1 << 14)
+
+
+/* Notes:
+ * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in
+ * the application
+ * - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND
+ * are set to the same
+ * function (both registers are always set up completely in any case)
+ * - Most blend flags are simply copied from R200 and not tested yet
+ */
+#define R300_RB3D_CBLEND 0x4E04
+#define R300_RB3D_ABLEND 0x4E08
+/* the following only appear in CBLEND */
+# define R300_ALPHA_BLEND_ENABLE (1 << 0)
+# define R300_SEPARATE_ALPHA_ENABLE (1 << 1)
+# define R300_READ_ENABLE (1 << 2)
+# define R300_DISCARD_SRC_PIXELS_DIS (0 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0 (1 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0 (2 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0 (3 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3)
+
+/* the following are shared between CBLEND and ABLEND */
+# define R300_FCN_MASK (3 << 12)
+# define R300_COMB_FCN_ADD_CLAMP (0 << 12)
+# define R300_COMB_FCN_ADD_NOCLAMP (1 << 12)
+# define R300_COMB_FCN_SUB_CLAMP (2 << 12)
+# define R300_COMB_FCN_SUB_NOCLAMP (3 << 12)
+# define R300_COMB_FCN_MIN (4 << 12)
+# define R300_COMB_FCN_MAX (5 << 12)
+# define R300_COMB_FCN_RSUB_CLAMP (6 << 12)
+# define R300_COMB_FCN_RSUB_NOCLAMP (7 << 12)
+# define R300_BLEND_GL_ZERO (32)
+# define R300_BLEND_GL_ONE (33)
+# define R300_BLEND_GL_SRC_COLOR (34)
+# define R300_BLEND_GL_ONE_MINUS_SRC_COLOR (35)
+# define R300_BLEND_GL_DST_COLOR (36)
+# define R300_BLEND_GL_ONE_MINUS_DST_COLOR (37)
+# define R300_BLEND_GL_SRC_ALPHA (38)
+# define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA (39)
+# define R300_BLEND_GL_DST_ALPHA (40)
+# define R300_BLEND_GL_ONE_MINUS_DST_ALPHA (41)
+# define R300_BLEND_GL_SRC_ALPHA_SATURATE (42)
+# define R300_BLEND_GL_CONST_COLOR (43)
+# define R300_BLEND_GL_ONE_MINUS_CONST_COLOR (44)
+# define R300_BLEND_GL_CONST_ALPHA (45)
+# define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA (46)
+# define R300_BLEND_MASK (63)
+# define R300_SRC_BLEND_SHIFT (16)
+# define R300_DST_BLEND_SHIFT (24)
+
+/* Constant color used by the blender. Pipelined through the blender.
+ * Note: For R520, this field is ignored, use RB3D_CONSTANT_COLOR_GB__BLUE,
+ * RB3D_CONSTANT_COLOR_GB__GREEN, etc. instead.
+ */
+#define R300_RB3D_BLEND_COLOR 0x4E10
+
+
+/* 3D Color Channel Mask. If all the channels used in the current color format
+ * are disabled, then the cb will discard all the incoming quads. Pipelined
+ * through the blender.
+ */
+#define RB3D_COLOR_CHANNEL_MASK 0x4E0C
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 (1 << 0)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 (1 << 1)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK0 (1 << 2)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 (1 << 3)
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK1 (1 << 4)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK1 (1 << 5)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK1 (1 << 6)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK1 (1 << 7)
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK2 (1 << 8)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK2 (1 << 9)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK2 (1 << 10)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK2 (1 << 11)
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK3 (1 << 12)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK3 (1 << 13)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK3 (1 << 14)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK3 (1 << 15)
+
+/* Clear color that is used when the color mask is set to 00. Unpipelined.
+ * Program this register with a 32-bit value in ARGB8888 or ARGB2101010
+ * formats, ignoring the fields.
+ */
+#define RB3D_COLOR_CLEAR_VALUE 0x4e14
+
+/* gap */
+
+/* Color Compare Color. Stalls the 2d/3d datapath until it is idle. */
+#define RB3D_CLRCMP_CLR 0x4e20
+
+/* Color Compare Mask. Stalls the 2d/3d datapath until it is idle. */
+#define RB3D_CLRCMP_MSK 0x4e24
+
+/* Color Buffer Address Offset of multibuffer 0. Unpipelined. */
+#define R300_RB3D_COLOROFFSET0 0x4E28
+# define R300_COLOROFFSET_MASK 0xFFFFFFE0
+/* Color Buffer Address Offset of multibuffer 1. Unpipelined. */
+#define R300_RB3D_COLOROFFSET1 0x4E2C
+/* Color Buffer Address Offset of multibuffer 2. Unpipelined. */
+#define R300_RB3D_COLOROFFSET2 0x4E30
+/* Color Buffer Address Offset of multibuffer 3. Unpipelined. */
+#define R300_RB3D_COLOROFFSET3 0x4E34
+
+/* Color buffer format and tiling control for all the multibuffers and the
+ * pitch of multibuffer 0 to 3. Unpipelined. The cache must be empty before any
+ * of the registers are changed.
+ *
+ * Bit 16: Larger tiles
+ * Bit 17: 4x2 tiles
+ * Bit 18: Extremely weird tile like, but some pixels duplicated?
+ */
+#define R300_RB3D_COLORPITCH0 0x4E38
+# define R300_COLORPITCH_MASK 0x00003FFE
+# define R300_COLOR_TILE_DISABLE (0 << 16)
+# define R300_COLOR_TILE_ENABLE (1 << 16)
+# define R300_COLOR_MICROTILE_DISABLE (0 << 17)
+# define R300_COLOR_MICROTILE_ENABLE (1 << 17)
+# define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */
+# define R300_COLOR_ENDIAN_NO_SWAP (0 << 19)
+# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19)
+# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19)
+# define R300_COLOR_ENDIAN_HALF_DWORD_SWAP (3 << 19)
+# define R500_COLOR_FORMAT_ARGB10101010 (0 << 21)
+# define R500_COLOR_FORMAT_UV1010 (1 << 21)
+# define R500_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */
+# define R300_COLOR_FORMAT_ARGB1555 (3 << 21)
+# define R300_COLOR_FORMAT_RGB565 (4 << 21)
+# define R500_COLOR_FORMAT_ARGB2101010 (5 << 21)
+# define R300_COLOR_FORMAT_ARGB8888 (6 << 21)
+# define R300_COLOR_FORMAT_ARGB32323232 (7 << 21)
+/* reserved */
+# define R300_COLOR_FORMAT_I8 (9 << 21)
+# define R300_COLOR_FORMAT_ARGB16161616 (10 << 21)
+# define R300_COLOR_FORMAT_VYUY (11 << 21)
+# define R300_COLOR_FORMAT_YVYU (12 << 21)
+# define R300_COLOR_FORMAT_UV88 (13 << 21)
+# define R500_COLOR_FORMAT_I10 (14 << 21)
+# define R300_COLOR_FORMAT_ARGB4444 (15 << 21)
+#define R300_RB3D_COLORPITCH1 0x4E3C
+#define R300_RB3D_COLORPITCH2 0x4E40
+#define R300_RB3D_COLORPITCH3 0x4E44
+
+/* gap */
+
+/* Destination Color Buffer Cache Control/Status. If the cb is in e2 mode, then
+ * a flush or free will not occur upon a write to this register, but a sync
+ * will be immediately sent if one is requested. If both DC_FLUSH and DC_FREE
+ * are zero but DC_FINISH is one, then a sync will be sent immediately -- the
+ * cb will not wait for all the previous operations to complete before sending
+ * the sync. Unpipelined except when DC_FINISH and DC_FREE are both set to
+ * zero.
+ *
+ * Set to 0A before 3D operations, set to 02 afterwards.
+ */
+#define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4)
+
+#define R300_RB3D_DITHER_CTL 0x4E50
+# define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE (0 << 0)
+# define R300_RB3D_DITHER_CTL_DITHER_MODE_ROUND (1 << 0)
+# define R300_RB3D_DITHER_CTL_DITHER_MODE_LUT (2 << 0)
+/* reserved */
+# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_TRUNCATE (0 << 2)
+# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_ROUND (1 << 2)
+# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT (2 << 2)
+/* reserved */
+
+/* Resolve buffer destination address. The cache must be empty before changing
+ * this register if the cb is in resolve mode. Unpipelined
+ */
+#define R300_RB3D_AARESOLVE_OFFSET 0x4e80
+# define R300_RB3D_AARESOLVE_OFFSET_SHIFT 5
+# define R300_RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */
+
+/* Resolve Buffer Pitch and Tiling Control. The cache must be empty before
+ * changing this register if the cb is in resolve mode. Unpipelined
+ */
+#define R300_RB3D_AARESOLVE_PITCH 0x4e84
+# define R300_RB3D_AARESOLVE_PITCH_SHIFT 1
+# define R300_RB3D_AARESOLVE_PITCH_MASK 0x00003ffe /* At least according to the calculations of Christoph Brill */
+
+/* Resolve Buffer Control. Unpipelined */
+#define R300_RB3D_AARESOLVE_CTL 0x4e88
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL (0 << 0)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE (1 << 0)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 (0 << 1)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2)
+
+
+/* Discard src pixels less than or equal to threshold. */
+#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0
+/* Discard src pixels greater than or equal to threshold. */
+#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000
+
+/* 3D ROP Control. Stalls the 2d/3d datapath until it is idle. */
+#define R300_RB3D_ROPCNTL 0x4e18
+# define R300_RB3D_ROPCNTL_ROP_ENABLE 0x00000004
+# define R300_RB3D_ROPCNTL_ROP_MASK (15 << 8)
+# define R300_RB3D_ROPCNTL_ROP_SHIFT 8
+
+/* Color Compare Flip. Stalls the 2d/3d datapath until it is idle. */
+#define R300_RB3D_CLRCMP_FLIPE 0x4e1c
+
+/* Sets the fifo sizes */
+#define R500_RB3D_FIFO_SIZE 0x4ef4
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0)
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0)
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0)
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0)
+
+/* Constant color used by the blender. Pipelined through the blender. */
+#define R500_RB3D_CONSTANT_COLOR_AR 0x4ef8
+# define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK 0x0000ffff
+# define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0
+# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK 0xffff0000
+# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16
+
+/* Constant color used by the blender. Pipelined through the blender. */
+#define R500_RB3D_CONSTANT_COLOR_GB 0x4efc
+# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK 0x0000ffff
+# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT 0
+# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK 0xffff0000
+# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT 16
+
+/* gap */
+/* There seems to be no "write only" setting, so use Z-test = ALWAYS
+ * for this.
+ * Bit (1<<8) is the "test" bit. so plain write is 6 - vd
+ */
+#define R300_ZB_CNTL 0x4F00
+# define R300_STENCIL_ENABLE (1 << 0)
+# define R300_Z_ENABLE (1 << 1)
+# define R300_Z_WRITE_ENABLE (1 << 2)
+# define R300_Z_SIGNED_COMPARE (1 << 3)
+# define R300_STENCIL_FRONT_BACK (1 << 4)
+# define R400_ZSIGNED_MAGNITUDE (1 << 5)
+# define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6)
+
+#define R300_ZB_ZSTENCILCNTL 0x4f04
+ /* functions */
+# define R300_ZS_NEVER 0
+# define R300_ZS_LESS 1
+# define R300_ZS_LEQUAL 2
+# define R300_ZS_EQUAL 3
+# define R300_ZS_GEQUAL 4
+# define R300_ZS_GREATER 5
+# define R300_ZS_NOTEQUAL 6
+# define R300_ZS_ALWAYS 7
+# define R300_ZS_MASK 7
+ /* operations */
+# define R300_ZS_KEEP 0
+# define R300_ZS_ZERO 1
+# define R300_ZS_REPLACE 2
+# define R300_ZS_INCR 3
+# define R300_ZS_DECR 4
+# define R300_ZS_INVERT 5
+# define R300_ZS_INCR_WRAP 6
+# define R300_ZS_DECR_WRAP 7
+# define R300_Z_FUNC_SHIFT 0
+ /* front and back refer to operations done for front
+ and back faces, i.e. separate stencil function support */
+# define R300_S_FRONT_FUNC_SHIFT 3
+# define R300_S_FRONT_SFAIL_OP_SHIFT 6
+# define R300_S_FRONT_ZPASS_OP_SHIFT 9
+# define R300_S_FRONT_ZFAIL_OP_SHIFT 12
+# define R300_S_BACK_FUNC_SHIFT 15
+# define R300_S_BACK_SFAIL_OP_SHIFT 18
+# define R300_S_BACK_ZPASS_OP_SHIFT 21
+# define R300_S_BACK_ZFAIL_OP_SHIFT 24
+
+#define R300_ZB_STENCILREFMASK 0x4f08
+# define R300_STENCILREF_SHIFT 0
+# define R300_STENCILREF_MASK 0x000000ff
+# define R300_STENCILMASK_SHIFT 8
+# define R300_STENCILMASK_MASK 0x0000ff00
+# define R300_STENCILWRITEMASK_SHIFT 16
+# define R300_STENCILWRITEMASK_MASK 0x00ff0000
+
+/* gap */
+
+#define R300_ZB_FORMAT 0x4f10
+# define R300_DEPTHFORMAT_16BIT_INT_Z (0 << 0)
+# define R300_DEPTHFORMAT_16BIT_13E3 (1 << 0)
+# define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL (2 << 0)
+/* reserved up to (15 << 0) */
+# define R300_INVERT_13E3_LEADING_ONES (0 << 4)
+# define R300_INVERT_13E3_LEADING_ZEROS (1 << 4)
+
+#define R300_ZB_ZTOP 0x4F14
+# define R300_ZTOP_DISABLE (0 << 0)
+# define R300_ZTOP_ENABLE (1 << 0)
+
+/* gap */
+
+#define R300_ZB_ZCACHE_CTLSTAT 0x4f18
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1 << 31)
+
+#define R300_ZB_BW_CNTL 0x4f1c
+# define R300_HIZ_DISABLE (0 << 0)
+# define R300_HIZ_ENABLE (1 << 0)
+# define R300_HIZ_MIN (0 << 1)
+# define R300_HIZ_MAX (1 << 1)
+# define R300_FAST_FILL_DISABLE (0 << 2)
+# define R300_FAST_FILL_ENABLE (1 << 2)
+# define R300_RD_COMP_DISABLE (0 << 3)
+# define R300_RD_COMP_ENABLE (1 << 3)
+# define R300_WR_COMP_DISABLE (0 << 4)
+# define R300_WR_COMP_ENABLE (1 << 4)
+# define R300_ZB_CB_CLEAR_RMW (0 << 5)
+# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5)
+# define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6)
+# define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6)
+
+# define R500_ZEQUAL_OPTIMIZE_ENABLE (0 << 7)
+# define R500_ZEQUAL_OPTIMIZE_DISABLE (1 << 7)
+# define R500_SEQUAL_OPTIMIZE_ENABLE (0 << 8)
+# define R500_SEQUAL_OPTIMIZE_DISABLE (1 << 8)
+
+# define R500_BMASK_ENABLE (0 << 10)
+# define R500_BMASK_DISABLE (1 << 10)
+# define R500_HIZ_EQUAL_REJECT_DISABLE (0 << 11)
+# define R500_HIZ_EQUAL_REJECT_ENABLE (1 << 11)
+# define R500_HIZ_FP_EXP_BITS_DISABLE (0 << 12)
+# define R500_HIZ_FP_EXP_BITS_1 (1 << 12)
+# define R500_HIZ_FP_EXP_BITS_2 (2 << 12)
+# define R500_HIZ_FP_EXP_BITS_3 (3 << 12)
+# define R500_HIZ_FP_EXP_BITS_4 (4 << 12)
+# define R500_HIZ_FP_EXP_BITS_5 (5 << 12)
+# define R500_HIZ_FP_INVERT_LEADING_ONES (0 << 15)
+# define R500_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15)
+# define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16)
+# define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16)
+# define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17)
+# define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17)
+# define R500_PEQ_PACKING_DISABLE (0 << 18)
+# define R500_PEQ_PACKING_ENABLE (1 << 18)
+# define R500_COVERED_PTR_MASKING_DISABLE (0 << 18)
+# define R500_COVERED_PTR_MASKING_ENABLE (1 << 18)
+
+
+/* gap */
+
+/* Z Buffer Address Offset.
+ * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles.
+ */
+#define R300_ZB_DEPTHOFFSET 0x4f20
+
+/* Z Buffer Pitch and Endian Control */
+#define R300_ZB_DEPTHPITCH 0x4f24
+# define R300_DEPTHPITCH_MASK 0x00003FFC
+# define R300_DEPTHMACROTILE_DISABLE (0 << 16)
+# define R300_DEPTHMACROTILE_ENABLE (1 << 16)
+# define R300_DEPTHMICROTILE_LINEAR (0 << 17)
+# define R300_DEPTHMICROTILE_TILED (1 << 17)
+# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17)
+# define R300_DEPTHENDIAN_NO_SWAP (0 << 18)
+# define R300_DEPTHENDIAN_WORD_SWAP (1 << 18)
+# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18)
+# define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 18)
+
+/* Z Buffer Clear Value */
+#define R300_ZB_DEPTHCLEARVALUE 0x4f28
+
+#define R300_ZB_ZMASK_OFFSET 0x4f30
+#define R300_ZB_ZMASK_PITCH 0x4f34
+#define R300_ZB_ZMASK_WRINDEX 0x4f38
+#define R300_ZB_ZMASK_DWORD 0x4f3c
+#define R300_ZB_ZMASK_RDINDEX 0x4f40
+
+/* Hierarchical Z Memory Offset */
+#define R300_ZB_HIZ_OFFSET 0x4f44
+
+/* Hierarchical Z Write Index */
+#define R300_ZB_HIZ_WRINDEX 0x4f48
+
+/* Hierarchical Z Data */
+#define R300_ZB_HIZ_DWORD 0x4f4c
+
+/* Hierarchical Z Read Index */
+#define R300_ZB_HIZ_RDINDEX 0x4f50
+
+/* Hierarchical Z Pitch */
+#define R300_ZB_HIZ_PITCH 0x4f54
+
+/* Z Buffer Z Pass Counter Data */
+#define R300_ZB_ZPASS_DATA 0x4f58
+
+/* Z Buffer Z Pass Counter Address */
+#define R300_ZB_ZPASS_ADDR 0x4f5c
+
+/* Depth buffer X and Y coordinate offset */
+#define R300_ZB_DEPTHXY_OFFSET 0x4f60
+# define R300_DEPTHX_OFFSET_SHIFT 1
+# define R300_DEPTHX_OFFSET_MASK 0x000007FE
+# define R300_DEPTHY_OFFSET_SHIFT 17
+# define R300_DEPTHY_OFFSET_MASK 0x07FE0000
+
+/* Sets the fifo sizes */
+#define R500_ZB_FIFO_SIZE 0x4fd0
+# define R500_OP_FIFO_SIZE_FULL (0 << 0)
+# define R500_OP_FIFO_SIZE_HALF (1 << 0)
+# define R500_OP_FIFO_SIZE_QUATER (2 << 0)
+# define R500_OP_FIFO_SIZE_EIGTHS (4 << 0)
+
+/* Stencil Reference Value and Mask for backfacing quads */
+/* R300_ZB_STENCILREFMASK handles front face */
+#define R500_ZB_STENCILREFMASK_BF 0x4fd4
+# define R500_STENCILREF_SHIFT 0
+# define R500_STENCILREF_MASK 0x000000ff
+# define R500_STENCILMASK_SHIFT 8
+# define R500_STENCILMASK_MASK 0x0000ff00
+# define R500_STENCILWRITEMASK_SHIFT 16
+# define R500_STENCILWRITEMASK_MASK 0x00ff0000
+
+/**
+ * \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION
+ *
+ * The PVS_DST_MATH_INST is used to identify whether the instruction is a Vector
+ * Engine instruction or a Math Engine instruction.
+ */
+
+/*\{*/
+
+enum {
+ /* R3XX */
+ VECTOR_NO_OP = 0,
+ VE_DOT_PRODUCT = 1,
+ VE_MULTIPLY = 2,
+ VE_ADD = 3,
+ VE_MULTIPLY_ADD = 4,
+ VE_DISTANCE_VECTOR = 5,
+ VE_FRACTION = 6,
+ VE_MAXIMUM = 7,
+ VE_MINIMUM = 8,
+ VE_SET_GREATER_THAN_EQUAL = 9,
+ VE_SET_LESS_THAN = 10,
+ VE_MULTIPLYX2_ADD = 11,
+ VE_MULTIPLY_CLAMP = 12,
+ VE_FLT2FIX_DX = 13,
+ VE_FLT2FIX_DX_RND = 14,
+ /* R5XX */
+ VE_PRED_SET_EQ_PUSH = 15,
+ VE_PRED_SET_GT_PUSH = 16,
+ VE_PRED_SET_GTE_PUSH = 17,
+ VE_PRED_SET_NEQ_PUSH = 18,
+ VE_COND_WRITE_EQ = 19,
+ VE_COND_WRITE_GT = 20,
+ VE_COND_WRITE_GTE = 21,
+ VE_COND_WRITE_NEQ = 22,
+ VE_COND_MUX_EQ = 23,
+ VE_COND_MUX_GT = 24,
+ VE_COND_MUX_GTE = 25,
+ VE_SET_GREATER_THAN = 26,
+ VE_SET_EQUAL = 27,
+ VE_SET_NOT_EQUAL = 28,
+};
+
+enum {
+ /* R3XX */
+ MATH_NO_OP = 0,
+ ME_EXP_BASE2_DX = 1,
+ ME_LOG_BASE2_DX = 2,
+ ME_EXP_BASEE_FF = 3,
+ ME_LIGHT_COEFF_DX = 4,
+ ME_POWER_FUNC_FF = 5,
+ ME_RECIP_DX = 6,
+ ME_RECIP_FF = 7,
+ ME_RECIP_SQRT_DX = 8,
+ ME_RECIP_SQRT_FF = 9,
+ ME_MULTIPLY = 10,
+ ME_EXP_BASE2_FULL_DX = 11,
+ ME_LOG_BASE2_FULL_DX = 12,
+ ME_POWER_FUNC_FF_CLAMP_B = 13,
+ ME_POWER_FUNC_FF_CLAMP_B1 = 14,
+ ME_POWER_FUNC_FF_CLAMP_01 = 15,
+ ME_SIN = 16,
+ ME_COS = 17,
+ /* R5XX */
+ ME_LOG_BASE2_IEEE = 18,
+ ME_RECIP_IEEE = 19,
+ ME_RECIP_SQRT_IEEE = 20,
+ ME_PRED_SET_EQ = 21,
+ ME_PRED_SET_GT = 22,
+ ME_PRED_SET_GTE = 23,
+ ME_PRED_SET_NEQ = 24,
+ ME_PRED_SET_CLR = 25,
+ ME_PRED_SET_INV = 26,
+ ME_PRED_SET_POP = 27,
+ ME_PRED_SET_RESTORE = 28,
+};
+
+enum {
+ /* R3XX */
+ PVS_MACRO_OP_2CLK_MADD = 0,
+ PVS_MACRO_OP_2CLK_M2X_ADD = 1,
+};
+
+enum {
+ PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */
+ PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */
+ PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */
+ PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */
+};
+
+enum {
+ PVS_DST_REG_TEMPORARY = 0, /* Intermediate Storage */
+ PVS_DST_REG_A0 = 1, /* Address Register Storage */
+ PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */
+ PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */
+ PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */
+ PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */
+};
+
+enum {
+ PVS_SRC_SELECT_X = 0, /* Select X Component */
+ PVS_SRC_SELECT_Y = 1, /* Select Y Component */
+ PVS_SRC_SELECT_Z = 2, /* Select Z Component */
+ PVS_SRC_SELECT_W = 3, /* Select W Component */
+ PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */
+ PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */
+};
+
+/* PVS Opcode & Destination Operand Description */
+
+enum {
+ PVS_DST_OPCODE_MASK = 0x3f,
+ PVS_DST_OPCODE_SHIFT = 0,
+ PVS_DST_MATH_INST_MASK = 0x1,
+ PVS_DST_MATH_INST_SHIFT = 6,
+ PVS_DST_MACRO_INST_MASK = 0x1,
+ PVS_DST_MACRO_INST_SHIFT = 7,
+ PVS_DST_REG_TYPE_MASK = 0xf,
+ PVS_DST_REG_TYPE_SHIFT = 8,
+ PVS_DST_ADDR_MODE_1_MASK = 0x1,
+ PVS_DST_ADDR_MODE_1_SHIFT = 12,
+ PVS_DST_OFFSET_MASK = 0x7f,
+ PVS_DST_OFFSET_SHIFT = 13,
+ PVS_DST_WE_X_MASK = 0x1,
+ PVS_DST_WE_X_SHIFT = 20,
+ PVS_DST_WE_Y_MASK = 0x1,
+ PVS_DST_WE_Y_SHIFT = 21,
+ PVS_DST_WE_Z_MASK = 0x1,
+ PVS_DST_WE_Z_SHIFT = 22,
+ PVS_DST_WE_W_MASK = 0x1,
+ PVS_DST_WE_W_SHIFT = 23,
+ PVS_DST_VE_SAT_MASK = 0x1,
+ PVS_DST_VE_SAT_SHIFT = 24,
+ PVS_DST_ME_SAT_MASK = 0x1,
+ PVS_DST_ME_SAT_SHIFT = 25,
+ PVS_DST_PRED_ENABLE_MASK = 0x1,
+ PVS_DST_PRED_ENABLE_SHIFT = 26,
+ PVS_DST_PRED_SENSE_MASK = 0x1,
+ PVS_DST_PRED_SENSE_SHIFT = 27,
+ PVS_DST_DUAL_MATH_OP_MASK = 0x3,
+ PVS_DST_DUAL_MATH_OP_SHIFT = 27,
+ PVS_DST_ADDR_SEL_MASK = 0x3,
+ PVS_DST_ADDR_SEL_SHIFT = 29,
+ PVS_DST_ADDR_MODE_0_MASK = 0x1,
+ PVS_DST_ADDR_MODE_0_SHIFT = 31,
+};
+
+/* PVS Source Operand Description */
+
+enum {
+ PVS_SRC_REG_TYPE_MASK = 0x3,
+ PVS_SRC_REG_TYPE_SHIFT = 0,
+ SPARE_0_MASK = 0x1,
+ SPARE_0_SHIFT = 2,
+ PVS_SRC_ABS_XYZW_MASK = 0x1,
+ PVS_SRC_ABS_XYZW_SHIFT = 3,
+ PVS_SRC_ADDR_MODE_0_MASK = 0x1,
+ PVS_SRC_ADDR_MODE_0_SHIFT = 4,
+ PVS_SRC_OFFSET_MASK = 0xff,
+ PVS_SRC_OFFSET_SHIFT = 5,
+ PVS_SRC_SWIZZLE_X_MASK = 0x7,
+ PVS_SRC_SWIZZLE_X_SHIFT = 13,
+ PVS_SRC_SWIZZLE_Y_MASK = 0x7,
+ PVS_SRC_SWIZZLE_Y_SHIFT = 16,
+ PVS_SRC_SWIZZLE_Z_MASK = 0x7,
+ PVS_SRC_SWIZZLE_Z_SHIFT = 19,
+ PVS_SRC_SWIZZLE_W_MASK = 0x7,
+ PVS_SRC_SWIZZLE_W_SHIFT = 22,
+ PVS_SRC_MODIFIER_X_MASK = 0x1,
+ PVS_SRC_MODIFIER_X_SHIFT = 25,
+ PVS_SRC_MODIFIER_Y_MASK = 0x1,
+ PVS_SRC_MODIFIER_Y_SHIFT = 26,
+ PVS_SRC_MODIFIER_Z_MASK = 0x1,
+ PVS_SRC_MODIFIER_Z_SHIFT = 27,
+ PVS_SRC_MODIFIER_W_MASK = 0x1,
+ PVS_SRC_MODIFIER_W_SHIFT = 28,
+ PVS_SRC_ADDR_SEL_MASK = 0x3,
+ PVS_SRC_ADDR_SEL_SHIFT = 29,
+ PVS_SRC_ADDR_MODE_1_MASK = 0x0,
+ PVS_SRC_ADDR_MODE_1_SHIFT = 32,
+};
+
+
+#define PVS_OP_DST_OPERAND(opcode, math_inst, macro_inst, reg_index, reg_writemask, reg_class) \
+ (((opcode & PVS_DST_OPCODE_MASK) << PVS_DST_OPCODE_SHIFT) \
+ | ((math_inst & PVS_DST_MATH_INST_MASK) << PVS_DST_MATH_INST_SHIFT) \
+ | ((macro_inst & PVS_DST_MACRO_INST_MASK) << PVS_DST_MACRO_INST_SHIFT) \
+ | ((reg_index & PVS_DST_OFFSET_MASK) << PVS_DST_OFFSET_SHIFT) \
+ | ((reg_writemask & 0xf) << PVS_DST_WE_X_SHIFT) /* X Y Z W */ \
+ | ((reg_class & PVS_DST_REG_TYPE_MASK) << PVS_DST_REG_TYPE_SHIFT))
+
+#define PVS_SRC_OPERAND(in_reg_index, comp_x, comp_y, comp_z, comp_w, reg_class, negate) \
+ (((in_reg_index & PVS_SRC_OFFSET_MASK) << PVS_SRC_OFFSET_SHIFT) \
+ | ((comp_x & PVS_SRC_SWIZZLE_X_MASK) << PVS_SRC_SWIZZLE_X_SHIFT) \
+ | ((comp_y & PVS_SRC_SWIZZLE_Y_MASK) << PVS_SRC_SWIZZLE_Y_SHIFT) \
+ | ((comp_z & PVS_SRC_SWIZZLE_Z_MASK) << PVS_SRC_SWIZZLE_Z_SHIFT) \
+ | ((comp_w & PVS_SRC_SWIZZLE_W_MASK) << PVS_SRC_SWIZZLE_W_SHIFT) \
+ | ((negate & 0xf) << PVS_SRC_MODIFIER_X_SHIFT) /* X Y Z W */ \
+ | ((reg_class & PVS_SRC_REG_TYPE_MASK) << PVS_SRC_REG_TYPE_SHIFT))
+
+/*\}*/
+
+/* BEGIN: Packet 3 commands */
+
+/* A primitive emission dword. */
+#define R300_PRIM_TYPE_NONE (0 << 0)
+#define R300_PRIM_TYPE_POINT (1 << 0)
+#define R300_PRIM_TYPE_LINE (2 << 0)
+#define R300_PRIM_TYPE_LINE_STRIP (3 << 0)
+#define R300_PRIM_TYPE_TRI_LIST (4 << 0)
+#define R300_PRIM_TYPE_TRI_FAN (5 << 0)
+#define R300_PRIM_TYPE_TRI_STRIP (6 << 0)
+#define R300_PRIM_TYPE_TRI_TYPE2 (7 << 0)
+#define R300_PRIM_TYPE_RECT_LIST (8 << 0)
+#define R300_PRIM_TYPE_3VRT_POINT_LIST (9 << 0)
+#define R300_PRIM_TYPE_3VRT_LINE_LIST (10 << 0)
+ /* GUESS (based on r200) */
+#define R300_PRIM_TYPE_POINT_SPRITES (11 << 0)
+#define R300_PRIM_TYPE_LINE_LOOP (12 << 0)
+#define R300_PRIM_TYPE_QUADS (13 << 0)
+#define R300_PRIM_TYPE_QUAD_STRIP (14 << 0)
+#define R300_PRIM_TYPE_POLYGON (15 << 0)
+#define R300_PRIM_TYPE_MASK 0xF
+#define R300_PRIM_WALK_IND (1 << 4)
+#define R300_PRIM_WALK_LIST (2 << 4)
+#define R300_PRIM_WALK_RING (3 << 4)
+#define R300_PRIM_WALK_MASK (3 << 4)
+ /* GUESS (based on r200) */
+#define R300_PRIM_COLOR_ORDER_BGRA (0 << 6)
+#define R300_PRIM_COLOR_ORDER_RGBA (1 << 6)
+#define R300_PRIM_NUM_VERTICES_SHIFT 16
+#define R300_PRIM_NUM_VERTICES_MASK 0xffff
+
+
+
+/*
+ * The R500 unified shader (US) registers come in banks of 512 each, one
+ * for each instruction slot in the shader. You can't touch them directly.
+ * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive
+ * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the
+ * instruction is fully specified.
+ */
+#define R500_US_ALU_ALPHA_INST_0 0xa800
+# define R500_ALPHA_OP_MAD 0
+# define R500_ALPHA_OP_DP 1
+# define R500_ALPHA_OP_MIN 2
+# define R500_ALPHA_OP_MAX 3
+/* #define R500_ALPHA_OP_RESERVED 4 */
+# define R500_ALPHA_OP_CND 5
+# define R500_ALPHA_OP_CMP 6
+# define R500_ALPHA_OP_FRC 7
+# define R500_ALPHA_OP_EX2 8
+# define R500_ALPHA_OP_LN2 9
+# define R500_ALPHA_OP_RCP 10
+# define R500_ALPHA_OP_RSQ 11
+# define R500_ALPHA_OP_SIN 12
+# define R500_ALPHA_OP_COS 13
+# define R500_ALPHA_OP_MDH 14
+# define R500_ALPHA_OP_MDV 15
+# define R500_ALPHA_ADDRD(x) ((x) << 4)
+# define R500_ALPHA_ADDRD_REL (1 << 11)
+# define R500_ALPHA_SEL_A_SHIFT 12
+# define R500_ALPHA_SEL_A_SRC0 (0 << 12)
+# define R500_ALPHA_SEL_A_SRC1 (1 << 12)
+# define R500_ALPHA_SEL_A_SRC2 (2 << 12)
+# define R500_ALPHA_SEL_A_SRCP (3 << 12)
+# define R500_ALPHA_SWIZ_A_R (0 << 14)
+# define R500_ALPHA_SWIZ_A_G (1 << 14)
+# define R500_ALPHA_SWIZ_A_B (2 << 14)
+# define R500_ALPHA_SWIZ_A_A (3 << 14)
+# define R500_ALPHA_SWIZ_A_0 (4 << 14)
+# define R500_ALPHA_SWIZ_A_HALF (5 << 14)
+# define R500_ALPHA_SWIZ_A_1 (6 << 14)
+/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */
+# define R500_ALPHA_MOD_A_NOP (0 << 17)
+# define R500_ALPHA_MOD_A_NEG (1 << 17)
+# define R500_ALPHA_MOD_A_ABS (2 << 17)
+# define R500_ALPHA_MOD_A_NAB (3 << 17)
+# define R500_ALPHA_SEL_B_SHIFT 19
+# define R500_ALPHA_SEL_B_SRC0 (0 << 19)
+# define R500_ALPHA_SEL_B_SRC1 (1 << 19)
+# define R500_ALPHA_SEL_B_SRC2 (2 << 19)
+# define R500_ALPHA_SEL_B_SRCP (3 << 19)
+# define R500_ALPHA_SWIZ_B_R (0 << 21)
+# define R500_ALPHA_SWIZ_B_G (1 << 21)
+# define R500_ALPHA_SWIZ_B_B (2 << 21)
+# define R500_ALPHA_SWIZ_B_A (3 << 21)
+# define R500_ALPHA_SWIZ_B_0 (4 << 21)
+# define R500_ALPHA_SWIZ_B_HALF (5 << 21)
+# define R500_ALPHA_SWIZ_B_1 (6 << 21)
+/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */
+# define R500_ALPHA_MOD_B_NOP (0 << 24)
+# define R500_ALPHA_MOD_B_NEG (1 << 24)
+# define R500_ALPHA_MOD_B_ABS (2 << 24)
+# define R500_ALPHA_MOD_B_NAB (3 << 24)
+# define R500_ALPHA_OMOD_IDENTITY (0 << 26)
+# define R500_ALPHA_OMOD_MUL_2 (1 << 26)
+# define R500_ALPHA_OMOD_MUL_4 (2 << 26)
+# define R500_ALPHA_OMOD_MUL_8 (3 << 26)
+# define R500_ALPHA_OMOD_DIV_2 (4 << 26)
+# define R500_ALPHA_OMOD_DIV_4 (5 << 26)
+# define R500_ALPHA_OMOD_DIV_8 (6 << 26)
+# define R500_ALPHA_OMOD_DISABLE (7 << 26)
+# define R500_ALPHA_TARGET(x) ((x) << 29)
+# define R500_ALPHA_W_OMASK (1 << 31)
+#define R500_US_ALU_ALPHA_ADDR_0 0x9800
+# define R500_ALPHA_ADDR0(x) ((x) << 0)
+# define R500_ALPHA_ADDR0_CONST (1 << 8)
+# define R500_ALPHA_ADDR0_REL (1 << 9)
+# define R500_ALPHA_ADDR1(x) ((x) << 10)
+# define R500_ALPHA_ADDR1_CONST (1 << 18)
+# define R500_ALPHA_ADDR1_REL (1 << 19)
+# define R500_ALPHA_ADDR2(x) ((x) << 20)
+# define R500_ALPHA_ADDR2_CONST (1 << 28)
+# define R500_ALPHA_ADDR2_REL (1 << 29)
+# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30)
+# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30)
+# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30)
+# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3 << 30)
+#define R500_US_ALU_RGBA_INST_0 0xb000
+# define R500_ALU_RGBA_OP_MAD (0 << 0)
+# define R500_ALU_RGBA_OP_DP3 (1 << 0)
+# define R500_ALU_RGBA_OP_DP4 (2 << 0)
+# define R500_ALU_RGBA_OP_D2A (3 << 0)
+# define R500_ALU_RGBA_OP_MIN (4 << 0)
+# define R500_ALU_RGBA_OP_MAX (5 << 0)
+/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */
+# define R500_ALU_RGBA_OP_CND (7 << 0)
+# define R500_ALU_RGBA_OP_CMP (8 << 0)
+# define R500_ALU_RGBA_OP_FRC (9 << 0)
+# define R500_ALU_RGBA_OP_SOP (10 << 0)
+# define R500_ALU_RGBA_OP_MDH (11 << 0)
+# define R500_ALU_RGBA_OP_MDV (12 << 0)
+# define R500_ALU_RGBA_ADDRD(x) ((x) << 4)
+# define R500_ALU_RGBA_ADDRD_REL (1 << 11)
+# define R500_ALU_RGBA_SEL_C_SHIFT 12
+# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12)
+# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12)
+# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12)
+# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12)
+# define R500_ALU_RGBA_R_SWIZ_R (0 << 14)
+# define R500_ALU_RGBA_R_SWIZ_G (1 << 14)
+# define R500_ALU_RGBA_R_SWIZ_B (2 << 14)
+# define R500_ALU_RGBA_R_SWIZ_A (3 << 14)
+# define R500_ALU_RGBA_R_SWIZ_0 (4 << 14)
+# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14)
+# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14)
+/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */
+# define R500_ALU_RGBA_G_SWIZ_R (0 << 17)
+# define R500_ALU_RGBA_G_SWIZ_G (1 << 17)
+# define R500_ALU_RGBA_G_SWIZ_B (2 << 17)
+# define R500_ALU_RGBA_G_SWIZ_A (3 << 17)
+# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17)
+# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17)
+# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17)
+/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */
+# define R500_ALU_RGBA_B_SWIZ_R (0 << 20)
+# define R500_ALU_RGBA_B_SWIZ_G (1 << 20)
+# define R500_ALU_RGBA_B_SWIZ_B (2 << 20)
+# define R500_ALU_RGBA_B_SWIZ_A (3 << 20)
+# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20)
+# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20)
+# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20)
+/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */
+# define R500_ALU_RGBA_MOD_C_NOP (0 << 23)
+# define R500_ALU_RGBA_MOD_C_NEG (1 << 23)
+# define R500_ALU_RGBA_MOD_C_ABS (2 << 23)
+# define R500_ALU_RGBA_MOD_C_NAB (3 << 23)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT 25
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25)
+# define R500_ALU_RGBA_A_SWIZ_R (0 << 27)
+# define R500_ALU_RGBA_A_SWIZ_G (1 << 27)
+# define R500_ALU_RGBA_A_SWIZ_B (2 << 27)
+# define R500_ALU_RGBA_A_SWIZ_A (3 << 27)
+# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27)
+# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27)
+# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27)
+/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */
+# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30)
+#define R500_US_ALU_RGB_INST_0 0xa000
+# define R500_ALU_RGB_SEL_A_SHIFT 0
+# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0)
+# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0)
+# define R500_ALU_RGB_SEL_A_SRC2 (2 << 0)
+# define R500_ALU_RGB_SEL_A_SRCP (3 << 0)
+# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2)
+/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */
+# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5)
+/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */
+# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8)
+/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */
+# define R500_ALU_RGB_MOD_A_NOP (0 << 11)
+# define R500_ALU_RGB_MOD_A_NEG (1 << 11)
+# define R500_ALU_RGB_MOD_A_ABS (2 << 11)
+# define R500_ALU_RGB_MOD_A_NAB (3 << 11)
+# define R500_ALU_RGB_SEL_B_SHIFT 13
+# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13)
+# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13)
+# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13)
+# define R500_ALU_RGB_SEL_B_SRCP (3 << 13)
+# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_1 (6 << 15)
+/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */
+# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_A (3 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18)
+/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */
+# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21)
+/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */
+# define R500_ALU_RGB_MOD_B_NOP (0 << 24)
+# define R500_ALU_RGB_MOD_B_NEG (1 << 24)
+# define R500_ALU_RGB_MOD_B_ABS (2 << 24)
+# define R500_ALU_RGB_MOD_B_NAB (3 << 24)
+# define R500_ALU_RGB_OMOD_IDENTITY (0 << 26)
+# define R500_ALU_RGB_OMOD_MUL_2 (1 << 26)
+# define R500_ALU_RGB_OMOD_MUL_4 (2 << 26)
+# define R500_ALU_RGB_OMOD_MUL_8 (3 << 26)
+# define R500_ALU_RGB_OMOD_DIV_2 (4 << 26)
+# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26)
+# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26)
+# define R500_ALU_RGB_OMOD_DISABLE (7 << 26)
+# define R500_ALU_RGB_TARGET(x) ((x) << 29)
+# define R500_ALU_RGB_WMASK (1 << 31)
+#define R500_US_ALU_RGB_ADDR_0 0x9000
+# define R500_RGB_ADDR0(x) ((x) << 0)
+# define R500_RGB_ADDR0_CONST (1 << 8)
+# define R500_RGB_ADDR0_REL (1 << 9)
+# define R500_RGB_ADDR1(x) ((x) << 10)
+# define R500_RGB_ADDR1_CONST (1 << 18)
+# define R500_RGB_ADDR1_REL (1 << 19)
+# define R500_RGB_ADDR2(x) ((x) << 20)
+# define R500_RGB_ADDR2_CONST (1 << 28)
+# define R500_RGB_ADDR2_REL (1 << 29)
+# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30)
+# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30)
+# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30)
+# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30)
+#define R500_US_CMN_INST_0 0xb800
+# define R500_INST_TYPE_MASK (3 << 0)
+# define R500_INST_TYPE_ALU (0 << 0)
+# define R500_INST_TYPE_OUT (1 << 0)
+# define R500_INST_TYPE_FC (2 << 0)
+# define R500_INST_TYPE_TEX (3 << 0)
+# define R500_INST_TEX_SEM_WAIT (1 << 2)
+# define R500_INST_RGB_PRED_SEL_NONE (0 << 3)
+# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3)
+# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3)
+# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3)
+# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3)
+# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3)
+# define R500_INST_RGB_PRED_INV (1 << 6)
+# define R500_INST_WRITE_INACTIVE (1 << 7)
+# define R500_INST_LAST (1 << 8)
+# define R500_INST_NOP (1 << 9)
+# define R500_INST_ALU_WAIT (1 << 10)
+# define R500_INST_RGB_WMASK_R (1 << 11)
+# define R500_INST_RGB_WMASK_G (1 << 12)
+# define R500_INST_RGB_WMASK_B (1 << 13)
+# define R500_INST_ALPHA_WMASK (1 << 14)
+# define R500_INST_RGB_OMASK_R (1 << 15)
+# define R500_INST_RGB_OMASK_G (1 << 16)
+# define R500_INST_RGB_OMASK_B (1 << 17)
+# define R500_INST_ALPHA_OMASK (1 << 18)
+# define R500_INST_RGB_CLAMP (1 << 19)
+# define R500_INST_ALPHA_CLAMP (1 << 20)
+# define R500_INST_ALU_RESULT_SEL (1 << 21)
+# define R500_INST_ALU_RESULT_SEL_RED (0 << 21)
+# define R500_INST_ALU_RESULT_SEL_ALPHA (1 << 21)
+# define R500_INST_ALPHA_PRED_INV (1 << 22)
+# define R500_INST_ALU_RESULT_OP_EQ (0 << 23)
+# define R500_INST_ALU_RESULT_OP_LT (1 << 23)
+# define R500_INST_ALU_RESULT_OP_GE (2 << 23)
+# define R500_INST_ALU_RESULT_OP_NE (3 << 23)
+# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25)
+# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25)
+# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25)
+# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25)
+# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25)
+# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25)
+/* XXX next four are kind of guessed */
+# define R500_INST_STAT_WE_R (1 << 28)
+# define R500_INST_STAT_WE_G (1 << 29)
+# define R500_INST_STAT_WE_B (1 << 30)
+# define R500_INST_STAT_WE_A (1 << 31)
+
+/* note that these are 8 bit lengths, despite the offsets, at least for R500 */
+#define R500_US_CODE_ADDR 0x4630
+# define R500_US_CODE_START_ADDR(x) ((x) << 0)
+# define R500_US_CODE_END_ADDR(x) ((x) << 16)
+#define R500_US_CODE_OFFSET 0x4638
+# define R500_US_CODE_OFFSET_ADDR(x) ((x) << 0)
+#define R500_US_CODE_RANGE 0x4634
+# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0)
+# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16)
+#define R500_US_CONFIG 0x4600
+# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1)
+#define R500_US_FC_ADDR_0 0xa000
+# define R500_FC_BOOL_ADDR(x) ((x) << 0)
+# define R500_FC_INT_ADDR(x) ((x) << 8)
+# define R500_FC_JUMP_ADDR(x) ((x) << 16)
+# define R500_FC_JUMP_GLOBAL (1 << 31)
+#define R500_US_FC_BOOL_CONST 0x4620
+# define R500_FC_KBOOL(x) (x)
+#define R500_US_FC_CTRL 0x4624
+# define R500_FC_TEST_EN (1 << 30)
+# define R500_FC_FULL_FC_EN (1 << 31)
+#define R500_US_FC_INST_0 0x9800
+# define R500_FC_OP_JUMP (0 << 0)
+# define R500_FC_OP_LOOP (1 << 0)
+# define R500_FC_OP_ENDLOOP (2 << 0)
+# define R500_FC_OP_REP (3 << 0)
+# define R500_FC_OP_ENDREP (4 << 0)
+# define R500_FC_OP_BREAKLOOP (5 << 0)
+# define R500_FC_OP_BREAKREP (6 << 0)
+# define R500_FC_OP_CONTINUE (7 << 0)
+# define R500_FC_B_ELSE (1 << 4)
+# define R500_FC_JUMP_ANY (1 << 5)
+# define R500_FC_A_OP_NONE (0 << 6)
+# define R500_FC_A_OP_POP (1 << 6)
+# define R500_FC_A_OP_PUSH (2 << 6)
+# define R500_FC_JUMP_FUNC(x) ((x) << 8)
+# define R500_FC_B_POP_CNT(x) ((x) << 16)
+# define R500_FC_B_OP0_NONE (0 << 24)
+# define R500_FC_B_OP0_DECR (1 << 24)
+# define R500_FC_B_OP0_INCR (2 << 24)
+# define R500_FC_B_OP1_DECR (0 << 26)
+# define R500_FC_B_OP1_NONE (1 << 26)
+# define R500_FC_B_OP1_INCR (2 << 26)
+# define R500_FC_IGNORE_UNCOVERED (1 << 28)
+#define R500_US_FC_INT_CONST_0 0x4c00
+# define R500_FC_INT_CONST_KR(x) ((x) << 0)
+# define R500_FC_INT_CONST_KG(x) ((x) << 8)
+# define R500_FC_INT_CONST_KB(x) ((x) << 16)
+/* _0 through _15 */
+#define R500_US_FORMAT0_0 0x4640
+# define R500_FORMAT_TXWIDTH(x) ((x) << 0)
+# define R500_FORMAT_TXHEIGHT(x) ((x) << 11)
+# define R500_FORMAT_TXDEPTH(x) ((x) << 22)
+/* _0 through _3 */
+#define R500_US_OUT_FMT_0 0x46a4
+# define R500_OUT_FMT_C4_8 (0 << 0)
+# define R500_OUT_FMT_C4_10 (1 << 0)
+# define R500_OUT_FMT_C4_10_GAMMA (2 << 0)
+# define R500_OUT_FMT_C_16 (3 << 0)
+# define R500_OUT_FMT_C2_16 (4 << 0)
+# define R500_OUT_FMT_C4_16 (5 << 0)
+# define R500_OUT_FMT_C_16_MPEG (6 << 0)
+# define R500_OUT_FMT_C2_16_MPEG (7 << 0)
+# define R500_OUT_FMT_C2_4 (8 << 0)
+# define R500_OUT_FMT_C_3_3_2 (9 << 0)
+# define R500_OUT_FMT_C_6_5_6 (10 << 0)
+# define R500_OUT_FMT_C_11_11_10 (11 << 0)
+# define R500_OUT_FMT_C_10_11_11 (12 << 0)
+# define R500_OUT_FMT_C_2_10_10_10 (13 << 0)
+/* #define R500_OUT_FMT_RESERVED (14 << 0) */
+# define R500_OUT_FMT_UNUSED (15 << 0)
+# define R500_OUT_FMT_C_16_FP (16 << 0)
+# define R500_OUT_FMT_C2_16_FP (17 << 0)
+# define R500_OUT_FMT_C4_16_FP (18 << 0)
+# define R500_OUT_FMT_C_32_FP (19 << 0)
+# define R500_OUT_FMT_C2_32_FP (20 << 0)
+# define R500_OUT_FMT_C4_32_FP (21 << 0)
+# define R500_C0_SEL_A (0 << 8)
+# define R500_C0_SEL_R (1 << 8)
+# define R500_C0_SEL_G (2 << 8)
+# define R500_C0_SEL_B (3 << 8)
+# define R500_C1_SEL_A (0 << 10)
+# define R500_C1_SEL_R (1 << 10)
+# define R500_C1_SEL_G (2 << 10)
+# define R500_C1_SEL_B (3 << 10)
+# define R500_C2_SEL_A (0 << 12)
+# define R500_C2_SEL_R (1 << 12)
+# define R500_C2_SEL_G (2 << 12)
+# define R500_C2_SEL_B (3 << 12)
+# define R500_C3_SEL_A (0 << 14)
+# define R500_C3_SEL_R (1 << 14)
+# define R500_C3_SEL_G (2 << 14)
+# define R500_C3_SEL_B (3 << 14)
+# define R500_OUT_SIGN(x) ((x) << 16)
+# define R500_ROUND_ADJ (1 << 20)
+#define R500_US_PIXSIZE 0x4604
+# define R500_PIX_SIZE(x) (x)
+#define R500_US_TEX_ADDR_0 0x9800
+# define R500_TEX_SRC_ADDR(x) ((x) << 0)
+# define R500_TEX_SRC_ADDR_REL (1 << 7)
+# define R500_TEX_SRC_S_SWIZ_R (0 << 8)
+# define R500_TEX_SRC_S_SWIZ_G (1 << 8)
+# define R500_TEX_SRC_S_SWIZ_B (2 << 8)
+# define R500_TEX_SRC_S_SWIZ_A (3 << 8)
+# define R500_TEX_SRC_T_SWIZ_R (0 << 10)
+# define R500_TEX_SRC_T_SWIZ_G (1 << 10)
+# define R500_TEX_SRC_T_SWIZ_B (2 << 10)
+# define R500_TEX_SRC_T_SWIZ_A (3 << 10)
+# define R500_TEX_SRC_R_SWIZ_R (0 << 12)
+# define R500_TEX_SRC_R_SWIZ_G (1 << 12)
+# define R500_TEX_SRC_R_SWIZ_B (2 << 12)
+# define R500_TEX_SRC_R_SWIZ_A (3 << 12)
+# define R500_TEX_SRC_Q_SWIZ_R (0 << 14)
+# define R500_TEX_SRC_Q_SWIZ_G (1 << 14)
+# define R500_TEX_SRC_Q_SWIZ_B (2 << 14)
+# define R500_TEX_SRC_Q_SWIZ_A (3 << 14)
+# define R500_TEX_DST_ADDR(x) ((x) << 16)
+# define R500_TEX_DST_ADDR_REL (1 << 23)
+# define R500_TEX_DST_R_SWIZ_R (0 << 24)
+# define R500_TEX_DST_R_SWIZ_G (1 << 24)
+# define R500_TEX_DST_R_SWIZ_B (2 << 24)
+# define R500_TEX_DST_R_SWIZ_A (3 << 24)
+# define R500_TEX_DST_G_SWIZ_R (0 << 26)
+# define R500_TEX_DST_G_SWIZ_G (1 << 26)
+# define R500_TEX_DST_G_SWIZ_B (2 << 26)
+# define R500_TEX_DST_G_SWIZ_A (3 << 26)
+# define R500_TEX_DST_B_SWIZ_R (0 << 28)
+# define R500_TEX_DST_B_SWIZ_G (1 << 28)
+# define R500_TEX_DST_B_SWIZ_B (2 << 28)
+# define R500_TEX_DST_B_SWIZ_A (3 << 28)
+# define R500_TEX_DST_A_SWIZ_R (0 << 30)
+# define R500_TEX_DST_A_SWIZ_G (1 << 30)
+# define R500_TEX_DST_A_SWIZ_B (2 << 30)
+# define R500_TEX_DST_A_SWIZ_A (3 << 30)
+#define R500_US_TEX_ADDR_DXDY_0 0xa000
+# define R500_DX_ADDR(x) ((x) << 0)
+# define R500_DX_ADDR_REL (1 << 7)
+# define R500_DX_S_SWIZ_R (0 << 8)
+# define R500_DX_S_SWIZ_G (1 << 8)
+# define R500_DX_S_SWIZ_B (2 << 8)
+# define R500_DX_S_SWIZ_A (3 << 8)
+# define R500_DX_T_SWIZ_R (0 << 10)
+# define R500_DX_T_SWIZ_G (1 << 10)
+# define R500_DX_T_SWIZ_B (2 << 10)
+# define R500_DX_T_SWIZ_A (3 << 10)
+# define R500_DX_R_SWIZ_R (0 << 12)
+# define R500_DX_R_SWIZ_G (1 << 12)
+# define R500_DX_R_SWIZ_B (2 << 12)
+# define R500_DX_R_SWIZ_A (3 << 12)
+# define R500_DX_Q_SWIZ_R (0 << 14)
+# define R500_DX_Q_SWIZ_G (1 << 14)
+# define R500_DX_Q_SWIZ_B (2 << 14)
+# define R500_DX_Q_SWIZ_A (3 << 14)
+# define R500_DY_ADDR(x) ((x) << 16)
+# define R500_DY_ADDR_REL (1 << 17)
+# define R500_DY_S_SWIZ_R (0 << 24)
+# define R500_DY_S_SWIZ_G (1 << 24)
+# define R500_DY_S_SWIZ_B (2 << 24)
+# define R500_DY_S_SWIZ_A (3 << 24)
+# define R500_DY_T_SWIZ_R (0 << 26)
+# define R500_DY_T_SWIZ_G (1 << 26)
+# define R500_DY_T_SWIZ_B (2 << 26)
+# define R500_DY_T_SWIZ_A (3 << 26)
+# define R500_DY_R_SWIZ_R (0 << 28)
+# define R500_DY_R_SWIZ_G (1 << 28)
+# define R500_DY_R_SWIZ_B (2 << 28)
+# define R500_DY_R_SWIZ_A (3 << 28)
+# define R500_DY_Q_SWIZ_R (0 << 30)
+# define R500_DY_Q_SWIZ_G (1 << 30)
+# define R500_DY_Q_SWIZ_B (2 << 30)
+# define R500_DY_Q_SWIZ_A (3 << 30)
+#define R500_US_TEX_INST_0 0x9000
+# define R500_TEX_ID(x) ((x) << 16)
+# define R500_TEX_INST_NOP (0 << 22)
+# define R500_TEX_INST_LD (1 << 22)
+# define R500_TEX_INST_TEXKILL (2 << 22)
+# define R500_TEX_INST_PROJ (3 << 22)
+# define R500_TEX_INST_LODBIAS (4 << 22)
+# define R500_TEX_INST_LOD (5 << 22)
+# define R500_TEX_INST_DXDY (6 << 22)
+# define R500_TEX_SEM_ACQUIRE (1 << 25)
+# define R500_TEX_IGNORE_UNCOVERED (1 << 26)
+# define R500_TEX_UNSCALED (1 << 27)
+#define R300_US_W_FMT 0x46b4
+# define R300_W_FMT_W0 (0 << 0)
+# define R300_W_FMT_W24 (1 << 0)
+# define R300_W_FMT_W24FP (2 << 0)
+# define R300_W_SRC_US (0 << 2)
+# define R300_W_SRC_RAS (1 << 2)
+
+
+/* Packet0 field ordering to write all values to the same reg */
+#define RADEON_ONE_REG_WR (1 << 15)
+
+/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR.
+ * Two parameter dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ */
+#define R300_PACKET3_3D_DRAW_VBUF 0x00002800
+
+/* Draw a primitive from immediate vertices in this packet
+ * Up to 16382 dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ * 2 to end: Up to 16380 dwords of vertex data.
+ */
+#define R300_PACKET3_3D_DRAW_IMMD 0x00002900
+
+/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR and
+ * immediate vertices in this packet
+ * Up to 16382 dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ * 2 to end: Up to 16380 dwords of vertex data.
+ */
+#define R300_PACKET3_3D_DRAW_INDX 0x00002A00
+
+
+/* Specify the full set of vertex arrays as (address, stride).
+ * The first parameter is the number of vertex arrays specified.
+ * The rest of the command is a variable length list of blocks, where
+ * each block is three dwords long and specifies two arrays.
+ * The first dword of a block is split into two words, the lower significant
+ * word refers to the first array, the more significant word to the second
+ * array in the block.
+ * The low byte of each word contains the size of an array entry in dwords,
+ * the high byte contains the stride of the array.
+ * The second dword of a block contains the pointer to the first array,
+ * the third dword of a block contains the pointer to the second array.
+ * Note that if the total number of arrays is odd, the third dword of
+ * the last block is omitted.
+ */
+#define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00
+
+#define R300_PACKET3_INDX_BUFFER 0x00003300
+# define R300_INDX_BUFFER_DST_SHIFT 0
+# define R300_INDX_BUFFER_SKIP_SHIFT 16
+# define R300_INDX_BUFFER_ONE_REG_WR (1<<31)
+
+/* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400
+/* Same as R300_PACKET3_3D_DRAW_IMMD but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_IMMD_2 0x00003500
+/* Same as R300_PACKET3_3D_DRAW_INDX but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_INDX_2 0x00003600
+
+/* Clears a portion of hierachical Z RAM
+ * 3 dword parameters
+ * 0. START
+ * 1. COUNT: 13:0 (max is 0x3FFF)
+ * 2. CLEAR_VALUE: Value to write into HIZ RAM.
+ */
+#define R300_PACKET3_3D_CLEAR_HIZ 0x00003700
+
+/* Draws a set of primitives using vertex buffers pointed by the state data.
+ * At least 2 Parameters:
+ * 0. VAP_VF_CNTL: The first parameter is a standard primitive emission dword.
+ * 2 to end: Data or indices (see other 3D_DRAW_* packets for details)
+ */
+#define R300_PACKET3_3D_DRAW_128 0x00003900
+
+/* END: Packet 3 commands */
+
+
+/* Color formats for 2d packets
+ */
+#define R300_CP_COLOR_FORMAT_CI8 2
+#define R300_CP_COLOR_FORMAT_ARGB1555 3
+#define R300_CP_COLOR_FORMAT_RGB565 4
+#define R300_CP_COLOR_FORMAT_ARGB8888 6
+#define R300_CP_COLOR_FORMAT_RGB332 7
+#define R300_CP_COLOR_FORMAT_RGB8 9
+#define R300_CP_COLOR_FORMAT_ARGB4444 15
+
+/*
+ * CP type-3 packets
+ */
+#define R300_CP_CMD_BITBLT_MULTI 0xC0009B00
+
+#endif /* _R300_REG_H */
+
+/* *INDENT-ON* */
+
+/* vim: set foldenable foldmarker=\\{,\\} foldmethod=marker : */
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
new file mode 100644
index 0000000000..bb8f91491f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_render.c
@@ -0,0 +1,505 @@
+/**************************************************************************
+
+Copyright (C) 2004 Nicolai Haehnle.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \brief R300 Render (Vertex Buffer Implementation)
+ *
+ * The immediate implementation has been removed from CVS in favor of the vertex
+ * buffer implementation.
+ *
+ * The render functions are called by the pipeline manager to render a batch of
+ * primitives. They return TRUE to pass on to the next stage (i.e. software
+ * rasterization) or FALSE to indicate that the pipeline has finished after
+ * rendering something.
+ *
+ * When falling back to software TCL still attempt to use hardware
+ * rasterization.
+ *
+ * I am not sure that the cache related registers are setup correctly, but
+ * obviously this does work... Further investigation is needed.
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ *
+ * \todo Add immediate implementation back? Perhaps this is useful if there are
+ * no bugs...
+ */
+
+#include "r300_render.h"
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/dd.h"
+#include "main/simple_list.h"
+#include "main/api_arrayelt.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "vbo/vbo.h"
+#include "vbo/vbo_split.h"
+#include "r300_context.h"
+#include "r300_state.h"
+#include "r300_reg.h"
+#include "r300_emit.h"
+#include "r300_swtcl.h"
+
+/**
+ * \brief Convert a OpenGL primitive type into a R300 primitive type.
+ */
+int r300PrimitiveType(r300ContextPtr rmesa, int prim)
+{
+ switch (prim & PRIM_MODE_MASK) {
+ case GL_POINTS:
+ return R300_VAP_VF_CNTL__PRIM_POINTS;
+ break;
+ case GL_LINES:
+ return R300_VAP_VF_CNTL__PRIM_LINES;
+ break;
+ case GL_LINE_STRIP:
+ return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
+ break;
+ case GL_LINE_LOOP:
+ return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
+ break;
+ case GL_TRIANGLES:
+ return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
+ break;
+ case GL_TRIANGLE_STRIP:
+ return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
+ break;
+ case GL_TRIANGLE_FAN:
+ return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
+ break;
+ case GL_QUADS:
+ return R300_VAP_VF_CNTL__PRIM_QUADS;
+ break;
+ case GL_QUAD_STRIP:
+ return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
+ break;
+ case GL_POLYGON:
+ return R300_VAP_VF_CNTL__PRIM_POLYGON;
+ break;
+ default:
+ assert(0);
+ return -1;
+ break;
+ }
+}
+
+int r300NumVerts(r300ContextPtr rmesa, int num_verts, int prim)
+{
+ int verts_off = 0;
+
+ switch (prim & PRIM_MODE_MASK) {
+ case GL_POINTS:
+ verts_off = 0;
+ break;
+ case GL_LINES:
+ verts_off = num_verts % 2;
+ break;
+ case GL_LINE_STRIP:
+ if (num_verts < 2)
+ verts_off = num_verts;
+ break;
+ case GL_LINE_LOOP:
+ if (num_verts < 2)
+ verts_off = num_verts;
+ break;
+ case GL_TRIANGLES:
+ verts_off = num_verts % 3;
+ break;
+ case GL_TRIANGLE_STRIP:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ case GL_TRIANGLE_FAN:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ case GL_QUADS:
+ verts_off = num_verts % 4;
+ break;
+ case GL_QUAD_STRIP:
+ if (num_verts < 4)
+ verts_off = num_verts;
+ else
+ verts_off = num_verts % 2;
+ break;
+ case GL_POLYGON:
+ if (num_verts < 3)
+ verts_off = num_verts;
+ break;
+ default:
+ assert(0);
+ return -1;
+ break;
+ }
+
+ return num_verts - verts_off;
+}
+
+static void r300FireEB(r300ContextPtr rmesa, int vertex_count, int type, int offset)
+{
+ BATCH_LOCALS(&rmesa->radeon);
+ int size;
+
+ /* offset is in indices */
+ BEGIN_BATCH(10);
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0);
+ if (rmesa->ind_buf.is_32bit) {
+ /* convert to bytes */
+ offset *= 4;
+ size = vertex_count;
+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
+ (vertex_count << 16) | type |
+ R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
+ } else {
+ /* convert to bytes */
+ offset *= 2;
+ size = (vertex_count + 1) >> 1;
+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_INDICES |
+ (vertex_count << 16) | type);
+ }
+
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
+ OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
+ (R300_VAP_PORT_IDX0 >> 2));
+ OUT_BATCH_RELOC(0, rmesa->ind_buf.bo, rmesa->ind_buf.bo_offset + offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ OUT_BATCH(size);
+ } else {
+ OUT_BATCH_PACKET3(R300_PACKET3_INDX_BUFFER, 2);
+ OUT_BATCH(R300_INDX_BUFFER_ONE_REG_WR | (0 << R300_INDX_BUFFER_SKIP_SHIFT) |
+ (R300_VAP_PORT_IDX0 >> 2));
+ OUT_BATCH(rmesa->ind_buf.bo_offset + offset);
+ OUT_BATCH(size);
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->ind_buf.bo, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ }
+ END_BATCH();
+}
+
+static void r300EmitAOS(r300ContextPtr rmesa, GLuint nr, GLuint offset)
+{
+ BATCH_LOCALS(&rmesa->radeon);
+ uint32_t voffset;
+ int sz = 1 + (nr >> 1) * 3 + (nr & 1) * 2;
+ int i;
+
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s: nr=%d, ofs=0x%08x\n", __FUNCTION__, nr,
+ offset);
+
+ if (!rmesa->radeon.radeonScreen->kernel_mm) {
+ BEGIN_BATCH(sz+2+(nr * 2));
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+ OUT_BATCH(nr);
+
+ for (i = 0; i + 1 < nr; i += 2) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+ (rmesa->radeon.tcl.aos[i].stride << 8) |
+ (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+ (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+ voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+ OUT_BATCH_RELOC(voffset,
+ rmesa->radeon.tcl.aos[i].bo,
+ voffset,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+ OUT_BATCH_RELOC(voffset,
+ rmesa->radeon.tcl.aos[i+1].bo,
+ voffset,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ }
+
+ if (nr & 1) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+ (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+ voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+ OUT_BATCH_RELOC(voffset,
+ rmesa->radeon.tcl.aos[nr - 1].bo,
+ voffset,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ }
+ END_BATCH();
+ } else {
+
+ BEGIN_BATCH(sz+2+(nr * 2));
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, sz - 1);
+ OUT_BATCH(nr);
+
+ for (i = 0; i + 1 < nr; i += 2) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[i].components << 0) |
+ (rmesa->radeon.tcl.aos[i].stride << 8) |
+ (rmesa->radeon.tcl.aos[i + 1].components << 16) |
+ (rmesa->radeon.tcl.aos[i + 1].stride << 24));
+
+ voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+ OUT_BATCH(voffset);
+ voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+ OUT_BATCH(voffset);
+ }
+
+ if (nr & 1) {
+ OUT_BATCH((rmesa->radeon.tcl.aos[nr - 1].components << 0) |
+ (rmesa->radeon.tcl.aos[nr - 1].stride << 8));
+ voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+ OUT_BATCH(voffset);
+ }
+ for (i = 0; i + 1 < nr; i += 2) {
+ voffset = rmesa->radeon.tcl.aos[i + 0].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 0].stride;
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->radeon.tcl.aos[i+0].bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ voffset = rmesa->radeon.tcl.aos[i + 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[i + 1].stride;
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->radeon.tcl.aos[i+1].bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ }
+ if (nr & 1) {
+ voffset = rmesa->radeon.tcl.aos[nr - 1].offset +
+ offset * 4 * rmesa->radeon.tcl.aos[nr - 1].stride;
+ radeon_cs_write_reloc(rmesa->radeon.cmdbuf.cs,
+ rmesa->radeon.tcl.aos[nr-1].bo,
+ RADEON_GEM_DOMAIN_GTT,
+ 0, 0);
+ }
+ END_BATCH();
+ }
+
+}
+
+static void r300FireAOS(r300ContextPtr rmesa, int vertex_count, int type)
+{
+ BATCH_LOCALS(&rmesa->radeon);
+
+ r300_emit_scissor(rmesa->radeon.glCtx);
+ BEGIN_BATCH(3);
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (vertex_count << 16) | type);
+ END_BATCH();
+}
+
+void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ BATCH_LOCALS(&rmesa->radeon);
+ int type, num_verts;
+
+ type = r300PrimitiveType(rmesa, prim);
+ num_verts = r300NumVerts(rmesa, end - start, prim);
+
+ if (type < 0 || num_verts <= 0)
+ return;
+
+ if (rmesa->ind_buf.bo) {
+ GLuint first, incr, offset = 0;
+
+ if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&
+ num_verts > 65500) {
+ WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
+ return;
+ }
+
+
+ r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, 0);
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1);
+ OUT_BATCH(rmesa->radeon.tcl.aos[0].count);
+ END_BATCH();
+ }
+
+ r300_emit_scissor(rmesa->radeon.glCtx);
+ while (num_verts > 0) {
+ int nr;
+ int align;
+
+ nr = MIN2(num_verts, 65535);
+ nr -= (nr - first) % incr;
+
+ /* get alignment for IB correct */
+ if (nr != num_verts) {
+ do {
+ align = nr * (rmesa->ind_buf.is_32bit ? 4 : 2);
+ if (align % 4)
+ nr -= incr;
+ } while(align % 4);
+ if (nr <= 0) {
+ WARN_ONCE("did the impossible happen? we never aligned nr to dword\n");
+ return;
+ }
+
+ }
+ r300FireEB(rmesa, nr, type, offset);
+
+ num_verts -= nr;
+ offset += nr;
+ }
+
+ } else {
+ GLuint first, incr, offset = 0;
+
+ if (!split_prim_inplace(prim & PRIM_MODE_MASK, &first, &incr) &&
+ num_verts > 65535) {
+ WARN_ONCE("Fixme: can't handle spliting prim %d\n", prim);
+ return;
+ }
+
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ BEGIN_BATCH_NO_AUTOSTATE(2);
+ OUT_BATCH_REGSEQ(R300_VAP_VF_MAX_VTX_INDX, 1);
+ OUT_BATCH(rmesa->radeon.tcl.aos[0].count);
+ END_BATCH();
+ }
+
+ r300_emit_scissor(rmesa->radeon.glCtx);
+ while (num_verts > 0) {
+ int nr;
+ nr = MIN2(num_verts, 65535);
+ nr -= (nr - first) % incr;
+ r300EmitAOS(rmesa, rmesa->radeon.tcl.aos_count, start + offset);
+ r300FireAOS(rmesa, nr, type);
+ num_verts -= nr;
+ offset += nr;
+ }
+ }
+ COMMIT_BATCH();
+}
+
+static const char *getFallbackString(r300ContextPtr rmesa, uint32_t bit)
+{
+ static char common_fallback_str[32];
+ switch (bit) {
+ case R300_FALLBACK_VERTEX_PROGRAM :
+ return "vertex program";
+ case R300_FALLBACK_LINE_SMOOTH:
+ return "smooth lines";
+ case R300_FALLBACK_POINT_SMOOTH:
+ return "smooth points";
+ case R300_FALLBACK_POLYGON_SMOOTH:
+ return "smooth polygons";
+ case R300_FALLBACK_LINE_STIPPLE:
+ return "line stipple";
+ case R300_FALLBACK_POLYGON_STIPPLE:
+ return "polygon stipple";
+ case R300_FALLBACK_STENCIL_TWOSIDE:
+ return "two-sided stencil";
+ case R300_FALLBACK_RENDER_MODE:
+ return "render mode != GL_RENDER";
+ case R300_FALLBACK_FRAGMENT_PROGRAM:
+ return "fragment program";
+ case R300_FALLBACK_RADEON_COMMON:
+ snprintf(common_fallback_str, 32, "radeon common 0x%08x", rmesa->radeon.Fallback);
+ return common_fallback_str;
+ case R300_FALLBACK_AOS_LIMIT:
+ return "aos limit";
+ case R300_FALLBACK_INVALID_BUFFERS:
+ return "invalid buffers";
+ default:
+ return "unknown";
+ }
+}
+
+void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode)
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ uint32_t old_fallback = rmesa->fallback;
+ static uint32_t fallback_warn = 0;
+
+ if (mode) {
+ if ((fallback_warn & bit) == 0) {
+ if (RADEON_DEBUG & RADEON_FALLBACKS)
+ fprintf(stderr, "WARNING! Falling back to software for %s\n", getFallbackString(rmesa, bit));
+ fallback_warn |= bit;
+ }
+ rmesa->fallback |= bit;
+
+ /* update only if we change from no tcl fallbacks to some tcl fallbacks */
+ if (rmesa->options.hw_tcl_enabled) {
+ if (((old_fallback & R300_TCL_FALLBACK_MASK) == 0) &&
+ ((bit & R300_TCL_FALLBACK_MASK) > 0)) {
+ R300_STATECHANGE(rmesa, vap_cntl_status);
+ rmesa->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS;
+ }
+ }
+
+ /* update only if we change from no raster fallbacks to some raster fallbacks */
+ if (((old_fallback & R300_RASTER_FALLBACK_MASK) == 0) &&
+ ((bit & R300_RASTER_FALLBACK_MASK) > 0)) {
+
+ radeon_firevertices(&rmesa->radeon);
+ rmesa->radeon.swtcl.RenderIndex = ~0;
+ _swsetup_Wakeup( ctx );
+ }
+ } else {
+ rmesa->fallback &= ~bit;
+
+ /* update only if we have disabled all tcl fallbacks */
+ if (rmesa->options.hw_tcl_enabled) {
+ if ((old_fallback & R300_TCL_FALLBACK_MASK) == bit) {
+ R300_STATECHANGE(rmesa, vap_cntl_status);
+ rmesa->hw.vap_cntl_status.cmd[1] &= ~R300_VAP_TCL_BYPASS;
+ }
+ }
+
+ /* update only if we have disabled all raster fallbacks */
+ if ((old_fallback & R300_RASTER_FALLBACK_MASK) == bit) {
+ _swrast_flush( ctx );
+
+ tnl->Driver.Render.Start = r300RenderStart;
+ tnl->Driver.Render.Finish = r300RenderFinish;
+ tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
+ tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple;
+ tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+ tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+ tnl->Driver.Render.Interp = _tnl_interp;
+
+ _tnl_invalidate_vertex_state( ctx, ~0 );
+ _tnl_invalidate_vertices( ctx, ~0 );
+ }
+ }
+
+}
diff --git a/src/mesa/drivers/dri/r300/r300_render.h b/src/mesa/drivers/dri/r300/r300_render.h
new file mode 100644
index 0000000000..581e9fa0cc
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_render.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __R300_RENDER_H__
+#define __R300_RENDER_H__
+
+#include "main/mtypes.h"
+
+#define R300_FALLBACK_VERTEX_PROGRAM (1 << 0)
+#define R300_TCL_FALLBACK_MASK 0x0000ffff
+
+#define R300_FALLBACK_LINE_SMOOTH (1 << 16)
+#define R300_FALLBACK_POINT_SMOOTH (1 << 17)
+#define R300_FALLBACK_POLYGON_SMOOTH (1 << 18)
+#define R300_FALLBACK_LINE_STIPPLE (1 << 19)
+#define R300_FALLBACK_POLYGON_STIPPLE (1 << 20)
+#define R300_FALLBACK_STENCIL_TWOSIDE (1 << 21)
+#define R300_FALLBACK_RENDER_MODE (1 << 22)
+#define R300_FALLBACK_FRAGMENT_PROGRAM (1 << 23)
+#define R300_FALLBACK_RADEON_COMMON (1 << 29)
+#define R300_FALLBACK_AOS_LIMIT (1 << 30)
+#define R300_FALLBACK_INVALID_BUFFERS (1 << 31)
+#define R300_RASTER_FALLBACK_MASK 0xffff0000
+
+#define MASK_XYZW (R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+#define MASK_X R300_WRITE_ENA_X
+#define MASK_Y R300_WRITE_ENA_Y
+#define MASK_Z R300_WRITE_ENA_Z
+#define MASK_W R300_WRITE_ENA_W
+
+#if SWIZZLE_X != R300_INPUT_ROUTE_SELECT_X || \
+ SWIZZLE_Y != R300_INPUT_ROUTE_SELECT_Y || \
+ SWIZZLE_Z != R300_INPUT_ROUTE_SELECT_Z || \
+ SWIZZLE_W != R300_INPUT_ROUTE_SELECT_W || \
+ SWIZZLE_ZERO != R300_INPUT_ROUTE_SELECT_ZERO || \
+ SWIZZLE_ONE != R300_INPUT_ROUTE_SELECT_ONE
+#error Cannot change these!
+#endif
+
+extern const struct tnl_pipeline_stage _r300_render_stage;
+
+extern void r300SwitchFallback(GLcontext *ctx, uint32_t bit, GLboolean mode);
+
+extern void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/r300_shader.c b/src/mesa/drivers/dri/r300/r300_shader.c
new file mode 100644
index 0000000000..9c24166ec5
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_shader.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2009 Maciej Cencora <m.cencora@gmail.com>
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "main/glheader.h"
+
+#include "shader/program.h"
+#include "tnl/tnl.h"
+#include "r300_context.h"
+#include "r300_fragprog_common.h"
+
+static void freeFragProgCache(GLcontext *ctx, struct r300_fragment_program_cont *cache)
+{
+ struct r300_fragment_program *tmp, *fp = cache->progs;
+
+ while (fp) {
+ tmp = fp->next;
+ rc_constants_destroy(&fp->code.constants);
+ free(fp);
+ fp = tmp;
+ }
+}
+
+static void freeVertProgCache(GLcontext *ctx, struct r300_vertex_program_cont *cache)
+{
+ struct r300_vertex_program *tmp, *vp = cache->progs;
+
+ while (vp) {
+ tmp = vp->next;
+ rc_constants_destroy(&vp->code.constants);
+ _mesa_reference_vertprog(ctx, &vp->Base, NULL);
+ free(vp);
+ vp = tmp;
+ }
+}
+
+static struct gl_program *r300NewProgram(GLcontext * ctx, GLenum target,
+ GLuint id)
+{
+ struct r300_vertex_program_cont *vp;
+ struct r300_fragment_program_cont *fp;
+
+ switch (target) {
+ case GL_VERTEX_STATE_PROGRAM_NV:
+ case GL_VERTEX_PROGRAM_ARB:
+ vp = CALLOC_STRUCT(r300_vertex_program_cont);
+ return _mesa_init_vertex_program(ctx, &vp->mesa_program, target, id);
+
+ case GL_FRAGMENT_PROGRAM_NV:
+ case GL_FRAGMENT_PROGRAM_ARB:
+ fp = CALLOC_STRUCT(r300_fragment_program_cont);
+ return _mesa_init_fragment_program(ctx, &fp->Base, target, id);
+
+ default:
+ _mesa_problem(ctx, "Bad target in r300NewProgram");
+ }
+
+ return NULL;
+}
+
+static void r300DeleteProgram(GLcontext * ctx, struct gl_program *prog)
+{
+ struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog;
+ struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog;
+
+ switch (prog->Target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ freeVertProgCache(ctx, vp);
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
+ freeFragProgCache(ctx, fp);
+ break;
+ }
+
+ _mesa_delete_program(ctx, prog);
+}
+
+static GLboolean
+r300ProgramStringNotify(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+ struct r300_vertex_program_cont *vp = (struct r300_vertex_program_cont *)prog;
+ struct r300_fragment_program_cont *fp = (struct r300_fragment_program_cont *)prog;
+
+ switch (target) {
+ case GL_VERTEX_PROGRAM_ARB:
+ freeVertProgCache(ctx, vp);
+ vp->progs = NULL;
+ break;
+ case GL_FRAGMENT_PROGRAM_ARB:
+ freeFragProgCache(ctx, fp);
+ fp->progs = NULL;
+ break;
+ }
+
+ /* need this for tcl fallbacks */
+ (void) _tnl_program_string(ctx, target, prog);
+
+ /* XXX check if program is legal, within limits */
+ return GL_TRUE;
+}
+
+static GLboolean
+r300IsProgramNative(GLcontext * ctx, GLenum target, struct gl_program *prog)
+{
+ if (target == GL_FRAGMENT_PROGRAM_ARB) {
+ struct r300_fragment_program *fp = r300SelectAndTranslateFragmentShader(ctx);
+
+ return !fp->error;
+ } else {
+ struct r300_vertex_program *vp = r300SelectAndTranslateVertexShader(ctx);
+
+ return !vp->error;
+ }
+}
+
+void r300InitShaderFuncs(struct dd_function_table *functions)
+{
+ functions->NewProgram = r300NewProgram;
+ functions->DeleteProgram = r300DeleteProgram;
+ functions->ProgramStringNotify = r300ProgramStringNotify;
+ functions->IsProgramNative = r300IsProgramNative;
+}
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
new file mode 100644
index 0000000000..fa33be4998
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -0,0 +1,2416 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002.
+Copyright (C) 2004 Nicolai Haehnle.
+All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#include "main/glheader.h"
+#include "main/state.h"
+#include "main/imports.h"
+#include "main/enums.h"
+#include "main/macros.h"
+#include "main/context.h"
+#include "main/dd.h"
+#include "main/framebuffer.h"
+#include "main/simple_list.h"
+#include "main/api_arrayelt.h"
+
+#include "drivers/common/meta.h"
+#include "swrast/swrast.h"
+#include "swrast_setup/swrast_setup.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_statevars.h"
+#include "vbo/vbo.h"
+#include "tnl/tnl.h"
+
+#include "r300_context.h"
+#include "r300_state.h"
+#include "r300_reg.h"
+#include "r300_emit.h"
+#include "r300_fragprog_common.h"
+#include "r300_render.h"
+#include "r300_vertprog.h"
+
+static void r300BlendColor(GLcontext * ctx, const GLfloat cf[4])
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+ R300_STATECHANGE(rmesa, blend_color);
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ GLuint r = IROUND(cf[0]*1023.0f);
+ GLuint g = IROUND(cf[1]*1023.0f);
+ GLuint b = IROUND(cf[2]*1023.0f);
+ GLuint a = IROUND(cf[3]*1023.0f);
+
+ rmesa->hw.blend_color.cmd[1] = r | (a << 16);
+ rmesa->hw.blend_color.cmd[2] = b | (g << 16);
+ } else {
+ GLubyte color[4];
+ CLAMPED_FLOAT_TO_UBYTE(color[0], cf[0]);
+ CLAMPED_FLOAT_TO_UBYTE(color[1], cf[1]);
+ CLAMPED_FLOAT_TO_UBYTE(color[2], cf[2]);
+ CLAMPED_FLOAT_TO_UBYTE(color[3], cf[3]);
+
+ rmesa->hw.blend_color.cmd[1] = PACK_COLOR_8888(color[3], color[0],
+ color[1], color[2]);
+ }
+}
+
+/**
+ * Calculate the hardware blend factor setting. This same function is used
+ * for source and destination of both alpha and RGB.
+ *
+ * \returns
+ * The hardware register value for the specified blend factor. This value
+ * will need to be shifted into the correct position for either source or
+ * destination factor.
+ *
+ * \todo
+ * Since the two cases where source and destination are handled differently
+ * are essentially error cases, they should never happen. Determine if these
+ * cases can be removed.
+ */
+static int blend_factor(GLenum factor, GLboolean is_src)
+{
+ switch (factor) {
+ case GL_ZERO:
+ return R300_BLEND_GL_ZERO;
+ break;
+ case GL_ONE:
+ return R300_BLEND_GL_ONE;
+ break;
+ case GL_DST_COLOR:
+ return R300_BLEND_GL_DST_COLOR;
+ break;
+ case GL_ONE_MINUS_DST_COLOR:
+ return R300_BLEND_GL_ONE_MINUS_DST_COLOR;
+ break;
+ case GL_SRC_COLOR:
+ return R300_BLEND_GL_SRC_COLOR;
+ break;
+ case GL_ONE_MINUS_SRC_COLOR:
+ return R300_BLEND_GL_ONE_MINUS_SRC_COLOR;
+ break;
+ case GL_SRC_ALPHA:
+ return R300_BLEND_GL_SRC_ALPHA;
+ break;
+ case GL_ONE_MINUS_SRC_ALPHA:
+ return R300_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+ break;
+ case GL_DST_ALPHA:
+ return R300_BLEND_GL_DST_ALPHA;
+ break;
+ case GL_ONE_MINUS_DST_ALPHA:
+ return R300_BLEND_GL_ONE_MINUS_DST_ALPHA;
+ break;
+ case GL_SRC_ALPHA_SATURATE:
+ return (is_src) ? R300_BLEND_GL_SRC_ALPHA_SATURATE :
+ R300_BLEND_GL_ZERO;
+ break;
+ case GL_CONSTANT_COLOR:
+ return R300_BLEND_GL_CONST_COLOR;
+ break;
+ case GL_ONE_MINUS_CONSTANT_COLOR:
+ return R300_BLEND_GL_ONE_MINUS_CONST_COLOR;
+ break;
+ case GL_CONSTANT_ALPHA:
+ return R300_BLEND_GL_CONST_ALPHA;
+ break;
+ case GL_ONE_MINUS_CONSTANT_ALPHA:
+ return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA;
+ break;
+ default:
+ fprintf(stderr, "unknown blend factor %x\n", factor);
+ return (is_src) ? R300_BLEND_GL_ONE : R300_BLEND_GL_ZERO;
+ break;
+ }
+}
+
+/**
+ * Sets both the blend equation and the blend function.
+ * This is done in a single
+ * function because some blend equations (i.e., \c GL_MIN and \c GL_MAX)
+ * change the interpretation of the blend function.
+ * Also, make sure that blend function and blend equation are set to their
+ * default value if color blending is not enabled, since at least blend
+ * equations GL_MIN and GL_FUNC_REVERSE_SUBTRACT will cause wrong results
+ * otherwise for unknown reasons.
+ */
+
+/* helper function */
+static void r300SetBlendCntl(r300ContextPtr r300, int func, int eqn,
+ int cbits, int funcA, int eqnA)
+{
+ GLuint new_ablend, new_cblend;
+
+#if 0
+ fprintf(stderr,
+ "eqnA=%08x funcA=%08x eqn=%08x func=%08x cbits=%08x\n",
+ eqnA, funcA, eqn, func, cbits);
+#endif
+ new_ablend = eqnA | funcA;
+ new_cblend = eqn | func;
+
+ /* Some blend factor combinations don't seem to work when the
+ * BLEND_NO_SEPARATE bit is set.
+ *
+ * Especially problematic candidates are the ONE_MINUS_* flags,
+ * but I can't see a real pattern.
+ */
+#if 0
+ if (new_ablend == new_cblend) {
+ new_cblend |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
+ }
+#endif
+ new_cblend |= cbits;
+
+ if ((new_ablend != r300->hw.bld.cmd[R300_BLD_ABLEND]) ||
+ (new_cblend != r300->hw.bld.cmd[R300_BLD_CBLEND])) {
+ R300_STATECHANGE(r300, bld);
+ r300->hw.bld.cmd[R300_BLD_ABLEND] = new_ablend;
+ r300->hw.bld.cmd[R300_BLD_CBLEND] = new_cblend;
+ }
+}
+
+static void r300SetBlendState(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+ (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT);
+ int eqn = R300_COMB_FCN_ADD_CLAMP;
+ int funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+ (R300_BLEND_GL_ZERO << R300_DST_BLEND_SHIFT);
+ int eqnA = R300_COMB_FCN_ADD_CLAMP;
+
+ if (RGBA_LOGICOP_ENABLED(ctx) || !ctx->Color.BlendEnabled) {
+ r300SetBlendCntl(r300, func, eqn, 0, func, eqn);
+ return;
+ }
+
+ func =
+ (blend_factor(ctx->Color.BlendSrcRGB, GL_TRUE) <<
+ R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstRGB,
+ GL_FALSE) <<
+ R300_DST_BLEND_SHIFT);
+
+ switch (ctx->Color.BlendEquationRGB) {
+ case GL_FUNC_ADD:
+ eqn = R300_COMB_FCN_ADD_CLAMP;
+ break;
+
+ case GL_FUNC_SUBTRACT:
+ eqn = R300_COMB_FCN_SUB_CLAMP;
+ break;
+
+ case GL_FUNC_REVERSE_SUBTRACT:
+ eqn = R300_COMB_FCN_RSUB_CLAMP;
+ break;
+
+ case GL_MIN:
+ eqn = R300_COMB_FCN_MIN;
+ func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+ (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT);
+ break;
+
+ case GL_MAX:
+ eqn = R300_COMB_FCN_MAX;
+ func = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+ (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT);
+ break;
+
+ default:
+ fprintf(stderr,
+ "[%s:%u] Invalid RGB blend equation (0x%04x).\n",
+ __FUNCTION__, __LINE__, ctx->Color.BlendEquationRGB);
+ return;
+ }
+
+ funcA =
+ (blend_factor(ctx->Color.BlendSrcA, GL_TRUE) <<
+ R300_SRC_BLEND_SHIFT) | (blend_factor(ctx->Color.BlendDstA,
+ GL_FALSE) <<
+ R300_DST_BLEND_SHIFT);
+
+ switch (ctx->Color.BlendEquationA) {
+ case GL_FUNC_ADD:
+ eqnA = R300_COMB_FCN_ADD_CLAMP;
+ break;
+
+ case GL_FUNC_SUBTRACT:
+ eqnA = R300_COMB_FCN_SUB_CLAMP;
+ break;
+
+ case GL_FUNC_REVERSE_SUBTRACT:
+ eqnA = R300_COMB_FCN_RSUB_CLAMP;
+ break;
+
+ case GL_MIN:
+ eqnA = R300_COMB_FCN_MIN;
+ funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+ (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT);
+ break;
+
+ case GL_MAX:
+ eqnA = R300_COMB_FCN_MAX;
+ funcA = (R300_BLEND_GL_ONE << R300_SRC_BLEND_SHIFT) |
+ (R300_BLEND_GL_ONE << R300_DST_BLEND_SHIFT);
+ break;
+
+ default:
+ fprintf(stderr,
+ "[%s:%u] Invalid A blend equation (0x%04x).\n",
+ __FUNCTION__, __LINE__, ctx->Color.BlendEquationA);
+ return;
+ }
+
+ r300SetBlendCntl(r300,
+ func, eqn,
+ (R300_SEPARATE_ALPHA_ENABLE |
+ R300_READ_ENABLE |
+ R300_ALPHA_BLEND_ENABLE), funcA, eqnA);
+}
+
+static void r300BlendEquationSeparate(GLcontext * ctx,
+ GLenum modeRGB, GLenum modeA)
+{
+ r300SetBlendState(ctx);
+}
+
+static void r300BlendFuncSeparate(GLcontext * ctx,
+ GLenum sfactorRGB, GLenum dfactorRGB,
+ GLenum sfactorA, GLenum dfactorA)
+{
+ r300SetBlendState(ctx);
+}
+
+/**
+ * Translate LogicOp enums into hardware representation.
+ * Both use a very logical bit-wise layout, but unfortunately the order
+ * of bits is reversed.
+ */
+static GLuint translate_logicop(GLenum logicop)
+{
+ GLuint bits = logicop - GL_CLEAR;
+ bits = ((bits & 1) << 3) | ((bits & 2) << 1) | ((bits & 4) >> 1) | ((bits & 8) >> 3);
+ return bits << R300_RB3D_ROPCNTL_ROP_SHIFT;
+}
+
+/**
+ * Used internally to update the r300->hw hardware state to match the
+ * current OpenGL state.
+ */
+static void r300SetLogicOpState(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ R300_STATECHANGE(r300, rop);
+ if (RGBA_LOGICOP_ENABLED(ctx)) {
+ r300->hw.rop.cmd[1] = R300_RB3D_ROPCNTL_ROP_ENABLE |
+ translate_logicop(ctx->Color.LogicOp);
+ } else {
+ r300->hw.rop.cmd[1] = 0;
+ }
+}
+
+/**
+ * Called by Mesa when an application program changes the LogicOp state
+ * via glLogicOp.
+ */
+static void r300LogicOpcode(GLcontext *ctx, GLenum logicop)
+{
+ if (RGBA_LOGICOP_ENABLED(ctx))
+ r300SetLogicOpState(ctx);
+}
+
+static void r300ClipPlane( GLcontext *ctx, GLenum plane, const GLfloat *eq )
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ GLint p;
+ GLint *ip;
+
+ /* no VAP UCP on non-TCL chipsets */
+ if (!rmesa->options.hw_tcl_enabled)
+ return;
+
+ p = (GLint) plane - (GLint) GL_CLIP_PLANE0;
+ ip = (GLint *)ctx->Transform._ClipUserPlane[p];
+
+ R300_STATECHANGE( rmesa, vpucp[p] );
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_X] = ip[0];
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_Y] = ip[1];
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_Z] = ip[2];
+ rmesa->hw.vpucp[p].cmd[R300_VPUCP_W] = ip[3];
+}
+
+static void r300SetClipPlaneState(GLcontext * ctx, GLenum cap, GLboolean state)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLuint p;
+
+ /* no VAP UCP on non-TCL chipsets */
+ if (!r300->options.hw_tcl_enabled)
+ return;
+
+ p = cap - GL_CLIP_PLANE0;
+ R300_STATECHANGE(r300, vap_clip_cntl);
+ if (state) {
+ r300->hw.vap_clip_cntl.cmd[1] |= (R300_VAP_UCP_ENABLE_0 << p);
+ r300ClipPlane(ctx, cap, NULL);
+ } else {
+ r300->hw.vap_clip_cntl.cmd[1] &= ~(R300_VAP_UCP_ENABLE_0 << p);
+ }
+}
+
+/**
+ * Update our tracked culling state based on Mesa's state.
+ */
+static void r300UpdateCulling(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ uint32_t val = 0;
+
+ if (ctx->Polygon.CullFlag) {
+ switch (ctx->Polygon.CullFaceMode) {
+ case GL_FRONT:
+ val = R300_CULL_FRONT;
+ break;
+ case GL_BACK:
+ val = R300_CULL_BACK;
+ break;
+ case GL_FRONT_AND_BACK:
+ val = R300_CULL_FRONT | R300_CULL_BACK;
+ break;
+ default:
+ break;
+ }
+ }
+
+ switch (ctx->Polygon.FrontFace) {
+ case GL_CW:
+ val |= R300_FRONT_FACE_CW;
+ break;
+ case GL_CCW:
+ val |= R300_FRONT_FACE_CCW;
+ break;
+ default:
+ break;
+ }
+
+ /* Winding is inverted when rendering to FBO */
+ if (ctx->DrawBuffer && ctx->DrawBuffer->Name)
+ val ^= R300_FRONT_FACE_CW;
+
+ R300_STATECHANGE(r300, cul);
+ r300->hw.cul.cmd[R300_CUL_CULL] = val;
+}
+
+static void r300SetPolygonOffsetState(GLcontext * ctx, GLboolean state)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ R300_STATECHANGE(r300, occlusion_cntl);
+ if (state) {
+ r300->hw.occlusion_cntl.cmd[1] |= (3 << 0);
+ } else {
+ r300->hw.occlusion_cntl.cmd[1] &= ~(3 << 0);
+ }
+}
+
+static GLboolean current_fragment_program_writes_depth(GLcontext* ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ return ctx->FragmentProgram._Current && r300->selected_fp->code.writes_depth;
+}
+
+static void r300SetEarlyZState(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLuint topZ = R300_ZTOP_ENABLE;
+ GLuint w_fmt, fgdepthsrc;
+
+ if (ctx->Color.AlphaEnabled && ctx->Color.AlphaFunc != GL_ALWAYS)
+ topZ = R300_ZTOP_DISABLE;
+ else if (current_fragment_program_writes_depth(ctx))
+ topZ = R300_ZTOP_DISABLE;
+ else if (ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->UsesKill)
+ topZ = R300_ZTOP_DISABLE;
+ else if (r300->radeon.query.current)
+ topZ = R300_ZTOP_DISABLE;
+
+ if (topZ != r300->hw.zstencil_format.cmd[2]) {
+ /* Note: This completely reemits the stencil format.
+ * I have not tested whether this is strictly necessary,
+ * or if emitting a write to ZB_ZTOP is enough.
+ */
+ R300_STATECHANGE(r300, zstencil_format);
+ r300->hw.zstencil_format.cmd[2] = topZ;
+ }
+
+ /* w_fmt value is set to get best performance
+ * see p.130 R5xx 3D acceleration guide v1.3 */
+ if (current_fragment_program_writes_depth(ctx)) {
+ fgdepthsrc = R300_FG_DEPTH_SRC_SHADER;
+ w_fmt = R300_W_FMT_W24 | R300_W_SRC_US;
+ } else {
+ fgdepthsrc = R300_FG_DEPTH_SRC_SCAN;
+ w_fmt = R300_W_FMT_W0 | R300_W_SRC_US;
+ }
+
+ if (w_fmt != r300->hw.us_out_fmt.cmd[5]) {
+ R300_STATECHANGE(r300, us_out_fmt);
+ r300->hw.us_out_fmt.cmd[5] = w_fmt;
+ }
+
+ if (fgdepthsrc != r300->hw.fg_depth_src.cmd[1]) {
+ R300_STATECHANGE(r300, fg_depth_src);
+ r300->hw.fg_depth_src.cmd[1] = fgdepthsrc;
+ }
+}
+
+static void r300SetAlphaState(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLubyte refByte;
+ uint32_t pp_misc = 0x0;
+ GLboolean really_enabled = ctx->Color.AlphaEnabled;
+
+ CLAMPED_FLOAT_TO_UBYTE(refByte, ctx->Color.AlphaRef);
+
+ switch (ctx->Color.AlphaFunc) {
+ case GL_NEVER:
+ pp_misc |= R300_FG_ALPHA_FUNC_NEVER;
+ break;
+ case GL_LESS:
+ pp_misc |= R300_FG_ALPHA_FUNC_LESS;
+ break;
+ case GL_EQUAL:
+ pp_misc |= R300_FG_ALPHA_FUNC_EQUAL;
+ break;
+ case GL_LEQUAL:
+ pp_misc |= R300_FG_ALPHA_FUNC_LE;
+ break;
+ case GL_GREATER:
+ pp_misc |= R300_FG_ALPHA_FUNC_GREATER;
+ break;
+ case GL_NOTEQUAL:
+ pp_misc |= R300_FG_ALPHA_FUNC_NOTEQUAL;
+ break;
+ case GL_GEQUAL:
+ pp_misc |= R300_FG_ALPHA_FUNC_GE;
+ break;
+ case GL_ALWAYS:
+ /*pp_misc |= FG_ALPHA_FUNC_ALWAYS; */
+ really_enabled = GL_FALSE;
+ break;
+ }
+
+ if (really_enabled) {
+ pp_misc |= R300_FG_ALPHA_FUNC_ENABLE;
+ pp_misc |= R500_FG_ALPHA_FUNC_8BIT;
+ pp_misc |= (refByte & R300_FG_ALPHA_FUNC_VAL_MASK);
+ } else {
+ pp_misc = 0x0;
+ }
+
+ R300_STATECHANGE(r300, at);
+ r300->hw.at.cmd[R300_AT_ALPHA_TEST] = pp_misc;
+ r300->hw.at.cmd[R300_AT_UNKNOWN] = 0;
+}
+
+static void r300AlphaFunc(GLcontext * ctx, GLenum func, GLfloat ref)
+{
+ (void)func;
+ (void)ref;
+ r300SetAlphaState(ctx);
+}
+
+static int translate_func(int func)
+{
+ switch (func) {
+ case GL_NEVER:
+ return R300_ZS_NEVER;
+ case GL_LESS:
+ return R300_ZS_LESS;
+ case GL_EQUAL:
+ return R300_ZS_EQUAL;
+ case GL_LEQUAL:
+ return R300_ZS_LEQUAL;
+ case GL_GREATER:
+ return R300_ZS_GREATER;
+ case GL_NOTEQUAL:
+ return R300_ZS_NOTEQUAL;
+ case GL_GEQUAL:
+ return R300_ZS_GEQUAL;
+ case GL_ALWAYS:
+ return R300_ZS_ALWAYS;
+ }
+ return 0;
+}
+
+static void r300SetDepthState(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ R300_STATECHANGE(r300, zs);
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] &= (R300_STENCIL_ENABLE |
+ R300_STENCIL_FRONT_BACK |
+ R500_STENCIL_REFMASK_FRONT_BACK);
+ r300->hw.zs.cmd[R300_ZS_CNTL_1] &= ~(R300_ZS_MASK << R300_Z_FUNC_SHIFT);
+
+ if (ctx->Depth.Test && ctx->DrawBuffer->_DepthBuffer) {
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_ENABLE;
+ if (ctx->Depth.Mask)
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_Z_WRITE_ENABLE;
+ r300->hw.zs.cmd[R300_ZS_CNTL_1] |=
+ translate_func(ctx->Depth.Func) << R300_Z_FUNC_SHIFT;
+ }
+}
+
+static void r300CatchStencilFallback(GLcontext *ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ const unsigned back = ctx->Stencil._BackFace;
+
+ if (rmesa->radeon.radeonScreen->kernel_mm &&
+ (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)) {
+ r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE);
+ } else if (ctx->Stencil._Enabled &&
+ (ctx->Stencil.Ref[0] != ctx->Stencil.Ref[back]
+ || ctx->Stencil.ValueMask[0] != ctx->Stencil.ValueMask[back]
+ || ctx->Stencil.WriteMask[0] != ctx->Stencil.WriteMask[back])) {
+ r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_TRUE);
+ } else {
+ r300SwitchFallback(ctx, R300_FALLBACK_STENCIL_TWOSIDE, GL_FALSE);
+ }
+}
+
+static void r300SetStencilState(GLcontext * ctx, GLboolean state)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ GLboolean hw_stencil = GL_FALSE;
+
+ r300CatchStencilFallback(ctx);
+
+ if (ctx->DrawBuffer) {
+ struct radeon_renderbuffer *rrbStencil
+ = radeon_get_renderbuffer(ctx->DrawBuffer, BUFFER_STENCIL);
+ hw_stencil = (rrbStencil && rrbStencil->bo);
+ }
+
+ if (hw_stencil) {
+ R300_STATECHANGE(r300, zs);
+ if (state) {
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] |=
+ R300_STENCIL_ENABLE;
+ } else {
+ r300->hw.zs.cmd[R300_ZS_CNTL_0] &=
+ ~R300_STENCIL_ENABLE;
+ }
+ }
+}
+
+static void r300UpdatePolygonMode(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ uint32_t hw_mode = R300_GA_POLY_MODE_DISABLE;
+
+ /* Only do something if a polygon mode is wanted, default is GL_FILL */
+ if (ctx->Polygon.FrontMode != GL_FILL ||
+ ctx->Polygon.BackMode != GL_FILL) {
+ GLenum f, b;
+
+ /* Handle GL_CW (clock wise and GL_CCW (counter clock wise)
+ * correctly by selecting the correct front and back face
+ */
+ if (ctx->Polygon.FrontFace == GL_CCW) {
+ f = ctx->Polygon.FrontMode;
+ b = ctx->Polygon.BackMode;
+ } else {
+ f = ctx->Polygon.BackMode;
+ b = ctx->Polygon.FrontMode;
+ }
+
+ /* Enable polygon mode */
+ hw_mode |= R300_GA_POLY_MODE_DUAL;
+
+ switch (f) {
+ case GL_LINE:
+ hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_LINE;
+ break;
+ case GL_POINT:
+ hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_POINT;
+ break;
+ case GL_FILL:
+ hw_mode |= R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
+ break;
+ }
+
+ switch (b) {
+ case GL_LINE:
+ hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_LINE;
+ break;
+ case GL_POINT:
+ hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_POINT;
+ break;
+ case GL_FILL:
+ hw_mode |= R300_GA_POLY_MODE_BACK_PTYPE_TRI;
+ break;
+ }
+ }
+
+ if (r300->hw.polygon_mode.cmd[1] != hw_mode) {
+ R300_STATECHANGE(r300, polygon_mode);
+ r300->hw.polygon_mode.cmd[1] = hw_mode;
+ }
+
+ r300->hw.polygon_mode.cmd[2] = 0x00000001;
+ r300->hw.polygon_mode.cmd[3] = 0x00000000;
+}
+
+/**
+ * Change the culling mode.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300CullFace(GLcontext * ctx, GLenum mode)
+{
+ (void)mode;
+
+ r300UpdateCulling(ctx);
+}
+
+/**
+ * Change the polygon orientation.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300FrontFace(GLcontext * ctx, GLenum mode)
+{
+ (void)mode;
+
+ r300UpdateCulling(ctx);
+ r300UpdatePolygonMode(ctx);
+}
+
+/**
+ * Change the depth testing function.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300DepthFunc(GLcontext * ctx, GLenum func)
+{
+ (void)func;
+ r300SetDepthState(ctx);
+}
+
+/**
+ * Enable/Disable depth writing.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300DepthMask(GLcontext * ctx, GLboolean mask)
+{
+ (void)mask;
+ r300SetDepthState(ctx);
+}
+
+/**
+ * Handle glColorMask()
+ */
+static void r300ColorMask(GLcontext * ctx,
+ GLboolean r, GLboolean g, GLboolean b, GLboolean a)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int mask = (r ? RB3D_COLOR_CHANNEL_MASK_RED_MASK0 : 0) |
+ (g ? RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 : 0) |
+ (b ? RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 : 0) |
+ (a ? RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 : 0);
+
+ if (mask != r300->hw.cmk.cmd[R300_CMK_COLORMASK]) {
+ R300_STATECHANGE(r300, cmk);
+ r300->hw.cmk.cmd[R300_CMK_COLORMASK] = mask;
+ }
+}
+
+/* =============================================================
+ * Point state
+ */
+static void r300PointSize(GLcontext * ctx, GLfloat size)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ /* We need to clamp to user defined range here, because
+ * the HW clamping happens only for per vertex point size. */
+ size = CLAMP(size, ctx->Point.MinSize, ctx->Point.MaxSize);
+
+ /* same size limits for AA, non-AA points */
+ size = CLAMP(size, ctx->Const.MinPointSize, ctx->Const.MaxPointSize);
+
+ R300_STATECHANGE(r300, ps);
+ r300->hw.ps.cmd[R300_PS_POINTSIZE] =
+ ((int)(size * 6) << R300_POINTSIZE_X_SHIFT) |
+ ((int)(size * 6) << R300_POINTSIZE_Y_SHIFT);
+}
+
+static void r300PointParameter(GLcontext * ctx, GLenum pname, const GLfloat * param)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ switch (pname) {
+ case GL_POINT_SIZE_MIN:
+ R300_STATECHANGE(r300, ga_point_minmax);
+ r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MIN_MASK;
+ r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MinSize * 6.0);
+ r300PointSize(ctx, ctx->Point.Size);
+ break;
+ case GL_POINT_SIZE_MAX:
+ R300_STATECHANGE(r300, ga_point_minmax);
+ r300->hw.ga_point_minmax.cmd[1] &= ~R300_GA_POINT_MINMAX_MAX_MASK;
+ r300->hw.ga_point_minmax.cmd[1] |= (GLuint)(ctx->Point.MaxSize * 6.0)
+ << R300_GA_POINT_MINMAX_MAX_SHIFT;
+ r300PointSize(ctx, ctx->Point.Size);
+ break;
+ case GL_POINT_DISTANCE_ATTENUATION:
+ break;
+ case GL_POINT_FADE_THRESHOLD_SIZE:
+ break;
+ default:
+ break;
+ }
+}
+
+/* =============================================================
+ * Line state
+ */
+static void r300LineWidth(GLcontext * ctx, GLfloat widthf)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ widthf = CLAMP(widthf,
+ ctx->Const.MinPointSize,
+ ctx->Const.MaxPointSize);
+ R300_STATECHANGE(r300, lcntl);
+ r300->hw.lcntl.cmd[1] =
+ R300_LINE_CNT_HO | R300_LINE_CNT_VE | (int)(widthf * 6.0);
+}
+
+static void r300PolygonMode(GLcontext * ctx, GLenum face, GLenum mode)
+{
+ (void)face;
+ (void)mode;
+
+ r300UpdatePolygonMode(ctx);
+}
+
+/* =============================================================
+ * Stencil
+ */
+
+static int translate_stencil_op(int op)
+{
+ switch (op) {
+ case GL_KEEP:
+ return R300_ZS_KEEP;
+ case GL_ZERO:
+ return R300_ZS_ZERO;
+ case GL_REPLACE:
+ return R300_ZS_REPLACE;
+ case GL_INCR:
+ return R300_ZS_INCR;
+ case GL_DECR:
+ return R300_ZS_DECR;
+ case GL_INCR_WRAP_EXT:
+ return R300_ZS_INCR_WRAP;
+ case GL_DECR_WRAP_EXT:
+ return R300_ZS_DECR_WRAP;
+ case GL_INVERT:
+ return R300_ZS_INVERT;
+ default:
+ WARN_ONCE("Do not know how to translate stencil op");
+ return R300_ZS_KEEP;
+ }
+ return 0;
+}
+
+static void r300ShadeModel(GLcontext * ctx, GLenum mode)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+ R300_STATECHANGE(rmesa, shade);
+ rmesa->hw.shade.cmd[1] = 0x00000002;
+ R300_STATECHANGE(rmesa, shade2);
+ switch (mode) {
+ case GL_FLAT:
+ rmesa->hw.shade2.cmd[1] = R300_RE_SHADE_MODEL_FLAT;
+ break;
+ case GL_SMOOTH:
+ rmesa->hw.shade2.cmd[1] = R300_RE_SHADE_MODEL_SMOOTH;
+ break;
+ default:
+ return;
+ }
+ rmesa->hw.shade2.cmd[2] = 0x00000000;
+ rmesa->hw.shade2.cmd[3] = 0x00000000;
+}
+
+static void r300StencilFuncSeparate(GLcontext * ctx, GLenum face,
+ GLenum func, GLint ref, GLuint mask)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ GLuint refmask;
+ GLuint flag;
+ const unsigned back = ctx->Stencil._BackFace;
+
+ r300CatchStencilFallback(ctx);
+
+ refmask = ((ctx->Stencil.Ref[0] & 0xff) << R300_STENCILREF_SHIFT)
+ | ((ctx->Stencil.ValueMask[0] & 0xff) << R300_STENCILMASK_SHIFT);
+
+ R300_STATECHANGE(rmesa, zs);
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R300_STENCIL_FRONT_BACK;
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &= ~((R300_ZS_MASK <<
+ R300_S_FRONT_FUNC_SHIFT)
+ | (R300_ZS_MASK <<
+ R300_S_BACK_FUNC_SHIFT));
+
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=
+ ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) |
+ (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT));
+
+ flag = translate_func(ctx->Stencil.Function[0]);
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+ (flag << R300_S_FRONT_FUNC_SHIFT);
+
+ flag = translate_func(ctx->Stencil.Function[back]);
+
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+ (flag << R300_S_BACK_FUNC_SHIFT);
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |= refmask;
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_0] |= R500_STENCIL_REFMASK_FRONT_BACK;
+ R300_STATECHANGE(rmesa, zsb);
+ refmask = ((ctx->Stencil.Ref[back] & 0xff) << R300_STENCILREF_SHIFT)
+ | ((ctx->Stencil.ValueMask[back] & 0xff) << R300_STENCILMASK_SHIFT);
+
+ rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] &=
+ ~((R300_STENCILREF_MASK << R300_STENCILREF_SHIFT) |
+ (R300_STENCILREF_MASK << R300_STENCILMASK_SHIFT));
+ rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |= refmask;
+ }
+}
+
+static void r300StencilMaskSeparate(GLcontext * ctx, GLenum face, GLuint mask)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ const unsigned back = ctx->Stencil._BackFace;
+
+ r300CatchStencilFallback(ctx);
+
+ R300_STATECHANGE(rmesa, zs);
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_2] &=
+ ~(R300_STENCILREF_MASK <<
+ R300_STENCILWRITEMASK_SHIFT);
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_2] |=
+ (ctx->Stencil.
+ WriteMask[0] & R300_STENCILREF_MASK) <<
+ R300_STENCILWRITEMASK_SHIFT;
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ R300_STATECHANGE(rmesa, zsb);
+ rmesa->hw.zsb.cmd[R300_ZSB_CNTL_0] |=
+ (ctx->Stencil.
+ WriteMask[back] & R300_STENCILREF_MASK) <<
+ R300_STENCILWRITEMASK_SHIFT;
+ }
+}
+
+static void r300StencilOpSeparate(GLcontext * ctx, GLenum face,
+ GLenum fail, GLenum zfail, GLenum zpass)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ const unsigned back = ctx->Stencil._BackFace;
+
+ r300CatchStencilFallback(ctx);
+
+ R300_STATECHANGE(rmesa, zs);
+ /* It is easier to mask what's left.. */
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] &=
+ (R300_ZS_MASK << R300_Z_FUNC_SHIFT) |
+ (R300_ZS_MASK << R300_S_FRONT_FUNC_SHIFT) |
+ (R300_ZS_MASK << R300_S_BACK_FUNC_SHIFT);
+
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+ (translate_stencil_op(ctx->Stencil.FailFunc[0]) <<
+ R300_S_FRONT_SFAIL_OP_SHIFT)
+ | (translate_stencil_op(ctx->Stencil.ZFailFunc[0]) <<
+ R300_S_FRONT_ZFAIL_OP_SHIFT)
+ | (translate_stencil_op(ctx->Stencil.ZPassFunc[0]) <<
+ R300_S_FRONT_ZPASS_OP_SHIFT);
+
+ rmesa->hw.zs.cmd[R300_ZS_CNTL_1] |=
+ (translate_stencil_op(ctx->Stencil.FailFunc[back]) <<
+ R300_S_BACK_SFAIL_OP_SHIFT)
+ | (translate_stencil_op(ctx->Stencil.ZFailFunc[back]) <<
+ R300_S_BACK_ZFAIL_OP_SHIFT)
+ | (translate_stencil_op(ctx->Stencil.ZPassFunc[back]) <<
+ R300_S_BACK_ZPASS_OP_SHIFT);
+}
+
+/* =============================================================
+ * Window position and viewport transformation
+ */
+
+static void r300UpdateWindow(GLcontext * ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
+ GLfloat xoffset = dPriv ? (GLfloat) dPriv->x : 0;
+ GLfloat yoffset = dPriv ? (GLfloat) dPriv->y + dPriv->h : 0;
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+ const GLfloat depthScale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+ const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
+ GLfloat y_scale, y_bias;
+
+ if (render_to_fbo) {
+ y_scale = 1.0;
+ y_bias = 0;
+ } else {
+ y_scale = -1.0;
+ y_bias = yoffset;
+ }
+
+ GLfloat sx = v[MAT_SX];
+ GLfloat tx = v[MAT_TX] + xoffset;
+ GLfloat sy = v[MAT_SY] * y_scale;
+ GLfloat ty = (v[MAT_TY] * y_scale) + y_bias;
+ GLfloat sz = v[MAT_SZ] * depthScale;
+ GLfloat tz = v[MAT_TZ] * depthScale;
+
+ R300_STATECHANGE(rmesa, vpt);
+
+ rmesa->hw.vpt.cmd[R300_VPT_XSCALE] = r300PackFloat32(sx);
+ rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx);
+ rmesa->hw.vpt.cmd[R300_VPT_YSCALE] = r300PackFloat32(sy);
+ rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty);
+ rmesa->hw.vpt.cmd[R300_VPT_ZSCALE] = r300PackFloat32(sz);
+ rmesa->hw.vpt.cmd[R300_VPT_ZOFFSET] = r300PackFloat32(tz);
+}
+
+static void r300Viewport(GLcontext * ctx, GLint x, GLint y,
+ GLsizei width, GLsizei height)
+{
+ /* Don't pipeline viewport changes, conflict with window offset
+ * setting below. Could apply deltas to rescue pipelined viewport
+ * values, or keep the originals hanging around.
+ */
+ r300UpdateWindow(ctx);
+
+ radeon_viewport(ctx, x, y, width, height);
+}
+
+static void r300DepthRange(GLcontext * ctx, GLclampd nearval, GLclampd farval)
+{
+ r300UpdateWindow(ctx);
+}
+
+void r300UpdateViewportOffset(GLcontext * ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ __DRIdrawable *dPriv = radeon_get_drawable(&rmesa->radeon);
+ GLfloat xoffset = (GLfloat) dPriv->x;
+ GLfloat yoffset = (GLfloat) dPriv->y + dPriv->h;
+ const GLfloat *v = ctx->Viewport._WindowMap.m;
+
+ GLfloat tx = v[MAT_TX] + xoffset;
+ GLfloat ty = (-v[MAT_TY]) + yoffset;
+
+ if (rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] != r300PackFloat32(tx) ||
+ rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] != r300PackFloat32(ty)) {
+ /* Note: this should also modify whatever data the context reset
+ * code uses...
+ */
+ R300_STATECHANGE(rmesa, vpt);
+ rmesa->hw.vpt.cmd[R300_VPT_XOFFSET] = r300PackFloat32(tx);
+ rmesa->hw.vpt.cmd[R300_VPT_YOFFSET] = r300PackFloat32(ty);
+
+ }
+
+ radeonUpdateScissor(ctx);
+}
+
+/**
+ * Update R300's own internal state parameters.
+ * For now just STATE_R300_WINDOW_DIMENSION
+ */
+static void r300UpdateStateParameters(GLcontext * ctx, GLuint new_state)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct gl_program_parameter_list *paramList;
+
+ if (!(new_state & (_NEW_BUFFERS | _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS)))
+ return;
+
+ if (!ctx->FragmentProgram._Current || !rmesa->selected_fp)
+ return;
+
+ paramList = ctx->FragmentProgram._Current->Base.Parameters;
+
+ if (!paramList)
+ return;
+
+ _mesa_load_state_parameters(ctx, paramList);
+}
+
+/* =============================================================
+ * Polygon state
+ */
+static void r300PolygonOffset(GLcontext * ctx, GLfloat factor, GLfloat units)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ GLfloat constant = units;
+
+ switch (ctx->Visual.depthBits) {
+ case 16:
+ constant *= 4.0;
+ break;
+ case 24:
+ constant *= 2.0;
+ break;
+ }
+
+ factor *= 12.0;
+
+/* fprintf(stderr, "%s f:%f u:%f\n", __FUNCTION__, factor, constant); */
+
+ R300_STATECHANGE(rmesa, zbs);
+ rmesa->hw.zbs.cmd[R300_ZBS_T_FACTOR] = r300PackFloat32(factor);
+ rmesa->hw.zbs.cmd[R300_ZBS_T_CONSTANT] = r300PackFloat32(constant);
+ rmesa->hw.zbs.cmd[R300_ZBS_W_FACTOR] = r300PackFloat32(factor);
+ rmesa->hw.zbs.cmd[R300_ZBS_W_CONSTANT] = r300PackFloat32(constant);
+}
+
+/* Routing and texture-related */
+
+/* r300 doesnt handle GL_CLAMP and GL_MIRROR_CLAMP_EXT correctly when filter is NEAREST.
+ * Since texwrap produces same results for GL_CLAMP and GL_CLAMP_TO_EDGE we use them instead.
+ * We need to recalculate wrap modes whenever filter mode is changed because someone might do:
+ * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
+ * glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ * Since r300 completely ignores R300_TX_CLAMP when either min or mag is nearest it cant handle
+ * combinations where only one of them is nearest.
+ */
+static unsigned long gen_fixed_filter(unsigned long f)
+{
+ unsigned long mag, min, needs_fixing = 0;
+ //return f;
+
+ /* We ignore MIRROR bit so we dont have to do everything twice */
+ if ((f & ((7 - 1) << R300_TX_WRAP_S_SHIFT)) ==
+ (R300_TX_CLAMP << R300_TX_WRAP_S_SHIFT)) {
+ needs_fixing |= 1;
+ }
+ if ((f & ((7 - 1) << R300_TX_WRAP_T_SHIFT)) ==
+ (R300_TX_CLAMP << R300_TX_WRAP_T_SHIFT)) {
+ needs_fixing |= 2;
+ }
+ if ((f & ((7 - 1) << R300_TX_WRAP_R_SHIFT)) ==
+ (R300_TX_CLAMP << R300_TX_WRAP_R_SHIFT)) {
+ needs_fixing |= 4;
+ }
+
+ if (!needs_fixing)
+ return f;
+
+ mag = f & R300_TX_MAG_FILTER_MASK;
+ min = f & (R300_TX_MIN_FILTER_MASK|R300_TX_MIN_FILTER_MIP_MASK);
+
+ /* TODO: Check for anisto filters too */
+ if ((mag != R300_TX_MAG_FILTER_NEAREST)
+ && (min != R300_TX_MIN_FILTER_NEAREST))
+ return f;
+
+ /* r300 cant handle these modes hence we force nearest to linear */
+ if ((mag == R300_TX_MAG_FILTER_NEAREST)
+ && (min != R300_TX_MIN_FILTER_NEAREST)) {
+ f &= ~R300_TX_MAG_FILTER_NEAREST;
+ f |= R300_TX_MAG_FILTER_LINEAR;
+ return f;
+ }
+
+ if ((min == R300_TX_MIN_FILTER_NEAREST)
+ && (mag != R300_TX_MAG_FILTER_NEAREST)) {
+ f &= ~R300_TX_MIN_FILTER_NEAREST;
+ f |= R300_TX_MIN_FILTER_LINEAR;
+ return f;
+ }
+
+ /* Both are nearest */
+ if (needs_fixing & 1) {
+ f &= ~((7 - 1) << R300_TX_WRAP_S_SHIFT);
+ f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_S_SHIFT;
+ }
+ if (needs_fixing & 2) {
+ f &= ~((7 - 1) << R300_TX_WRAP_T_SHIFT);
+ f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_T_SHIFT;
+ }
+ if (needs_fixing & 4) {
+ f &= ~((7 - 1) << R300_TX_WRAP_R_SHIFT);
+ f |= R300_TX_CLAMP_TO_EDGE << R300_TX_WRAP_R_SHIFT;
+ }
+ return f;
+}
+
+static void r300SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int i;
+ struct r300_fragment_program_code *code = &r300->selected_fp->code.code.r300;
+
+ R300_STATECHANGE(r300, fpt);
+
+ for (i = 0; i < code->tex.length; i++) {
+ int unit;
+ int opcode;
+ unsigned long val;
+
+ unit = code->tex.inst[i] >> R300_TEX_ID_SHIFT;
+ unit &= 15;
+
+ val = code->tex.inst[i];
+ val &= ~R300_TEX_ID_MASK;
+
+ opcode =
+ (val & R300_TEX_INST_MASK) >> R300_TEX_INST_SHIFT;
+ if (opcode == R300_TEX_OP_KIL) {
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
+ } else {
+ if (tmu_mappings[unit] >= 0) {
+ val |=
+ tmu_mappings[unit] <<
+ R300_TEX_ID_SHIFT;
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
+ } else {
+ // We get here when the corresponding texture image is incomplete
+ // (e.g. incomplete mipmaps etc.)
+ r300->hw.fpt.cmd[R300_FPT_INSTR_0 + i] = val;
+ }
+ }
+ }
+
+ r300->hw.fpt.cmd[R300_FPT_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen,
+ R300_US_TEX_INST_0, code->tex.length);
+}
+
+static void r500SetupFragmentShaderTextures(GLcontext *ctx, int *tmu_mappings)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int i;
+ struct r500_fragment_program_code *code = &r300->selected_fp->code.code.r500;
+
+ /* find all the texture instructions and relocate the texture units */
+ for (i = 0; i < code->inst_end + 1; i++) {
+ if ((code->inst[i].inst0 & 0x3) == R500_INST_TYPE_TEX) {
+ uint32_t val;
+ int unit, opcode, new_unit;
+
+ val = code->inst[i].inst1;
+
+ unit = (val >> 16) & 0xf;
+
+ val &= ~(0xf << 16);
+
+ opcode = val & (0x7 << 22);
+ if (opcode == R500_TEX_INST_TEXKILL) {
+ new_unit = 0;
+ } else {
+ if (tmu_mappings[unit] >= 0) {
+ new_unit = tmu_mappings[unit];
+ } else {
+ new_unit = 0;
+ }
+ }
+ val |= R500_TEX_ID(new_unit);
+ code->inst[i].inst1 = val;
+ }
+ }
+}
+
+static GLuint translate_lod_bias(GLfloat bias)
+{
+ GLint b = (int)(bias*32);
+ if (b >= (1 << 9))
+ b = (1 << 9)-1;
+ else if (b < -(1 << 9))
+ b = -(1 << 9);
+ return (((GLuint)b) << R300_LOD_BIAS_SHIFT) & R300_LOD_BIAS_MASK;
+}
+
+
+static void r300SetupTextures(GLcontext * ctx)
+{
+ int i, mtu;
+ struct radeon_tex_obj *t;
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ int hw_tmu = 0;
+ int last_hw_tmu = -1; /* -1 translates into no setup costs for fields */
+ int tmu_mappings[R300_MAX_TEXTURE_UNITS] = { -1, };
+
+ R300_STATECHANGE(r300, txe);
+ R300_STATECHANGE(r300, tex.filter);
+ R300_STATECHANGE(r300, tex.filter_1);
+ R300_STATECHANGE(r300, tex.size);
+ R300_STATECHANGE(r300, tex.format);
+ R300_STATECHANGE(r300, tex.pitch);
+ R300_STATECHANGE(r300, tex.offset);
+ R300_STATECHANGE(r300, tex.chroma_key);
+ R300_STATECHANGE(r300, tex.border_color);
+
+ r300->hw.txe.cmd[R300_TXE_ENABLE] = 0x0;
+
+ mtu = r300->radeon.glCtx->Const.MaxTextureUnits;
+ if (RADEON_DEBUG & RADEON_STATE)
+ fprintf(stderr, "mtu=%d\n", mtu);
+
+ if (mtu > R300_MAX_TEXTURE_UNITS) {
+ fprintf(stderr,
+ "Aiiee ! mtu=%d is greater than R300_MAX_TEXTURE_UNITS=%d\n",
+ mtu, R300_MAX_TEXTURE_UNITS);
+ exit(-1);
+ }
+
+ /* We cannot let disabled tmu offsets pass DRM */
+ for (i = 0; i < mtu; i++) {
+ if (ctx->Texture.Unit[i]._ReallyEnabled) {
+ tmu_mappings[i] = hw_tmu;
+
+ t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
+ if (!t)
+ continue;
+
+ if ((t->pp_txformat & 0xffffff00) == 0xffffff00) {
+ WARN_ONCE
+ ("unknown texture format (entry %x) encountered. Help me !\n",
+ t->pp_txformat & 0xff);
+ }
+
+ if (RADEON_DEBUG & RADEON_STATE)
+ fprintf(stderr,
+ "Activating texture unit %d\n", i);
+
+ r300->hw.txe.cmd[R300_TXE_ENABLE] |= (1 << hw_tmu);
+
+ r300->hw.tex.filter.cmd[R300_TEX_VALUE_0 +
+ hw_tmu] =
+ gen_fixed_filter(t->pp_txfilter) | (hw_tmu << 28);
+ /* Note: There is a LOD bias per texture unit and a LOD bias
+ * per texture object. We add them here to get the correct behaviour.
+ * (The per-texture object LOD bias was introduced in OpenGL 1.4
+ * and is not present in the EXT_texture_object extension).
+ */
+ r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+ t->pp_txfilter_1 |
+ translate_lod_bias(ctx->Texture.Unit[i].LodBias + t->base.LodBias);
+ r300->hw.tex.size.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+ t->pp_txsize;
+ r300->hw.tex.format.cmd[R300_TEX_VALUE_0 +
+ hw_tmu] = t->pp_txformat;
+ r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0 + hw_tmu] =
+ t->pp_txpitch;
+ r300->hw.textures[hw_tmu] = t;
+
+ if (t->tile_bits & R300_TXO_MACRO_TILE) {
+ WARN_ONCE("macro tiling enabled!\n");
+ }
+
+ if (t->tile_bits & R300_TXO_MICRO_TILE) {
+ WARN_ONCE("micro tiling enabled!\n");
+ }
+
+ r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0 +
+ hw_tmu] = 0x0;
+ r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0 +
+ hw_tmu] =
+ t->pp_border_color;
+
+ last_hw_tmu = hw_tmu;
+
+ hw_tmu++;
+ }
+ }
+
+ /* R3xx and R4xx chips require that the texture unit corresponding to
+ * KIL instructions is really enabled.
+ *
+ * We do some fakery here and in the state atom emit logic to enable
+ * the texture without tripping up the CS checker in the kernel.
+ */
+ if (r300->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV515) {
+ if (ctx->FragmentProgram._Current->UsesKill && last_hw_tmu < 0) {
+ last_hw_tmu++;
+
+ r300->hw.txe.cmd[R300_TXE_ENABLE] |= 1;
+
+ r300->hw.tex.border_color.cmd[R300_TEX_VALUE_0] = 0;
+ r300->hw.tex.chroma_key.cmd[R300_TEX_VALUE_0] = 0;
+ r300->hw.tex.filter.cmd[R300_TEX_VALUE_0] = 0;
+ r300->hw.tex.filter_1.cmd[R300_TEX_VALUE_0] = 0;
+ r300->hw.tex.size.cmd[R300_TEX_VALUE_0] = 0; /* 1x1 texture */
+ r300->hw.tex.format.cmd[R300_TEX_VALUE_0] = 0; /* A8 format */
+ r300->hw.tex.pitch.cmd[R300_TEX_VALUE_0] = 0;
+ }
+ }
+
+ r300->hw.tex.filter.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER0_0, last_hw_tmu + 1);
+ r300->hw.tex.filter_1.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FILTER1_0, last_hw_tmu + 1);
+ r300->hw.tex.size.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_SIZE_0, last_hw_tmu + 1);
+ r300->hw.tex.format.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT_0, last_hw_tmu + 1);
+ r300->hw.tex.pitch.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_FORMAT2_0, last_hw_tmu + 1);
+ r300->hw.tex.offset.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_OFFSET_0, last_hw_tmu + 1);
+ r300->hw.tex.chroma_key.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_CHROMA_KEY_0, last_hw_tmu + 1);
+ r300->hw.tex.border_color.cmd[R300_TEX_CMD_0] =
+ cmdpacket0(r300->radeon.radeonScreen, R300_TX_BORDER_COLOR_0, last_hw_tmu + 1);
+
+ r300->vtbl.SetupFragmentShaderTextures(ctx, tmu_mappings);
+
+ if (RADEON_DEBUG & RADEON_STATE)
+ fprintf(stderr, "TX_ENABLE: %08x last_hw_tmu=%d\n",
+ r300->hw.txe.cmd[R300_TXE_ENABLE], last_hw_tmu);
+}
+
+union r300_outputs_written {
+ GLuint vp_outputs; /* hw_tcl_on */
+ DECLARE_RENDERINPUTS(index_bitset); /* !hw_tcl_on */
+};
+
+#define R300_OUTPUTS_WRITTEN_TEST(ow, vp_result, tnl_attrib) \
+ ((hw_tcl_on) ? (ow).vp_outputs & (1 << (vp_result)) : \
+ RENDERINPUTS_TEST( (ow.index_bitset), (tnl_attrib) ))
+
+static void r300SetupRSUnit(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ union r300_outputs_written OutputsWritten;
+ GLuint InputsRead;
+ int fp_reg, high_rr;
+ int col_ip, tex_ip;
+ int rs_tex_count = 0;
+ int i, col_fmt, hw_tcl_on;
+
+ hw_tcl_on = r300->options.hw_tcl_enabled;
+
+ if (hw_tcl_on)
+ OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten;
+ else
+ RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
+
+ InputsRead = r300->selected_fp->InputsRead;
+
+ R300_STATECHANGE(r300, ri);
+ R300_STATECHANGE(r300, rc);
+ R300_STATECHANGE(r300, rr);
+
+ fp_reg = col_ip = tex_ip = col_fmt = 0;
+
+ r300->hw.rc.cmd[1] = 0;
+ r300->hw.rc.cmd[2] = 0;
+ for (i=0; i<R300_RR_CMDSIZE-1; ++i)
+ r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0;
+
+ for (i=0; i<R300_RI_CMDSIZE-1; ++i)
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0;
+
+
+ if (InputsRead & FRAG_BIT_COL0) {
+ if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
+ r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg);
+ InputsRead &= ~FRAG_BIT_COL0;
+ ++col_ip;
+ ++fp_reg;
+ } else {
+ WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
+ }
+ }
+
+ if (InputsRead & FRAG_BIT_COL1) {
+ if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
+ r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg);
+ InputsRead &= ~FRAG_BIT_COL1;
+ ++col_ip;
+ ++fp_reg;
+ } else {
+ WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+ }
+ }
+
+ /* We always route 4 texcoord components */
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
+ continue;
+
+ if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
+ WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
+ continue;
+ }
+
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) | R300_RS_TEX_PTR(rs_tex_count);
+ r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
+ InputsRead &= ~(FRAG_BIT_TEX0 << i);
+ rs_tex_count += 4;
+ ++tex_ip;
+ ++fp_reg;
+ }
+
+ /* Setup default color if no color or tex was set */
+ if (rs_tex_count == 0 && col_ip == 0) {
+ r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_ADDR(0);
+ r300->hw.ri.cmd[R300_RI_INTERP_0] = R300_RS_COL_PTR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+ ++col_ip;
+ }
+
+ high_rr = (col_ip > tex_ip) ? col_ip : tex_ip;
+ r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
+ r300->hw.rc.cmd[2] |= high_rr - 1;
+
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_INST_0, high_rr);
+ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R300_RS_IP_0, high_rr);
+
+ if (InputsRead)
+ WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
+}
+
+static void r500SetupRSUnit(GLcontext * ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ union r300_outputs_written OutputsWritten;
+ GLuint InputsRead;
+ int fp_reg, high_rr;
+ int col_ip, tex_ip;
+ int rs_tex_count = 0;
+ int i, col_fmt, hw_tcl_on;
+
+ hw_tcl_on = r300->options.hw_tcl_enabled;
+
+ if (hw_tcl_on)
+ OutputsWritten.vp_outputs = r300->selected_vp->code.OutputsWritten;
+ else
+ RENDERINPUTS_COPY(OutputsWritten.index_bitset, r300->render_inputs_bitset);
+
+ InputsRead = r300->selected_fp->InputsRead;
+
+ R300_STATECHANGE(r300, ri);
+ R300_STATECHANGE(r300, rc);
+ R300_STATECHANGE(r300, rr);
+
+ fp_reg = col_ip = tex_ip = col_fmt = 0;
+
+ r300->hw.rc.cmd[1] = 0;
+ r300->hw.rc.cmd[2] = 0;
+ for (i=0; i<R300_RR_CMDSIZE-1; ++i)
+ r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0;
+
+ for (i=0; i<R500_RI_CMDSIZE-1; ++i)
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0;
+
+
+ if (InputsRead & FRAG_BIT_COL0) {
+ if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
+ r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg);
+ InputsRead &= ~FRAG_BIT_COL0;
+ ++col_ip;
+ ++fp_reg;
+ } else {
+ WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
+ }
+ }
+
+ if (InputsRead & FRAG_BIT_COL1) {
+ if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
+ r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg);
+ InputsRead &= ~FRAG_BIT_COL1;
+ ++col_ip;
+ ++fp_reg;
+ } else {
+ WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+ }
+ }
+
+ /* We always route 4 texcoord components */
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
+ continue;
+
+ if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
+ WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
+ continue;
+ }
+
+ r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
+ ((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
+ ((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
+ ((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
+
+ r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
+ InputsRead &= ~(FRAG_BIT_TEX0 << i);
+ rs_tex_count += 4;
+ ++tex_ip;
+ ++fp_reg;
+ }
+
+ /* Setup default color if no color or tex was set */
+ if (rs_tex_count == 0 && col_ip == 0) {
+ r300->hw.rr.cmd[R300_RR_INST_0] = R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_ADDR(0);
+ r300->hw.ri.cmd[R300_RI_INTERP_0] = R500_RS_COL_PTR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
+ ++col_ip;
+ }
+
+ high_rr = (col_ip > tex_ip) ? col_ip : tex_ip;
+ r300->hw.rc.cmd[1] = (rs_tex_count << R300_IT_COUNT_SHIFT) | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
+ r300->hw.rc.cmd[2] = 0xC0 | (high_rr - 1);
+
+ r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_INST_0, high_rr);
+ r300->hw.ri.cmd[R300_RI_CMD_0] = cmdpacket0(r300->radeon.radeonScreen, R500_RS_IP_0, high_rr);
+
+ if (InputsRead)
+ WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
+}
+
+#define MIN3(a, b, c) ((a) < (b) ? MIN2(a, c) : MIN2(b, c))
+
+void r300VapCntl(r300ContextPtr rmesa, GLuint input_count,
+ GLuint output_count, GLuint temp_count)
+{
+ int vtx_mem_size;
+ int pvs_num_slots;
+ int pvs_num_cntrls;
+
+ /* Flush PVS engine before changing PVS_NUM_SLOTS, PVS_NUM_CNTRLS.
+ * See r500 docs 6.5.2 - done in emit */
+
+ /* avoid division by zero */
+ if (input_count == 0) input_count = 1;
+ if (output_count == 0) output_count = 1;
+ if (temp_count == 0) temp_count = 1;
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ vtx_mem_size = 128;
+ else
+ vtx_mem_size = 72;
+
+ pvs_num_slots = MIN3(10, vtx_mem_size/input_count, vtx_mem_size/output_count);
+ pvs_num_cntrls = MIN2(6, vtx_mem_size/temp_count);
+
+ R300_STATECHANGE(rmesa, vap_cntl);
+ if (rmesa->options.hw_tcl_enabled) {
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] =
+ (pvs_num_slots << R300_PVS_NUM_SLOTS_SHIFT) |
+ (pvs_num_cntrls << R300_PVS_NUM_CNTLRS_SHIFT) |
+ (12 << R300_VF_MAX_VTX_NUM_SHIFT);
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= R500_TCL_STATE_OPTIMIZATION;
+ } else
+ /* not sure about non-tcl */
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] = ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+ (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+ (5 << R300_VF_MAX_VTX_NUM_SHIFT));
+
+ if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R300) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R350))
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (4 << R300_PVS_NUM_FPUS_SHIFT);
+ else if (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV530)
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (5 << R300_PVS_NUM_FPUS_SHIFT);
+ else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV410) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R420))
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (6 << R300_PVS_NUM_FPUS_SHIFT);
+ else if ((rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R520) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_R580) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV560) ||
+ (rmesa->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV570))
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (8 << R300_PVS_NUM_FPUS_SHIFT);
+ else
+ rmesa->hw.vap_cntl.cmd[R300_VAP_CNTL_INSTR] |= (2 << R300_PVS_NUM_FPUS_SHIFT);
+
+}
+
+/**
+ * Enable/Disable states.
+ *
+ * \note Mesa already filters redundant calls to this function.
+ */
+static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ if (RADEON_DEBUG & RADEON_STATE)
+ fprintf(stderr, "%s( %s = %s )\n", __FUNCTION__,
+ _mesa_lookup_enum_by_nr(cap),
+ state ? "GL_TRUE" : "GL_FALSE");
+
+ switch (cap) {
+ case GL_ALPHA_TEST:
+ r300SetAlphaState(ctx);
+ break;
+ case GL_COLOR_LOGIC_OP:
+ r300SetLogicOpState(ctx);
+ /* fall-through, because logic op overrides blending */
+ case GL_BLEND:
+ r300SetBlendState(ctx);
+ break;
+ case GL_CLIP_PLANE0:
+ case GL_CLIP_PLANE1:
+ case GL_CLIP_PLANE2:
+ case GL_CLIP_PLANE3:
+ case GL_CLIP_PLANE4:
+ case GL_CLIP_PLANE5:
+ r300SetClipPlaneState(ctx, cap, state);
+ break;
+ case GL_CULL_FACE:
+ r300UpdateCulling(ctx);
+ break;
+ case GL_DEPTH_TEST:
+ r300SetDepthState(ctx);
+ break;
+ case GL_LINE_SMOOTH:
+ if (rmesa->options.conformance_mode)
+ r300SwitchFallback(ctx, R300_FALLBACK_LINE_SMOOTH, ctx->Line.SmoothFlag);
+ break;
+ case GL_LINE_STIPPLE:
+ if (rmesa->options.conformance_mode)
+ r300SwitchFallback(ctx, R300_FALLBACK_LINE_STIPPLE, ctx->Line.StippleFlag);
+ break;
+ case GL_POINT_SMOOTH:
+ if (rmesa->options.conformance_mode)
+ r300SwitchFallback(ctx, R300_FALLBACK_POINT_SMOOTH, ctx->Point.SmoothFlag);
+ break;
+ case GL_POLYGON_SMOOTH:
+ if (rmesa->options.conformance_mode)
+ r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_SMOOTH, ctx->Polygon.SmoothFlag);
+ break;
+ case GL_POLYGON_STIPPLE:
+ if (rmesa->options.conformance_mode)
+ r300SwitchFallback(ctx, R300_FALLBACK_POLYGON_STIPPLE, ctx->Polygon.StippleFlag);
+ break;
+ case GL_POLYGON_OFFSET_POINT:
+ case GL_POLYGON_OFFSET_LINE:
+ case GL_POLYGON_OFFSET_FILL:
+ r300SetPolygonOffsetState(ctx, state);
+ break;
+ case GL_SCISSOR_TEST:
+ radeon_firevertices(&rmesa->radeon);
+ rmesa->radeon.state.scissor.enabled = state;
+ radeonUpdateScissor( ctx );
+ break;
+ case GL_STENCIL_TEST:
+ r300SetStencilState(ctx, state);
+ break;
+ default:
+ break;
+ }
+}
+
+/**
+ * Completely recalculates hardware state based on the Mesa state.
+ */
+static void r300ResetHwState(r300ContextPtr r300)
+{
+ GLcontext *ctx = r300->radeon.glCtx;
+ int has_tcl;
+
+ has_tcl = r300->options.hw_tcl_enabled;
+
+ if (RADEON_DEBUG & RADEON_STATE)
+ fprintf(stderr, "%s\n", __FUNCTION__);
+
+ r300ColorMask(ctx,
+ ctx->Color.ColorMask[0][RCOMP],
+ ctx->Color.ColorMask[0][GCOMP],
+ ctx->Color.ColorMask[0][BCOMP],
+ ctx->Color.ColorMask[0][ACOMP]);
+
+ r300Enable(ctx, GL_DEPTH_TEST, ctx->Depth.Test);
+ r300DepthMask(ctx, ctx->Depth.Mask);
+ r300DepthFunc(ctx, ctx->Depth.Func);
+
+ /* stencil */
+ r300Enable(ctx, GL_STENCIL_TEST, ctx->Stencil._Enabled);
+ r300StencilMaskSeparate(ctx, 0, ctx->Stencil.WriteMask[0]);
+ r300StencilFuncSeparate(ctx, 0, ctx->Stencil.Function[0],
+ ctx->Stencil.Ref[0], ctx->Stencil.ValueMask[0]);
+ r300StencilOpSeparate(ctx, 0, ctx->Stencil.FailFunc[0],
+ ctx->Stencil.ZFailFunc[0],
+ ctx->Stencil.ZPassFunc[0]);
+
+ r300UpdateCulling(ctx);
+
+ r300SetBlendState(ctx);
+ r300SetLogicOpState(ctx);
+
+ r300AlphaFunc(ctx, ctx->Color.AlphaFunc, ctx->Color.AlphaRef);
+ r300Enable(ctx, GL_ALPHA_TEST, ctx->Color.AlphaEnabled);
+
+ r300->hw.vte.cmd[1] = R300_VPORT_X_SCALE_ENA
+ | R300_VPORT_X_OFFSET_ENA
+ | R300_VPORT_Y_SCALE_ENA
+ | R300_VPORT_Y_OFFSET_ENA
+ | R300_VPORT_Z_SCALE_ENA
+ | R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT;
+ r300->hw.vte.cmd[2] = 0x00000008;
+
+ r300->hw.vap_vf_max_vtx_indx.cmd[1] = 0x00FFFFFF;
+ r300->hw.vap_vf_max_vtx_indx.cmd[2] = 0x00000000;
+
+#ifdef MESA_LITTLE_ENDIAN
+ r300->hw.vap_cntl_status.cmd[1] = R300_VC_NO_SWAP;
+#else
+ r300->hw.vap_cntl_status.cmd[1] = R300_VC_32BIT_SWAP;
+#endif
+
+ /* disable VAP/TCL on non-TCL capable chips */
+ if (!has_tcl)
+ r300->hw.vap_cntl_status.cmd[1] |= R300_VAP_TCL_BYPASS;
+
+ r300->hw.vap_psc_sgn_norm_cntl.cmd[1] = 0xAAAAAAAA;
+
+ /* XXX: Other families? */
+ if (has_tcl) {
+ r300->hw.vap_clip_cntl.cmd[1] = R300_PS_UCP_MODE_DIST_COP;
+
+ r300->hw.vap_clip.cmd[1] = r300PackFloat32(1.0); /* X */
+ r300->hw.vap_clip.cmd[2] = r300PackFloat32(1.0); /* X */
+ r300->hw.vap_clip.cmd[3] = r300PackFloat32(1.0); /* Y */
+ r300->hw.vap_clip.cmd[4] = r300PackFloat32(1.0); /* Y */
+
+ switch (r300->radeon.radeonScreen->chip_family) {
+ case CHIP_FAMILY_R300:
+ r300->hw.vap_pvs_vtx_timeout_reg.cmd[1] = R300_2288_R300;
+ break;
+ default:
+ r300->hw.vap_pvs_vtx_timeout_reg.cmd[1] = R300_2288_RV350;
+ break;
+ }
+ }
+
+ r300->hw.gb_enable.cmd[1] = R300_GB_POINT_STUFF_ENABLE
+ | R300_GB_LINE_STUFF_ENABLE
+ | R300_GB_TRIANGLE_STUFF_ENABLE;
+
+ r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_0] = 0x66666666;
+ r300->hw.gb_misc.cmd[R300_GB_MISC_MSPOS_1] = 0x06666666;
+
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] =
+ R300_GB_TILE_ENABLE | R300_GB_TILE_SIZE_16 /*| R300_GB_SUBPIXEL_1_16*/;
+ switch (r300->radeon.radeonScreen->num_gb_pipes) {
+ case 1:
+ default:
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
+ R300_GB_TILE_PIPE_COUNT_RV300;
+ break;
+ case 2:
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
+ R300_GB_TILE_PIPE_COUNT_R300;
+ break;
+ case 3:
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
+ R300_GB_TILE_PIPE_COUNT_R420_3P;
+ break;
+ case 4:
+ r300->hw.gb_misc.cmd[R300_GB_MISC_TILE_CONFIG] |=
+ R300_GB_TILE_PIPE_COUNT_R420;
+ break;
+ }
+
+ /* XXX: Enable anti-aliasing? */
+ r300->hw.gb_misc2.cmd[R300_GB_MISC2_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE;
+ r300->hw.gb_misc2.cmd[R300_GB_MISC2_SELECT] = 0;
+
+ r300->hw.ga_point_s0.cmd[1] = r300PackFloat32(0.0);
+ r300->hw.ga_point_s0.cmd[2] = r300PackFloat32(0.0);
+ r300->hw.ga_point_s0.cmd[3] = r300PackFloat32(1.0);
+ r300->hw.ga_point_s0.cmd[4] = r300PackFloat32(1.0);
+
+ r300->hw.ga_triangle_stipple.cmd[1] = 0x00050005;
+
+ r300PointSize(ctx, 1.0);
+
+ r300->hw.ga_point_minmax.cmd[1] = 0x18000006;
+ r300->hw.ga_point_minmax.cmd[2] = 0x00020006;
+ r300->hw.ga_point_minmax.cmd[3] = r300PackFloat32(1.0 / 192.0);
+
+ r300LineWidth(ctx, 1.0);
+
+ r300->hw.ga_line_stipple.cmd[1] = 0;
+ r300->hw.ga_line_stipple.cmd[2] = r300PackFloat32(0.0);
+ r300->hw.ga_line_stipple.cmd[3] = r300PackFloat32(1.0);
+
+ r300ShadeModel(ctx, ctx->Light.ShadeModel);
+
+ r300PolygonMode(ctx, GL_FRONT, ctx->Polygon.FrontMode);
+ r300PolygonMode(ctx, GL_BACK, ctx->Polygon.BackMode);
+ r300->hw.zbias_cntl.cmd[1] = 0x00000000;
+
+ r300PolygonOffset(ctx, ctx->Polygon.OffsetFactor,
+ ctx->Polygon.OffsetUnits);
+ r300Enable(ctx, GL_POLYGON_OFFSET_POINT, ctx->Polygon.OffsetPoint);
+ r300Enable(ctx, GL_POLYGON_OFFSET_LINE, ctx->Polygon.OffsetLine);
+ r300Enable(ctx, GL_POLYGON_OFFSET_FILL, ctx->Polygon.OffsetFill);
+
+ r300->hw.su_depth_scale.cmd[1] = 0x4B7FFFFF;
+ r300->hw.su_depth_scale.cmd[2] = 0x00000000;
+
+ r300->hw.sc_hyperz.cmd[1] = 0x0000001C;
+ r300->hw.sc_hyperz.cmd[2] = 0x2DA49525;
+
+ r300->hw.sc_screendoor.cmd[1] = 0x00FFFFFF;
+
+ r300->hw.us_out_fmt.cmd[1] = R500_OUT_FMT_C4_8 |
+ R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+ r300->hw.us_out_fmt.cmd[2] = R500_OUT_FMT_UNUSED |
+ R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+ r300->hw.us_out_fmt.cmd[3] = R500_OUT_FMT_UNUSED |
+ R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+ r300->hw.us_out_fmt.cmd[4] = R500_OUT_FMT_UNUSED |
+ R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
+ r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W0 | R300_W_SRC_US;
+
+ /* disable fog unit */
+ r300->hw.fogs.cmd[R300_FOGS_STATE] = 0;
+ r300->hw.fg_depth_src.cmd[1] = R300_FG_DEPTH_SRC_SCAN;
+
+ r300->hw.rb3d_cctl.cmd[1] = 0;
+
+ r300BlendColor(ctx, ctx->Color.BlendColor);
+
+ r300->hw.rb3d_dither_ctl.cmd[1] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[2] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[3] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[4] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[5] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[6] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[7] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[8] = 0;
+ r300->hw.rb3d_dither_ctl.cmd[9] = 0;
+
+ r300->hw.rb3d_aaresolve_ctl.cmd[1] = 0;
+
+ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[1] = 0x00000000;
+ r300->hw.rb3d_discard_src_pixel_lte_threshold.cmd[2] = 0xffffffff;
+
+ r300->hw.zb_depthclearvalue.cmd[1] = 0;
+
+ r300->hw.zstencil_format.cmd[2] = R300_ZTOP_DISABLE;
+ r300->hw.zstencil_format.cmd[3] = 0x00000003;
+ r300->hw.zstencil_format.cmd[4] = 0x00000000;
+ r300SetEarlyZState(ctx);
+
+ r300->hw.zb_zmask.cmd[1] = 0;
+ r300->hw.zb_zmask.cmd[2] = 0;
+
+ r300->hw.zb_hiz_offset.cmd[1] = 0;
+
+ r300->hw.zb_hiz_pitch.cmd[1] = 0;
+
+ r300VapCntl(r300, 0, 0, 0);
+ if (has_tcl) {
+ r300->hw.vps.cmd[R300_VPS_ZERO_0] = 0;
+ r300->hw.vps.cmd[R300_VPS_ZERO_1] = 0;
+ r300->hw.vps.cmd[R300_VPS_POINTSIZE] = r300PackFloat32(1.0);
+ r300->hw.vps.cmd[R300_VPS_ZERO_3] = 0;
+ }
+
+ r300->radeon.hw.all_dirty = GL_TRUE;
+}
+
+void r300UpdateShaders(r300ContextPtr rmesa)
+{
+ GLcontext *ctx = rmesa->radeon.glCtx;
+
+ /* should only happenen once, just after context is created */
+ /* TODO: shouldn't we fallback to sw here? */
+ if (!ctx->FragmentProgram._Current) {
+ fprintf(stderr, "No ctx->FragmentProgram._Current!!\n");
+ return;
+ }
+
+ {
+ struct r300_fragment_program *fp;
+
+ fp = r300SelectAndTranslateFragmentShader(ctx);
+
+ r300SwitchFallback(ctx, R300_FALLBACK_FRAGMENT_PROGRAM, fp->error);
+ }
+
+ if (rmesa->options.hw_tcl_enabled) {
+ struct r300_vertex_program *vp;
+
+ vp = r300SelectAndTranslateVertexShader(ctx);
+
+ r300SwitchFallback(ctx, R300_FALLBACK_VERTEX_PROGRAM, vp->error);
+ }
+
+ r300UpdateStateParameters(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);
+ rmesa->radeon.NewGLState = 0;
+}
+
+static const GLfloat *get_fragmentprogram_constant(GLcontext *ctx, GLuint index, GLfloat * buffer)
+{
+ static const GLfloat dummy[4] = { 0, 0, 0, 0 };
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct rc_constant * rcc = &rmesa->selected_fp->code.constants.Constants[index];
+
+ switch(rcc->Type) {
+ case RC_CONSTANT_EXTERNAL:
+ return ctx->FragmentProgram._Current->Base.Parameters->ParameterValues[rcc->u.External];
+ case RC_CONSTANT_IMMEDIATE:
+ return rcc->u.Immediate;
+ case RC_CONSTANT_STATE:
+ switch(rcc->u.State[0]) {
+ case RC_STATE_SHADOW_AMBIENT: {
+ const int unit = (int) rcc->u.State[1];
+ const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
+ if (texObj) {
+ buffer[0] =
+ buffer[1] =
+ buffer[2] =
+ buffer[3] = texObj->CompareFailValue;
+ }
+ return buffer;
+ }
+
+ case RC_STATE_R300_WINDOW_DIMENSION: {
+ __DRIdrawable * drawable = radeon_get_drawable(&rmesa->radeon);
+ buffer[0] = drawable->w * 0.5f; /* width*0.5 */
+ buffer[1] = drawable->h * 0.5f; /* height*0.5 */
+ buffer[2] = 0.5F; /* for moving range [-1 1] -> [0 1] */
+ buffer[3] = 1.0F; /* not used */
+ return buffer;
+ }
+
+ case RC_STATE_R300_TEXRECT_FACTOR: {
+ struct gl_texture_object *t =
+ ctx->Texture.Unit[rcc->u.State[1]].CurrentTex[TEXTURE_RECT_INDEX];
+
+ if (t && t->Image[0][t->BaseLevel]) {
+ struct gl_texture_image *image =
+ t->Image[0][t->BaseLevel];
+ buffer[0] = 1.0 / image->Width2;
+ buffer[1] = 1.0 / image->Height2;
+ } else {
+ buffer[0] = 1.0;
+ buffer[1] = 1.0;
+ }
+ buffer[2] = 1.0;
+ buffer[3] = 1.0;
+ return buffer;
+ }
+ }
+ }
+
+ return dummy;
+}
+
+
+static void r300SetupPixelShader(GLcontext *ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct r300_fragment_program *fp = rmesa->selected_fp;
+ struct r300_fragment_program_code *code;
+ int i;
+
+ code = &fp->code.code.r300;
+
+ R300_STATECHANGE(rmesa, fpi[0]);
+ R300_STATECHANGE(rmesa, fpi[1]);
+ R300_STATECHANGE(rmesa, fpi[2]);
+ R300_STATECHANGE(rmesa, fpi[3]);
+ rmesa->hw.fpi[0].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_INST_0, code->alu.length);
+ rmesa->hw.fpi[1].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_RGB_ADDR_0, code->alu.length);
+ rmesa->hw.fpi[2].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_INST_0, code->alu.length);
+ rmesa->hw.fpi[3].cmd[R300_FPI_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++) {
+ rmesa->hw.fpi[0].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_inst;
+ rmesa->hw.fpi[1].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].rgb_addr;
+ rmesa->hw.fpi[2].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_inst;
+ rmesa->hw.fpi[3].cmd[R300_FPI_INSTR_0 + i] = code->alu.inst[i].alpha_addr;
+ }
+
+ R300_STATECHANGE(rmesa, fp);
+ rmesa->hw.fp.cmd[R300_FP_CNTL0] = code->config;
+ rmesa->hw.fp.cmd[R300_FP_CNTL1] = code->pixsize;
+ rmesa->hw.fp.cmd[R300_FP_CNTL2] = code->code_offset;
+ for (i = 0; i < 4; i++)
+ rmesa->hw.fp.cmd[R300_FP_NODE0 + i] = code->code_addr[i];
+
+ R300_STATECHANGE(rmesa, fpp);
+ rmesa->hw.fpp.cmd[R300_FPP_CMD_0] = cmdpacket0(rmesa->radeon.radeonScreen, R300_PFS_PARAM_0_X, fp->code.constants.Count * 4);
+ for (i = 0; i < fp->code.constants.Count; i++) {
+ GLfloat buffer[4];
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat24(constant[0]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat24(constant[1]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat24(constant[2]);
+ rmesa->hw.fpp.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat24(constant[3]);
+ }
+}
+
+#define bump_r500fp_count(ptr, new_count) do{\
+ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
+ int _nc=(new_count)/6; \
+ assert(_nc < 256); \
+ if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\
+} while(0)
+
+#define bump_r500fp_const_count(ptr, new_count) do{\
+ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr));\
+ int _nc=(new_count)/4; \
+ assert(_nc < 256); \
+ if(_nc>_p->r500fp.count)_p->r500fp.count=_nc;\
+} while(0)
+
+static void r500SetupPixelShader(GLcontext *ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct r300_fragment_program *fp = rmesa->selected_fp;
+ int i;
+ struct r500_fragment_program_code *code;
+
+ ((drm_r300_cmd_header_t *) rmesa->hw.r500fp.cmd)->r500fp.count = 0;
+ ((drm_r300_cmd_header_t *) rmesa->hw.r500fp_const.cmd)->r500fp.count = 0;
+
+ code = &fp->code.code.r500;
+
+ R300_STATECHANGE(rmesa, fp);
+ rmesa->hw.fp.cmd[R500_FP_PIXSIZE] = code->max_temp_idx;
+
+ rmesa->hw.fp.cmd[R500_FP_CODE_ADDR] =
+ R500_US_CODE_START_ADDR(0) |
+ R500_US_CODE_END_ADDR(code->inst_end);
+ rmesa->hw.fp.cmd[R500_FP_CODE_RANGE] =
+ R500_US_CODE_RANGE_ADDR(0) |
+ R500_US_CODE_RANGE_SIZE(code->inst_end);
+ rmesa->hw.fp.cmd[R500_FP_CODE_OFFSET] =
+ R500_US_CODE_OFFSET_ADDR(0);
+
+ R300_STATECHANGE(rmesa, r500fp);
+ /* Emit our shader... */
+ for (i = 0; i < code->inst_end+1; i++) {
+ rmesa->hw.r500fp.cmd[i*6+1] = code->inst[i].inst0;
+ rmesa->hw.r500fp.cmd[i*6+2] = code->inst[i].inst1;
+ rmesa->hw.r500fp.cmd[i*6+3] = code->inst[i].inst2;
+ rmesa->hw.r500fp.cmd[i*6+4] = code->inst[i].inst3;
+ rmesa->hw.r500fp.cmd[i*6+5] = code->inst[i].inst4;
+ rmesa->hw.r500fp.cmd[i*6+6] = code->inst[i].inst5;
+ }
+
+ bump_r500fp_count(rmesa->hw.r500fp.cmd, (code->inst_end + 1) * 6);
+
+ R300_STATECHANGE(rmesa, r500fp_const);
+ for (i = 0; i < fp->code.constants.Count; i++) {
+ GLfloat buffer[4];
+ const GLfloat *constant = get_fragmentprogram_constant(ctx, i, buffer);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 0] = r300PackFloat32(constant[0]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 1] = r300PackFloat32(constant[1]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 2] = r300PackFloat32(constant[2]);
+ rmesa->hw.r500fp_const.cmd[R300_FPP_PARAM_0 + 4 * i + 3] = r300PackFloat32(constant[3]);
+ }
+ bump_r500fp_const_count(rmesa->hw.r500fp_const.cmd, fp->code.constants.Count * 4);
+}
+
+void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten)
+{
+ r300ContextPtr rmesa = R300_CONTEXT( ctx );
+ struct vertex_attribute *attrs = rmesa->vbuf.attribs;
+ int i, j, reg_count;
+ uint32_t *vir0 = &rmesa->hw.vir[0].cmd[1];
+ uint32_t *vir1 = &rmesa->hw.vir[1].cmd[1];
+
+ for (i = 0; i < R300_VIR_CMDSIZE-1; ++i)
+ vir0[i] = vir1[i] = 0;
+
+ for (i = 0, j = 0; i < rmesa->vbuf.num_attribs; ++i) {
+ int tmp;
+
+ tmp = attrs[i].data_type | (attrs[i].dst_loc << R300_DST_VEC_LOC_SHIFT);
+ if (attrs[i]._signed)
+ tmp |= R300_SIGNED;
+ if (attrs[i].normalize)
+ tmp |= R300_NORMALIZE;
+
+ if (i % 2 == 0) {
+ vir0[j] = tmp << R300_DATA_TYPE_0_SHIFT;
+ vir1[j] = attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT);
+ } else {
+ vir0[j] |= tmp << R300_DATA_TYPE_1_SHIFT;
+ vir1[j] |= (attrs[i].swizzle | (attrs[i].write_mask << R300_WRITE_ENA_SHIFT)) << R300_SWIZZLE1_SHIFT;
+ ++j;
+ }
+ }
+
+ reg_count = (rmesa->vbuf.num_attribs + 1) >> 1;
+ if (rmesa->vbuf.num_attribs % 2 != 0) {
+ vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_0_SHIFT;
+ } else {
+ vir0[reg_count-1] |= R300_LAST_VEC << R300_DATA_TYPE_1_SHIFT;
+ }
+
+ R300_STATECHANGE(rmesa, vir[0]);
+ R300_STATECHANGE(rmesa, vir[1]);
+ R300_STATECHANGE(rmesa, vof);
+ R300_STATECHANGE(rmesa, vic);
+
+ if (rmesa->radeon.radeonScreen->kernel_mm) {
+ rmesa->hw.vir[0].cmd[0] &= 0xC000FFFF;
+ rmesa->hw.vir[1].cmd[0] &= 0xC000FFFF;
+ rmesa->hw.vir[0].cmd[0] |= (reg_count & 0x3FFF) << 16;
+ rmesa->hw.vir[1].cmd[0] |= (reg_count & 0x3FFF) << 16;
+ } else {
+ ((drm_r300_cmd_header_t *) rmesa->hw.vir[0].cmd)->packet0.count = reg_count;
+ ((drm_r300_cmd_header_t *) rmesa->hw.vir[1].cmd)->packet0.count = reg_count;
+ }
+
+ rmesa->hw.vic.cmd[R300_VIC_CNTL_0] = r300VAPInputCntl0(ctx, InputsRead);
+ rmesa->hw.vic.cmd[R300_VIC_CNTL_1] = r300VAPInputCntl1(ctx, InputsRead);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_0] = r300VAPOutputCntl0(ctx, OutputsWritten);
+ rmesa->hw.vof.cmd[R300_VOF_CNTL_1] = r300VAPOutputCntl1(ctx, OutputsWritten);
+}
+
+void r300UpdateShaderStates(r300ContextPtr rmesa)
+{
+ GLcontext *ctx;
+ ctx = rmesa->radeon.glCtx;
+
+ /* should only happenen once, just after context is created */
+ if (!ctx->FragmentProgram._Current)
+ return;
+
+ r300SetEarlyZState(ctx);
+
+ r300SetupTextures(ctx);
+
+ rmesa->vtbl.SetupPixelShader(ctx);
+
+ rmesa->vtbl.SetupRSUnit(ctx);
+
+ if (rmesa->options.hw_tcl_enabled) {
+ r300SetupVertexProgram(rmesa);
+ }
+}
+
+#define EASY_US_OUT_FMT(comps, c0, c1, c2, c3) \
+ (R500_OUT_FMT_##comps | R500_C0_SEL_##c0 | R500_C1_SEL_##c1 | \
+ R500_C2_SEL_##c2 | R500_C3_SEL_##c3)
+static void r300SetupUsOutputFormat(GLcontext *ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ uint32_t hw_format;
+ struct radeon_renderbuffer *rrb = radeon_get_colorbuffer(&rmesa->radeon);
+
+ if (!rrb) {
+ return;
+ }
+
+ switch (rrb->base.Format)
+ {
+ case MESA_FORMAT_RGBA5551:
+ case MESA_FORMAT_RGBA8888:
+ hw_format = EASY_US_OUT_FMT(C4_8, A, B, G, R);
+ break;
+ case MESA_FORMAT_RGB565_REV:
+ case MESA_FORMAT_RGBA8888_REV:
+ hw_format = EASY_US_OUT_FMT(C4_8, R, G, B, A);
+ break;
+ case MESA_FORMAT_RGB565:
+ case MESA_FORMAT_ARGB4444:
+ case MESA_FORMAT_ARGB1555:
+ case MESA_FORMAT_XRGB8888:
+ case MESA_FORMAT_ARGB8888:
+ hw_format = EASY_US_OUT_FMT(C4_8, B, G, R, A);
+ break;
+ case MESA_FORMAT_ARGB4444_REV:
+ case MESA_FORMAT_ARGB1555_REV:
+ case MESA_FORMAT_XRGB8888_REV:
+ case MESA_FORMAT_ARGB8888_REV:
+ hw_format = EASY_US_OUT_FMT(C4_8, A, R, G, B);
+ break;
+ case MESA_FORMAT_SRGBA8:
+ hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, B, G, R);
+ break;
+ case MESA_FORMAT_SARGB8:
+ hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, B, G, R, A);
+ break;
+ case MESA_FORMAT_SL8:
+ hw_format = EASY_US_OUT_FMT(C4_10_GAMMA, A, A, R, A);
+ break;
+ case MESA_FORMAT_A8:
+ hw_format = EASY_US_OUT_FMT(C4_8, A, A, A, A);
+ break;
+ case MESA_FORMAT_L8:
+ case MESA_FORMAT_I8:
+ hw_format = EASY_US_OUT_FMT(C4_8, A, A, R, A);
+ break;
+ default:
+ assert(!"Unsupported format");
+ break;
+ }
+
+ R300_STATECHANGE(rmesa, us_out_fmt);
+ rmesa->hw.us_out_fmt.cmd[1] = hw_format;
+}
+#undef EASY_US_OUT_FMT
+
+/**
+ * Called by Mesa after an internal state update.
+ */
+static void r300InvalidateState(GLcontext * ctx, GLuint new_state)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+
+ _swrast_InvalidateState(ctx, new_state);
+ _swsetup_InvalidateState(ctx, new_state);
+ _vbo_InvalidateState(ctx, new_state);
+ _tnl_InvalidateState(ctx, new_state);
+
+ if (new_state & _NEW_BUFFERS) {
+ _mesa_update_framebuffer(ctx);
+ /* this updates the DrawBuffer's Width/Height if it's a FBO */
+ _mesa_update_draw_buffer_bounds(ctx);
+
+ R300_STATECHANGE(r300, cb);
+ R300_STATECHANGE(r300, zb);
+ }
+
+ if (new_state & (_NEW_LIGHT)) {
+ R300_STATECHANGE(r300, shade2);
+ if (ctx->Light.ProvokingVertex == GL_LAST_VERTEX_CONVENTION)
+ r300->hw.shade2.cmd[1] |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+ else
+ r300->hw.shade2.cmd[1] &= ~R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+ }
+
+ if (new_state & _NEW_BUFFERS) {
+ r300SetupUsOutputFormat(ctx);
+ }
+
+ r300->radeon.NewGLState |= new_state;
+}
+
+/**
+ * Calculate initial hardware state and register state functions.
+ * Assumes that the command buffer and state atoms have been
+ * initialized already.
+ */
+void r300InitState(r300ContextPtr r300)
+{
+ r300ResetHwState(r300);
+}
+
+static void r300RenderMode(GLcontext * ctx, GLenum mode)
+{
+ r300SwitchFallback(ctx, R300_FALLBACK_RENDER_MODE, ctx->RenderMode != GL_RENDER);
+}
+
+/**
+ * Initialize driver's state callback functions
+ */
+void r300InitStateFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
+{
+
+ functions->UpdateState = r300InvalidateState;
+ functions->AlphaFunc = r300AlphaFunc;
+ functions->BlendColor = r300BlendColor;
+ functions->BlendEquationSeparate = r300BlendEquationSeparate;
+ functions->BlendFuncSeparate = r300BlendFuncSeparate;
+ functions->Enable = r300Enable;
+ functions->ColorMask = r300ColorMask;
+ functions->DepthFunc = r300DepthFunc;
+ functions->DepthMask = r300DepthMask;
+ functions->CullFace = r300CullFace;
+ functions->FrontFace = r300FrontFace;
+ functions->ShadeModel = r300ShadeModel;
+ functions->LogicOpcode = r300LogicOpcode;
+
+ /* ARB_point_parameters */
+ functions->PointParameterfv = r300PointParameter;
+
+ /* Stencil related */
+ functions->StencilFuncSeparate = r300StencilFuncSeparate;
+ functions->StencilMaskSeparate = r300StencilMaskSeparate;
+ functions->StencilOpSeparate = r300StencilOpSeparate;
+
+ /* Viewport related */
+ functions->Viewport = r300Viewport;
+ functions->DepthRange = r300DepthRange;
+ functions->PointSize = r300PointSize;
+ functions->LineWidth = r300LineWidth;
+
+ functions->PolygonOffset = r300PolygonOffset;
+ functions->PolygonMode = r300PolygonMode;
+
+ functions->RenderMode = r300RenderMode;
+
+ functions->ClipPlane = r300ClipPlane;
+ functions->Scissor = radeonScissor;
+
+ functions->DrawBuffer = radeonDrawBuffer;
+ functions->ReadBuffer = radeonReadBuffer;
+
+ functions->CopyPixels = _mesa_meta_CopyPixels;
+ functions->DrawPixels = _mesa_meta_DrawPixels;
+ if (radeon->radeonScreen->kernel_mm)
+ functions->ReadPixels = radeonReadPixels;
+}
+
+void r300InitShaderFunctions(r300ContextPtr r300)
+{
+ if (r300->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ r300->vtbl.SetupRSUnit = r500SetupRSUnit;
+ r300->vtbl.SetupPixelShader = r500SetupPixelShader;
+ r300->vtbl.SetupFragmentShaderTextures = r500SetupFragmentShaderTextures;
+ } else {
+ r300->vtbl.SetupRSUnit = r300SetupRSUnit;
+ r300->vtbl.SetupPixelShader = r300SetupPixelShader;
+ r300->vtbl.SetupFragmentShaderTextures = r300SetupFragmentShaderTextures;
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/r300_state.h b/src/mesa/drivers/dri/r300/r300_state.h
new file mode 100644
index 0000000000..e70f84f4e4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_state.h
@@ -0,0 +1,62 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __R300_STATE_H__
+#define __R300_STATE_H__
+
+#include "r300_context.h"
+
+#define R300_NEWPRIM( rmesa ) \
+ do { \
+ if ( rmesa->radeon.dma.flush ) \
+ rmesa->radeon.dma.flush( rmesa->radeon.glCtx ); \
+ } while (0)
+
+#define R300_STATECHANGE(r300, atom) \
+ do { \
+ R300_NEWPRIM(r300); \
+ r300->hw.atom.dirty = GL_TRUE; \
+ r300->radeon.hw.is_dirty = GL_TRUE; \
+ } while(0)
+
+void r300UpdateViewportOffset (GLcontext * ctx);
+void r300UpdateDrawBuffer (GLcontext * ctx);
+void r300UpdateShaders (r300ContextPtr rmesa);
+void r300UpdateShaderStates (r300ContextPtr rmesa);
+void r300InitState (r300ContextPtr r300);
+void r300InitStateFuncs (radeonContextPtr radeon, struct dd_function_table *functions);
+void r300VapCntl(r300ContextPtr rmesa, GLuint input_count, GLuint output_count, GLuint temp_count);
+void r300SetupVAP(GLcontext *ctx, GLuint InputsRead, GLuint OutputsWritten);
+
+#endif /* __R300_STATE_H__ */
diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
new file mode 100644
index 0000000000..4dcc7cb022
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
@@ -0,0 +1,683 @@
+/**************************************************************************
+
+Copyright (C) 2007 Dave Airlie
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Dave Airlie <airlied@linux.ie>
+ * Maciej Cencora <m.cencora@gmail.com>
+ */
+
+#include "tnl/tnl.h"
+#include "tnl/t_pipeline.h"
+
+#include "r300_state.h"
+#include "r300_swtcl.h"
+#include "r300_emit.h"
+#include "r300_tex.h"
+#include "r300_render.h"
+#include "main/simple_list.h"
+
+#define EMIT_ATTR( ATTR, STYLE ) \
+do { \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = (ATTR); \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = (STYLE); \
+ rmesa->radeon.swtcl.vertex_attr_count++; \
+} while (0)
+
+#define EMIT_PAD( N ) \
+do { \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].attrib = 0; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].format = EMIT_PAD; \
+ rmesa->radeon.swtcl.vertex_attrs[rmesa->radeon.swtcl.vertex_attr_count].offset = (N); \
+ rmesa->radeon.swtcl.vertex_attr_count++; \
+} while (0)
+
+#define ADD_ATTR(_attr, _format, _dst_loc, _swizzle, _write_mask, _normalize) \
+do { \
+ attrs[num_attrs].element = (_attr); \
+ attrs[num_attrs].data_type = (_format); \
+ attrs[num_attrs].dst_loc = (_dst_loc); \
+ attrs[num_attrs].swizzle = (_swizzle); \
+ attrs[num_attrs].write_mask = (_write_mask); \
+ attrs[num_attrs]._signed = 0; \
+ attrs[num_attrs].normalize = (_normalize); \
+ ++num_attrs; \
+} while (0)
+
+void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *_InputsRead, GLuint *_OutputsWritten)
+{
+ r300ContextPtr rmesa = R300_CONTEXT( ctx );
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ struct vertex_buffer *VB = &tnl->vb;
+ int first_free_tex = 0;
+ GLuint InputsRead = 0;
+ GLuint OutputsWritten = 0;
+ int num_attrs = 0;
+ GLuint fp_reads = rmesa->selected_fp->InputsRead;
+ struct vertex_attribute *attrs = rmesa->vbuf.attribs;
+
+ radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__);
+ rmesa->swtcl.coloroffset = rmesa->swtcl.specoffset = 0;
+ rmesa->radeon.swtcl.vertex_attr_count = 0;
+
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s\n", __func__);
+
+ /* We always want non Ndc coords format */
+ VB->AttribPtr[VERT_ATTRIB_POS] = VB->ClipPtr;
+
+ /* Always write position vector */
+ InputsRead |= 1 << VERT_ATTRIB_POS;
+ OutputsWritten |= 1 << VERT_RESULT_HPOS;
+ EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_4F );
+ ADD_ATTR(VERT_ATTRIB_POS, R300_DATA_TYPE_FLOAT_4, SWTCL_OVM_POS, SWIZZLE_XYZW, MASK_XYZW, 0);
+ rmesa->swtcl.coloroffset = 4;
+
+ if (fp_reads & FRAG_BIT_COL0) {
+ InputsRead |= 1 << VERT_ATTRIB_COLOR0;
+ OutputsWritten |= 1 << VERT_RESULT_COL0;
+#if MESA_LITTLE_ENDIAN
+ EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_RGBA );
+ ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1);
+#else
+ EMIT_ATTR( _TNL_ATTRIB_COLOR0, EMIT_4UB_4F_ABGR );
+ ADD_ATTR(VERT_ATTRIB_COLOR0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR0, SWIZZLE_XYZW, MASK_XYZW, 1);
+#endif
+ }
+
+ if (fp_reads & FRAG_BIT_COL1) {
+ GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+ InputsRead |= 1 << VERT_ATTRIB_COLOR1;
+ OutputsWritten |= 1 << VERT_RESULT_COL1;
+#if MESA_LITTLE_ENDIAN
+ EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_RGBA );
+ ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1);
+#else
+ EMIT_ATTR( _TNL_ATTRIB_COLOR1, EMIT_4UB_4F_ABGR );
+ ADD_ATTR(VERT_ATTRIB_COLOR1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR1, swiz, MASK_XYZW, 1);
+#endif
+ rmesa->swtcl.specoffset = rmesa->swtcl.coloroffset + 1;
+ }
+
+ if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) {
+ VB->AttribPtr[VERT_ATTRIB_GENERIC0] = VB->BackfaceColorPtr;
+ OutputsWritten |= 1 << VERT_RESULT_BFC0;
+#if MESA_LITTLE_ENDIAN
+ EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_RGBA );
+ ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1);
+#else
+ EMIT_ATTR( _TNL_ATTRIB_GENERIC0, EMIT_4UB_4F_ABGR );
+ ADD_ATTR(VERT_ATTRIB_GENERIC0, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR2, SWIZZLE_XYZW, MASK_XYZW, 1);
+#endif
+ if (fp_reads & FRAG_BIT_COL1) {
+ VB->AttribPtr[VERT_ATTRIB_GENERIC1] = VB->BackfaceSecondaryColorPtr;
+ GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+ OutputsWritten |= 1 << VERT_RESULT_BFC1;
+#if MESA_LITTLE_ENDIAN
+ EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_RGBA );
+ ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1);
+#else
+ EMIT_ATTR( _TNL_ATTRIB_GENERIC1, EMIT_4UB_4F_ABGR );
+ ADD_ATTR(VERT_ATTRIB_GENERIC1, R300_DATA_TYPE_BYTE, SWTCL_OVM_COLOR3, swiz, MASK_XYZW, 1);
+#endif
+ }
+ }
+
+ if (RENDERINPUTS_TEST(tnl->render_inputs_bitset, _TNL_ATTRIB_POINTSIZE )) {
+ GLuint swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ZERO);
+ InputsRead |= 1 << VERT_ATTRIB_POINT_SIZE;
+ OutputsWritten |= 1 << VERT_RESULT_PSIZ;
+ EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
+ ADD_ATTR(VERT_ATTRIB_POINT_SIZE, R300_DATA_TYPE_FLOAT_1, SWTCL_OVM_POINT_SIZE, swiz, MASK_X, 0);
+ }
+
+ if (rmesa->selected_fp->wpos_attr != FRAG_ATTRIB_MAX) {
+ int tex_id = rmesa->selected_fp->wpos_attr - FRAG_ATTRIB_TEX0;
+
+ VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+ VB->AttribPtr[_TNL_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_POS];
+ RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
+ }
+
+ if (rmesa->selected_fp->fog_attr != FRAG_ATTRIB_MAX) {
+ int tex_id = rmesa->selected_fp->fog_attr - FRAG_ATTRIB_TEX0;
+
+ VB->AttribPtr[VERT_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+ VB->AttribPtr[_TNL_ATTRIB_TEX0 + tex_id] = VB->AttribPtr[VERT_ATTRIB_FOG];
+ RENDERINPUTS_SET(tnl->render_inputs_bitset, _TNL_ATTRIB_TEX0 + tex_id);
+ }
+
+ /**
+ * Sending only one texcoord component may lead to lock up,
+ * so for all textures always output 4 texcoord components to RS.
+ */
+ {
+ int i;
+ GLuint swiz, format, hw_format;
+ for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+ if (fp_reads & FRAG_BIT_TEX(i)) {
+ switch (VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size) {
+ case 1:
+ format = EMIT_1F;
+ hw_format = R300_DATA_TYPE_FLOAT_1;
+ swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_ZERO, SWIZZLE_ZERO, SWIZZLE_ONE);
+ break;
+ case 2:
+ format = EMIT_2F;
+ hw_format = R300_DATA_TYPE_FLOAT_2;
+ swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_ZERO, SWIZZLE_ONE);
+ break;
+ case 3:
+ format = EMIT_3F;
+ hw_format = R300_DATA_TYPE_FLOAT_3;
+ swiz = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE);
+ break;
+ case 4:
+ format = EMIT_4F;
+ hw_format = R300_DATA_TYPE_FLOAT_4;
+ swiz = SWIZZLE_XYZW;
+ break;
+ default:
+ continue;
+ }
+ InputsRead |= 1 << (VERT_ATTRIB_TEX0 + i);
+ OutputsWritten |= 1 << (VERT_RESULT_TEX0 + i);
+ EMIT_ATTR(_TNL_ATTRIB_TEX(i), format);
+ ADD_ATTR(VERT_ATTRIB_TEX0 + i, hw_format, SWTCL_OVM_TEX(first_free_tex), swiz, MASK_XYZW, 0);
+ ++first_free_tex;
+ }
+ }
+ }
+
+ if (first_free_tex >= ctx->Const.MaxTextureUnits) {
+ fprintf(stderr, "\tout of free texcoords to write fog coordinate\n");
+ exit(-1);
+ }
+
+ R300_NEWPRIM(rmesa);
+ rmesa->vbuf.num_attribs = num_attrs;
+ *_InputsRead = InputsRead;
+ *_OutputsWritten = OutputsWritten;
+
+ RENDERINPUTS_COPY(rmesa->render_inputs_bitset, tnl->render_inputs_bitset);
+}
+
+static void r300PrepareVertices(GLcontext *ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ GLuint InputsRead, OutputsWritten;
+ radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
+
+ r300ChooseSwtclVertexFormat(ctx, &InputsRead, &OutputsWritten);
+ r300SetupVAP(ctx, InputsRead, OutputsWritten);
+
+ rmesa->radeon.swtcl.vertex_size =
+ _tnl_install_attrs( ctx,
+ rmesa->radeon.swtcl.vertex_attrs,
+ rmesa->radeon.swtcl.vertex_attr_count,
+ NULL, 0 );
+
+ rmesa->radeon.swtcl.vertex_size /= 4;
+}
+
+static void r300_predict_emit_size( r300ContextPtr rmesa )
+{
+ if (!rmesa->radeon.swtcl.emit_prediction) {
+ const int vertex_size = 7;
+ const int prim_size = 3;
+ const int cache_flush_size = 4;
+ const int pre_emit_state = 4;
+ const int scissor_size = 3;
+ const int state_size = radeonCountStateEmitSize(&rmesa->radeon);
+
+ if (rcommonEnsureCmdBufSpace(&rmesa->radeon,
+ state_size + pre_emit_state + scissor_size
+ + vertex_size + prim_size + cache_flush_size * 2,
+ __FUNCTION__))
+ rmesa->radeon.swtcl.emit_prediction = radeonCountStateEmitSize(&rmesa->radeon);
+ else
+ rmesa->radeon.swtcl.emit_prediction = state_size;
+
+ rmesa->radeon.swtcl.emit_prediction += rmesa->radeon.cmdbuf.cs->cdw
+ + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state;
+ radeon_print(RADEON_SWRENDER, RADEON_VERBOSE,
+ "%s, size %d\n",
+ __func__, rmesa->radeon.cmdbuf.cs->cdw
+ + vertex_size + scissor_size + prim_size + cache_flush_size * 2 + pre_emit_state);
+ }
+}
+
+
+static GLuint reduced_prim[] = {
+ GL_POINTS,
+ GL_LINES,
+ GL_LINES,
+ GL_LINES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+ GL_TRIANGLES,
+};
+
+static void r300RasterPrimitive( GLcontext *ctx, GLuint prim );
+
+/***********************************************************************
+ * Emit primitives as inline vertices *
+ ***********************************************************************/
+
+
+#define HAVE_POINTS 1
+#define HAVE_LINES 1
+#define HAVE_LINE_STRIPS 1
+#define HAVE_TRIANGLES 1
+#define HAVE_TRI_STRIPS 1
+#define HAVE_TRI_STRIP_1 0
+#define HAVE_TRI_FANS 1
+#define HAVE_QUADS 0
+#define HAVE_QUAD_STRIPS 0
+#define HAVE_POLYGONS 1
+#define HAVE_ELTS 1
+
+static void* r300_alloc_verts(r300ContextPtr rmesa, GLuint n, GLuint size)
+{
+ void *rv;
+ do {
+ r300_predict_emit_size( rmesa );
+ rv = rcommonAllocDmaLowVerts( &rmesa->radeon, n, size * 4 );
+ } while (!rv);
+ return rv;
+}
+
+#undef LOCAL_VARS
+#undef ALLOC_VERTS
+#define CTX_ARG r300ContextPtr rmesa
+#define GET_VERTEX_DWORDS() rmesa->radeon.swtcl.vertex_size
+#define ALLOC_VERTS( n, size ) r300_alloc_verts(rmesa, n, size);
+#define LOCAL_VARS \
+ r300ContextPtr rmesa = R300_CONTEXT(ctx); \
+ const char *r300verts = (char *)rmesa->radeon.swtcl.verts;
+#define VERT(x) (r300Vertex *)(r300verts + ((x) * vertsize * sizeof(int)))
+#define VERTEX r300Vertex
+#undef TAG
+#define TAG(x) r300_##x
+#include "tnl_dd/t_dd_triemit.h"
+
+
+
+/***********************************************************************
+ * Macros for t_dd_tritmp.h to draw basic primitives *
+ ***********************************************************************/
+
+#define QUAD( a, b, c, d ) r300_quad( rmesa, a, b, c, d )
+#define TRI( a, b, c ) r300_triangle( rmesa, a, b, c )
+#define LINE( a, b ) r300_line( rmesa, a, b )
+#define POINT( a ) r300_point( rmesa, a )
+
+/***********************************************************************
+ * Build render functions from dd templates *
+ ***********************************************************************/
+
+#define R300_UNFILLED_BIT 0x01
+#define R300_MAX_TRIFUNC 0x02
+
+static struct {
+ tnl_points_func points;
+ tnl_line_func line;
+ tnl_triangle_func triangle;
+ tnl_quad_func quad;
+} rast_tab[R300_MAX_TRIFUNC];
+
+#define DO_FALLBACK 0
+#define DO_UNFILLED (IND & R300_UNFILLED_BIT)
+#define DO_TWOSIDE 0
+#define DO_FLAT 0
+#define DO_OFFSET 0
+#define DO_TRI 1
+#define DO_QUAD 1
+#define DO_LINE 1
+#define DO_POINTS 1
+#define DO_FULL_QUAD 1
+
+#define HAVE_SPEC 1
+#define HAVE_BACK_COLORS 0
+#define HAVE_HW_FLATSHADE 1
+#define TAB rast_tab
+
+#define DEPTH_SCALE 1.0
+#define UNFILLED_TRI unfilled_tri
+#define UNFILLED_QUAD unfilled_quad
+#define VERT_X(_v) _v->v.x
+#define VERT_Y(_v) _v->v.y
+#define VERT_Z(_v) _v->v.z
+#define AREA_IS_CCW( a ) (a < 0)
+#define GET_VERTEX(e) (rmesa->radeon.swtcl.verts + (e*rmesa->radeon.swtcl.vertex_size*sizeof(int)))
+
+#define VERT_SET_RGBA( v, c ) \
+do { \
+ r300_color_t *color = (r300_color_t *)&((v)->ui[coloroffset]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(color->red, (c)[0]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(color->green, (c)[1]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(color->blue, (c)[2]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(color->alpha, (c)[3]); \
+} while (0)
+
+#define VERT_COPY_RGBA( v0, v1 ) v0->ui[coloroffset] = v1->ui[coloroffset]
+
+#define VERT_SET_SPEC( v0, c ) \
+do { \
+ if (specoffset) { \
+ UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.red, (c)[0]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.green, (c)[1]); \
+ UNCLAMPED_FLOAT_TO_UBYTE(v0->v.specular.blue, (c)[2]); \
+ } \
+} while (0)
+
+#define VERT_COPY_SPEC( v0, v1 ) \
+do { \
+ if (specoffset) { \
+ v0->v.specular.red = v1->v.specular.red; \
+ v0->v.specular.green = v1->v.specular.green; \
+ v0->v.specular.blue = v1->v.specular.blue; \
+ } \
+} while (0)
+
+#define VERT_SAVE_RGBA( idx ) color[idx] = v[idx]->ui[coloroffset]
+#define VERT_RESTORE_RGBA( idx ) v[idx]->ui[coloroffset] = color[idx]
+#define VERT_SAVE_SPEC( idx ) if (specoffset) spec[idx] = v[idx]->ui[specoffset]
+#define VERT_RESTORE_SPEC( idx ) if (specoffset) v[idx]->ui[specoffset] = spec[idx]
+
+#undef LOCAL_VARS
+#undef TAG
+#undef INIT
+
+#define LOCAL_VARS(n) \
+ r300ContextPtr rmesa = R300_CONTEXT(ctx); \
+ GLuint color[n] = { 0, }, spec[n] = { 0, }; \
+ GLuint coloroffset = rmesa->swtcl.coloroffset; \
+ GLuint specoffset = rmesa->swtcl.specoffset; \
+ (void) color; (void) spec; (void) coloroffset; (void) specoffset;
+
+/***********************************************************************
+ * Helpers for rendering unfilled primitives *
+ ***********************************************************************/
+
+#define RASTERIZE(x) r300RasterPrimitive( ctx, reduced_prim[x] )
+#define RENDER_PRIMITIVE rmesa->radeon.swtcl.render_primitive
+#undef TAG
+#define TAG(x) x
+#include "tnl_dd/t_dd_unfilled.h"
+#undef IND
+
+
+/***********************************************************************
+ * Generate GL render functions *
+ ***********************************************************************/
+
+
+#define IND (0)
+#define TAG(x) x
+#include "tnl_dd/t_dd_tritmp.h"
+
+#define IND (R300_UNFILLED_BIT)
+#define TAG(x) x##_unfilled
+#include "tnl_dd/t_dd_tritmp.h"
+
+
+static void init_rast_tab( void )
+{
+ init();
+ init_unfilled();
+}
+
+/**********************************************************************/
+/* Render unclipped begin/end objects */
+/**********************************************************************/
+
+#define RENDER_POINTS( start, count ) \
+ for ( ; start < count ; start++) \
+ r300_point( rmesa, VERT(start) )
+#define RENDER_LINE( v0, v1 ) \
+ r300_line( rmesa, VERT(v0), VERT(v1) )
+#define RENDER_TRI( v0, v1, v2 ) \
+ r300_triangle( rmesa, VERT(v0), VERT(v1), VERT(v2) )
+#define RENDER_QUAD( v0, v1, v2, v3 ) \
+ r300_quad( rmesa, VERT(v0), VERT(v1), VERT(v2), VERT(v3) )
+#define INIT(x) do { \
+ r300RenderPrimitive( ctx, x ); \
+} while (0)
+#undef LOCAL_VARS
+#define LOCAL_VARS \
+ r300ContextPtr rmesa = R300_CONTEXT(ctx); \
+ const GLuint vertsize = rmesa->radeon.swtcl.vertex_size; \
+ const char *r300verts = (char *)rmesa->radeon.swtcl.verts; \
+ const GLuint * const elt = TNL_CONTEXT(ctx)->vb.Elts; \
+ const GLboolean stipple = ctx->Line.StippleFlag; \
+ (void) elt; (void) stipple;
+#define RESET_STIPPLE //if ( stipple ) r200ResetLineStipple( ctx );
+#define RESET_OCCLUSION
+#define PRESERVE_VB_DEFS
+#define ELT(x) (x)
+#define TAG(x) r300_##x##_verts
+#include "tnl/t_vb_rendertmp.h"
+#undef ELT
+#undef TAG
+#define TAG(x) r300_##x##_elts
+#define ELT(x) elt[x]
+#include "tnl/t_vb_rendertmp.h"
+
+
+
+
+/**********************************************************************/
+/* Choose render functions */
+/**********************************************************************/
+static void r300ChooseRenderState( GLcontext *ctx )
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ GLuint index = 0;
+ GLuint flags = ctx->_TriangleCaps;
+ radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__);
+
+ if (flags & DD_TRI_UNFILLED) index |= R300_UNFILLED_BIT;
+
+ if (index != rmesa->radeon.swtcl.RenderIndex) {
+ tnl->Driver.Render.Points = rast_tab[index].points;
+ tnl->Driver.Render.Line = rast_tab[index].line;
+ tnl->Driver.Render.ClippedLine = rast_tab[index].line;
+ tnl->Driver.Render.Triangle = rast_tab[index].triangle;
+ tnl->Driver.Render.Quad = rast_tab[index].quad;
+
+ if (index == 0) {
+ tnl->Driver.Render.PrimTabVerts = r300_render_tab_verts;
+ tnl->Driver.Render.PrimTabElts = r300_render_tab_elts;
+ tnl->Driver.Render.ClippedPolygon = r300_fast_clipped_poly;
+ } else {
+ tnl->Driver.Render.PrimTabVerts = _tnl_render_tab_verts;
+ tnl->Driver.Render.PrimTabElts = _tnl_render_tab_elts;
+ tnl->Driver.Render.ClippedPolygon = _tnl_RenderClippedPolygon;
+ }
+
+ rmesa->radeon.swtcl.RenderIndex = index;
+ }
+}
+
+void r300RenderStart(GLcontext *ctx)
+{
+ radeon_print(RADEON_SWRENDER, RADEON_VERBOSE, "%s\n", __func__);
+ r300ContextPtr rmesa = R300_CONTEXT( ctx );
+
+ r300ChooseRenderState(ctx);
+
+ r300UpdateShaders(rmesa);
+
+ r300PrepareVertices(ctx);
+
+ r300ValidateBuffers(ctx);
+
+ r300UpdateShaderStates(rmesa);
+
+
+ /* investigate if we can put back flush optimisation if needed */
+ if (rmesa->radeon.dma.flush != NULL) {
+ rmesa->radeon.dma.flush(ctx);
+ }
+}
+
+void r300RenderFinish(GLcontext *ctx)
+{
+}
+
+static void r300RasterPrimitive( GLcontext *ctx, GLuint hwprim )
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
+
+ if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
+ R300_NEWPRIM( rmesa );
+ rmesa->radeon.swtcl.hw_primitive = hwprim;
+ }
+}
+
+void r300RenderPrimitive(GLcontext *ctx, GLenum prim)
+{
+
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ rmesa->radeon.swtcl.render_primitive = prim;
+ radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
+
+ if ((prim == GL_TRIANGLES) && (ctx->_TriangleCaps & DD_TRI_UNFILLED))
+ return;
+
+ r300RasterPrimitive( ctx, reduced_prim[prim] );
+}
+
+void r300ResetLineStipple(GLcontext *ctx)
+{
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s\n", __func__);
+}
+
+void r300InitSwtcl(GLcontext *ctx)
+{
+ TNLcontext *tnl = TNL_CONTEXT(ctx);
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ static int firsttime = 1;
+ radeon_print(RADEON_SWRENDER, RADEON_NORMAL, "%s\n", __func__);
+
+ if (firsttime) {
+ init_rast_tab();
+ firsttime = 0;
+ }
+ rmesa->radeon.swtcl.emit_prediction = 0;
+
+ tnl->Driver.Render.Start = r300RenderStart;
+ tnl->Driver.Render.Finish = r300RenderFinish;
+ tnl->Driver.Render.PrimitiveNotify = r300RenderPrimitive;
+ tnl->Driver.Render.ResetLineStipple = r300ResetLineStipple;
+ tnl->Driver.Render.BuildVertices = _tnl_build_vertices;
+ tnl->Driver.Render.CopyPV = _tnl_copy_pv;
+ tnl->Driver.Render.Interp = _tnl_interp;
+
+ /* FIXME: what are these numbers? */
+ _tnl_init_vertices( ctx, ctx->Const.MaxArrayLockSize + 12,
+ 48 * sizeof(GLfloat) );
+
+ rmesa->radeon.swtcl.verts = (GLubyte *)tnl->clipspace.vertex_buf;
+ rmesa->radeon.swtcl.RenderIndex = ~0;
+ rmesa->radeon.swtcl.render_primitive = GL_TRIANGLES;
+ rmesa->radeon.swtcl.hw_primitive = 0;
+
+ _tnl_invalidate_vertex_state( ctx, ~0 );
+ _tnl_invalidate_vertices( ctx, ~0 );
+
+ _tnl_need_projected_coords( ctx, GL_FALSE );
+}
+
+void r300DestroySwtcl(GLcontext *ctx)
+{
+}
+
+static void r300EmitVertexAOS(r300ContextPtr rmesa, GLuint vertex_size, struct radeon_bo *bo, GLuint offset)
+{
+ BATCH_LOCALS(&rmesa->radeon);
+
+ radeon_print(RADEON_SWRENDER, RADEON_TRACE,
+ "%s: vertex_size %d, offset 0x%x \n",
+ __FUNCTION__, vertex_size, offset);
+
+ BEGIN_BATCH(7);
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 2);
+ OUT_BATCH(1);
+ OUT_BATCH(vertex_size | (vertex_size << 8));
+ OUT_BATCH_RELOC(offset, bo, offset, RADEON_GEM_DOMAIN_GTT, 0, 0);
+ END_BATCH();
+}
+
+static void r300EmitVbufPrim(r300ContextPtr rmesa, GLuint primitive, GLuint vertex_nr)
+{
+ BATCH_LOCALS(&rmesa->radeon);
+ int type, num_verts;
+ if (RADEON_DEBUG & RADEON_VERTS)
+ fprintf(stderr, "%s\n", __func__);
+
+ type = r300PrimitiveType(rmesa, primitive);
+ num_verts = r300NumVerts(rmesa, vertex_nr, primitive);
+
+ BEGIN_BATCH(3);
+ OUT_BATCH_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+ OUT_BATCH(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (num_verts << 16) | type);
+ END_BATCH();
+}
+
+void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset)
+{
+ radeon_print(RADEON_SWRENDER, RADEON_TRACE, "%s\n", __func__);
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+
+ r300EmitCacheFlush(rmesa);
+
+ radeonEmitState(&rmesa->radeon);
+ r300_emit_scissor(ctx);
+ r300EmitVertexAOS(rmesa,
+ rmesa->radeon.swtcl.vertex_size,
+ rmesa->radeon.swtcl.bo,
+ current_offset);
+
+ r300EmitVbufPrim(rmesa,
+ rmesa->radeon.swtcl.hw_primitive,
+ rmesa->radeon.swtcl.numverts);
+ r300EmitCacheFlush(rmesa);
+ if ( rmesa->radeon.swtcl.emit_prediction < rmesa->radeon.cmdbuf.cs->cdw )
+ WARN_ONCE("Rendering was %d commands larger than predicted size."
+ " We might overflow command buffer.\n",
+ rmesa->radeon.cmdbuf.cs->cdw - rmesa->radeon.swtcl.emit_prediction );
+ rmesa->radeon.swtcl.emit_prediction = 0;
+ COMMIT_BATCH();
+}
diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.h b/src/mesa/drivers/dri/r300/r300_swtcl.h
new file mode 100644
index 0000000000..c271d26546
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.h
@@ -0,0 +1,65 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com> - original r200 code
+ * Dave Airlie <airlied@linux.ie>
+ */
+
+#ifndef __R300_SWTCL_H__
+#define __R300_SWTCL_H__
+
+#include "main/mtypes.h"
+#include "swrast/swrast.h"
+#include "r300_context.h"
+
+/*
+ * Here are definitions of OVM locations of vertex attributes for non TCL hw
+ */
+#define SWTCL_OVM_POS 0
+#define SWTCL_OVM_COLOR0 2
+#define SWTCL_OVM_COLOR1 3
+#define SWTCL_OVM_COLOR2 4
+#define SWTCL_OVM_COLOR3 5
+#define SWTCL_OVM_TEX(n) ((n) + 6)
+#define SWTCL_OVM_POINT_SIZE 15
+
+extern void r300ChooseSwtclVertexFormat(GLcontext *ctx, GLuint *InputsRead, GLuint *OutputsWritten);
+
+extern void r300InitSwtcl( GLcontext *ctx );
+extern void r300DestroySwtcl( GLcontext *ctx );
+
+extern void r300RenderStart(GLcontext *ctx);
+extern void r300RenderFinish(GLcontext *ctx);
+extern void r300RenderPrimitive(GLcontext *ctx, GLenum prim);
+extern void r300ResetLineStipple(GLcontext *ctx);
+
+extern void r300_swtcl_flush(GLcontext *ctx, uint32_t current_offset);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/r300_tex.c b/src/mesa/drivers/dri/r300/r300_tex.c
new file mode 100644
index 0000000000..baef206bc2
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_tex.c
@@ -0,0 +1,386 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/colormac.h"
+#include "main/context.h"
+#include "main/enums.h"
+#include "main/image.h"
+#include "main/mipmap.h"
+#include "main/simple_list.h"
+#include "main/texstore.h"
+#include "main/texobj.h"
+
+#include "texmem.h"
+
+#include "r300_context.h"
+#include "radeon_mipmap_tree.h"
+#include "r300_tex.h"
+
+
+static unsigned int translate_wrap_mode(GLenum wrapmode)
+{
+ switch(wrapmode) {
+ case GL_REPEAT: return R300_TX_REPEAT;
+ case GL_CLAMP: return R300_TX_CLAMP;
+ case GL_CLAMP_TO_EDGE: return R300_TX_CLAMP_TO_EDGE;
+ case GL_CLAMP_TO_BORDER: return R300_TX_CLAMP_TO_BORDER;
+ case GL_MIRRORED_REPEAT: return R300_TX_REPEAT | R300_TX_MIRRORED;
+ case GL_MIRROR_CLAMP_EXT: return R300_TX_CLAMP | R300_TX_MIRRORED;
+ case GL_MIRROR_CLAMP_TO_EDGE_EXT: return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
+ case GL_MIRROR_CLAMP_TO_BORDER_EXT: return R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED;
+ default:
+ _mesa_problem(NULL, "bad wrap mode in %s", __FUNCTION__);
+ return 0;
+ }
+}
+
+
+/**
+ * Update the cached hardware registers based on the current texture wrap modes.
+ *
+ * \param t Texture object whose wrap modes are to be set
+ */
+static void r300UpdateTexWrap(radeonTexObjPtr t)
+{
+ struct gl_texture_object *tObj = &t->base;
+
+ t->pp_txfilter &=
+ ~(R300_TX_WRAP_S_MASK | R300_TX_WRAP_T_MASK | R300_TX_WRAP_R_MASK);
+
+ t->pp_txfilter |= translate_wrap_mode(tObj->WrapS) << R300_TX_WRAP_S_SHIFT;
+
+ if (tObj->Target != GL_TEXTURE_1D) {
+ t->pp_txfilter |= translate_wrap_mode(tObj->WrapT) << R300_TX_WRAP_T_SHIFT;
+
+ if (tObj->Target == GL_TEXTURE_3D)
+ t->pp_txfilter |= translate_wrap_mode(tObj->WrapR) << R300_TX_WRAP_R_SHIFT;
+ }
+}
+
+static GLuint aniso_filter(GLfloat anisotropy)
+{
+ if (anisotropy >= 16.0) {
+ return R300_TX_MAX_ANISO_16_TO_1;
+ } else if (anisotropy >= 8.0) {
+ return R300_TX_MAX_ANISO_8_TO_1;
+ } else if (anisotropy >= 4.0) {
+ return R300_TX_MAX_ANISO_4_TO_1;
+ } else if (anisotropy >= 2.0) {
+ return R300_TX_MAX_ANISO_2_TO_1;
+ } else {
+ return R300_TX_MAX_ANISO_1_TO_1;
+ }
+}
+
+/**
+ * Set the texture magnification and minification modes.
+ *
+ * \param t Texture whose filter modes are to be set
+ * \param minf Texture minification mode
+ * \param magf Texture magnification mode
+ * \param anisotropy Maximum anisotropy level
+ */
+static void r300SetTexFilter(radeonTexObjPtr t, GLenum minf, GLenum magf, GLfloat anisotropy)
+{
+ /* Force revalidation to account for switches from/to mipmapping. */
+ t->validated = GL_FALSE;
+
+ t->pp_txfilter &= ~(R300_TX_MIN_FILTER_MASK | R300_TX_MIN_FILTER_MIP_MASK | R300_TX_MAG_FILTER_MASK | R300_TX_MAX_ANISO_MASK);
+ t->pp_txfilter_1 &= ~R300_EDGE_ANISO_EDGE_ONLY;
+
+ /* Note that EXT_texture_filter_anisotropic is extremely vague about
+ * how anisotropic filtering interacts with the "normal" filter modes.
+ * When anisotropic filtering is enabled, we override min and mag
+ * filter settings completely. This includes driconf's settings.
+ */
+ if (anisotropy >= 2.0 && (minf != GL_NEAREST) && (magf != GL_NEAREST)) {
+ t->pp_txfilter |= R300_TX_MAG_FILTER_ANISO
+ | R300_TX_MIN_FILTER_ANISO
+ | R300_TX_MIN_FILTER_MIP_LINEAR
+ | aniso_filter(anisotropy);
+ if (RADEON_DEBUG & RADEON_TEXTURE)
+ fprintf(stderr, "Using maximum anisotropy of %f\n", anisotropy);
+ return;
+ }
+
+ switch (minf) {
+ case GL_NEAREST:
+ t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST;
+ break;
+ case GL_LINEAR:
+ t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR;
+ break;
+ case GL_NEAREST_MIPMAP_NEAREST:
+ t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_NEAREST;
+ break;
+ case GL_NEAREST_MIPMAP_LINEAR:
+ t->pp_txfilter |= R300_TX_MIN_FILTER_NEAREST|R300_TX_MIN_FILTER_MIP_LINEAR;
+ break;
+ case GL_LINEAR_MIPMAP_NEAREST:
+ t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_NEAREST;
+ break;
+ case GL_LINEAR_MIPMAP_LINEAR:
+ t->pp_txfilter |= R300_TX_MIN_FILTER_LINEAR|R300_TX_MIN_FILTER_MIP_LINEAR;
+ break;
+ }
+
+ /* Note we don't have 3D mipmaps so only use the mag filter setting
+ * to set the 3D texture filter mode.
+ */
+ switch (magf) {
+ case GL_NEAREST:
+ t->pp_txfilter |= R300_TX_MAG_FILTER_NEAREST;
+ break;
+ case GL_LINEAR:
+ t->pp_txfilter |= R300_TX_MAG_FILTER_LINEAR;
+ break;
+ }
+}
+
+static void r300SetTexBorderColor(radeonTexObjPtr t, const GLfloat color[4])
+{
+ GLubyte c[4];
+ CLAMPED_FLOAT_TO_UBYTE(c[0], color[0]);
+ CLAMPED_FLOAT_TO_UBYTE(c[1], color[1]);
+ CLAMPED_FLOAT_TO_UBYTE(c[2], color[2]);
+ CLAMPED_FLOAT_TO_UBYTE(c[3], color[3]);
+ t->pp_border_color = PACK_COLOR_8888(c[3], c[0], c[1], c[2]);
+}
+
+/**
+ * Changes variables and flags for a state update, which will happen at the
+ * next UpdateTextureState
+ */
+
+static void r300TexParameter(GLcontext * ctx, GLenum target,
+ struct gl_texture_object *texObj,
+ GLenum pname, const GLfloat * params)
+{
+ radeonTexObj* t = radeon_tex_obj(texObj);
+ GLenum texBaseFormat;
+
+ if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) {
+ fprintf(stderr, "%s( %s )\n", __FUNCTION__,
+ _mesa_lookup_enum_by_nr(pname));
+ }
+
+ switch (pname) {
+ case GL_TEXTURE_MIN_FILTER:
+ case GL_TEXTURE_MAG_FILTER:
+ case GL_TEXTURE_MAX_ANISOTROPY_EXT:
+ r300SetTexFilter(t, texObj->MinFilter, texObj->MagFilter, texObj->MaxAnisotropy);
+ break;
+
+ case GL_TEXTURE_WRAP_S:
+ case GL_TEXTURE_WRAP_T:
+ case GL_TEXTURE_WRAP_R:
+ r300UpdateTexWrap(t);
+ break;
+
+ case GL_TEXTURE_BORDER_COLOR:
+ r300SetTexBorderColor(t, texObj->BorderColor.f);
+ break;
+
+ case GL_TEXTURE_BASE_LEVEL:
+ case GL_TEXTURE_MAX_LEVEL:
+ case GL_TEXTURE_MIN_LOD:
+ case GL_TEXTURE_MAX_LOD:
+ t->validated = GL_FALSE;
+ break;
+
+ case GL_DEPTH_TEXTURE_MODE:
+ if (!texObj->Image[0][texObj->BaseLevel])
+ return;
+ texBaseFormat = texObj->Image[0][texObj->BaseLevel]->_BaseFormat;
+
+ if (texBaseFormat == GL_DEPTH_COMPONENT ||
+ texBaseFormat == GL_DEPTH_STENCIL) {
+ r300SetDepthTexMode(texObj);
+ break;
+ } else {
+ /* If the texture isn't a depth texture, changing this
+ * state won't cause any changes to the hardware.
+ * Don't force a flush of texture state.
+ */
+ return;
+ }
+
+ default:
+ return;
+ }
+}
+
+static void r300DeleteTexture(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ radeonTexObj* t = radeon_tex_obj(texObj);
+
+ if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) {
+ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
+ (void *)texObj,
+ _mesa_lookup_enum_by_nr(texObj->Target));
+ }
+
+ if (rmesa) {
+ int i;
+ struct radeon_bo *bo;
+ bo = !t->mt ? t->bo : t->mt->bo;
+ if (bo && radeon_bo_is_referenced_by_cs(bo, rmesa->radeon.cmdbuf.cs)) {
+ radeon_firevertices(&rmesa->radeon);
+ }
+
+ for(i = 0; i < R300_MAX_TEXTURE_UNITS; ++i)
+ if (rmesa->hw.textures[i] == t)
+ rmesa->hw.textures[i] = 0;
+ }
+
+ if (t->bo) {
+ radeon_bo_unref(t->bo);
+ t->bo = NULL;
+ }
+
+ radeon_miptree_unreference(&t->mt);
+
+ _mesa_delete_texture_object(ctx, texObj);
+}
+
+/**
+ * Allocate a new texture object.
+ * Called via ctx->Driver.NewTextureObject.
+ * Note: this function will be called during context creation to
+ * allocate the default texture objects.
+ * Fixup MaxAnisotropy according to user preference.
+ */
+static struct gl_texture_object *r300NewTextureObject(GLcontext * ctx,
+ GLuint name,
+ GLenum target)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ radeonTexObj* t = CALLOC_STRUCT(radeon_tex_obj);
+
+
+ if (RADEON_DEBUG & (RADEON_STATE | RADEON_TEXTURE)) {
+ fprintf(stderr, "%s( %p (target = %s) )\n", __FUNCTION__,
+ t, _mesa_lookup_enum_by_nr(target));
+ }
+
+ _mesa_initialize_texture_object(&t->base, name, target);
+ t->base.MaxAnisotropy = rmesa->radeon.initialMaxAnisotropy;
+
+ /* Initialize hardware state */
+ r300UpdateTexWrap(t);
+ r300SetTexFilter(t, t->base.MinFilter, t->base.MagFilter, t->base.MaxAnisotropy);
+ r300SetTexBorderColor(t, t->base.BorderColor.f);
+
+ return &t->base;
+}
+
+unsigned r300IsFormatRenderable(gl_format mesa_format)
+{
+ switch (mesa_format)
+ {
+ case MESA_FORMAT_RGB565:
+ case MESA_FORMAT_RGBA5551:
+ case MESA_FORMAT_RGBA8888:
+ case MESA_FORMAT_RGB565_REV:
+ case MESA_FORMAT_RGBA8888_REV:
+ case MESA_FORMAT_ARGB4444:
+ case MESA_FORMAT_ARGB1555:
+ case MESA_FORMAT_XRGB8888:
+ case MESA_FORMAT_ARGB8888:
+ case MESA_FORMAT_ARGB4444_REV:
+ case MESA_FORMAT_ARGB1555_REV:
+ case MESA_FORMAT_XRGB8888_REV:
+ case MESA_FORMAT_ARGB8888_REV:
+ case MESA_FORMAT_SRGBA8:
+ case MESA_FORMAT_SARGB8:
+ case MESA_FORMAT_SL8:
+ case MESA_FORMAT_A8:
+ case MESA_FORMAT_L8:
+ case MESA_FORMAT_I8:
+ case MESA_FORMAT_Z16:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+unsigned r500IsFormatRenderable(gl_format mesa_format)
+{
+ if (mesa_format == MESA_FORMAT_S8_Z24) {
+ return 1;
+ } else {
+ return r300IsFormatRenderable(mesa_format);
+ }
+}
+
+void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions)
+{
+ /* Note: we only plug in the functions we implement in the driver
+ * since _mesa_init_driver_functions() was already called.
+ */
+ functions->NewTextureImage = radeonNewTextureImage;
+ functions->FreeTexImageData = radeonFreeTexImageData;
+ functions->MapTexture = radeonMapTexture;
+ functions->UnmapTexture = radeonUnmapTexture;
+
+ functions->ChooseTextureFormat = radeonChooseTextureFormat_mesa;
+ functions->TexImage1D = radeonTexImage1D;
+ functions->TexImage2D = radeonTexImage2D;
+ functions->TexImage3D = radeonTexImage3D;
+ functions->TexSubImage1D = radeonTexSubImage1D;
+ functions->TexSubImage2D = radeonTexSubImage2D;
+ functions->TexSubImage3D = radeonTexSubImage3D;
+ functions->GetTexImage = radeonGetTexImage;
+ functions->GetCompressedTexImage = radeonGetCompressedTexImage;
+ functions->NewTextureObject = r300NewTextureObject;
+ functions->DeleteTexture = r300DeleteTexture;
+ functions->IsTextureResident = driIsTextureResident;
+
+ functions->TexParameter = r300TexParameter;
+
+ functions->CompressedTexImage2D = radeonCompressedTexImage2D;
+ functions->CompressedTexSubImage2D = radeonCompressedTexSubImage2D;
+
+ if (radeon->radeonScreen->kernel_mm) {
+ functions->CopyTexImage2D = radeonCopyTexImage2D;
+ functions->CopyTexSubImage2D = radeonCopyTexSubImage2D;
+ }
+
+ functions->GenerateMipmap = radeonGenerateMipmap;
+
+ driInitTextureFormats();
+}
diff --git a/src/mesa/drivers/dri/r300/r300_tex.h b/src/mesa/drivers/dri/r300/r300_tex.h
new file mode 100644
index 0000000000..aca44cd766
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_tex.h
@@ -0,0 +1,59 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+#ifndef __r300_TEX_H__
+#define __r300_TEX_H__
+
+extern void r300SetDepthTexMode(struct gl_texture_object *tObj);
+
+extern void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target,
+ __DRIdrawable *dPriv);
+
+extern void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target,
+ GLint format, __DRIdrawable *dPriv);
+
+extern void r300SetTexOffset(__DRIcontext *pDRICtx, GLint texname,
+ unsigned long long offset, GLint depth,
+ GLuint pitch);
+
+extern GLboolean r300ValidateBuffers(GLcontext * ctx);
+
+extern void r300InitTextureFuncs(radeonContextPtr radeon, struct dd_function_table *functions);
+
+int32_t r300TranslateTexFormat(gl_format mesaFormat);
+
+unsigned r300IsFormatRenderable(gl_format mesaFormat);
+unsigned r500IsFormatRenderable(gl_format mesaFormat);
+
+#endif /* __r300_TEX_H__ */
diff --git a/src/mesa/drivers/dri/r300/r300_texstate.c b/src/mesa/drivers/dri/r300/r300_texstate.c
new file mode 100644
index 0000000000..4ba6740e3d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_texstate.c
@@ -0,0 +1,524 @@
+/*
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/**
+ * \file
+ *
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ *
+ * \todo Enable R300 texture tiling code?
+ */
+
+#include "main/glheader.h"
+#include "main/imports.h"
+#include "main/context.h"
+#include "main/macros.h"
+#include "main/teximage.h"
+#include "main/texobj.h"
+#include "main/enums.h"
+#include "main/simple_list.h"
+
+#include "r300_context.h"
+#include "radeon_mipmap_tree.h"
+#include "r300_tex.h"
+#include "r300_reg.h"
+
+/*
+ * Note that the _REV formats are the same as the non-REV formats. This is
+ * because the REV and non-REV formats are identical as a byte string, but
+ * differ when accessed as 16-bit or 32-bit words depending on the endianness of
+ * the host. Since the textures are transferred to the R300 as a byte string
+ * (i.e. without any byte-swapping), the R300 sees the REV and non-REV formats
+ * identically. -- paulus
+ */
+
+int32_t r300TranslateTexFormat(gl_format mesaFormat)
+{
+ switch (mesaFormat)
+ {
+#ifdef MESA_LITTLE_ENDIAN
+ case MESA_FORMAT_RGBA8888:
+ return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8);
+ case MESA_FORMAT_RGBA8888_REV:
+ return R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8);
+ case MESA_FORMAT_ARGB8888:
+ return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+ case MESA_FORMAT_ARGB8888_REV:
+ return R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8);
+#else
+ case MESA_FORMAT_RGBA8888:
+ return R300_EASY_TX_FORMAT(Z, Y, X, W, W8Z8Y8X8);
+ case MESA_FORMAT_RGBA8888_REV:
+ return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8);
+ case MESA_FORMAT_ARGB8888:
+ return R300_EASY_TX_FORMAT(W, Z, Y, X, W8Z8Y8X8);
+ case MESA_FORMAT_ARGB8888_REV:
+ return R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+#endif
+ case MESA_FORMAT_XRGB8888:
+ return R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+ case MESA_FORMAT_RGB888:
+ return R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+ case MESA_FORMAT_RGB565:
+ return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
+ case MESA_FORMAT_RGB565_REV:
+ return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
+ case MESA_FORMAT_ARGB4444:
+ return R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4);
+ case MESA_FORMAT_ARGB4444_REV:
+ return R300_EASY_TX_FORMAT(X, Y, Z, W, W4Z4Y4X4);
+ case MESA_FORMAT_ARGB1555:
+ return R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5);
+ case MESA_FORMAT_ARGB1555_REV:
+ return R300_EASY_TX_FORMAT(X, Y, Z, W, W1Z5Y5X5);
+ case MESA_FORMAT_AL88:
+ return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8);
+ case MESA_FORMAT_AL88_REV:
+ return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8);
+ case MESA_FORMAT_RGB332:
+ return R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z3Y3X2);
+ case MESA_FORMAT_A8:
+ return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X8);
+ case MESA_FORMAT_L8:
+ return R300_EASY_TX_FORMAT(X, X, X, ONE, X8);
+ case MESA_FORMAT_I8:
+ return R300_EASY_TX_FORMAT(X, X, X, X, X8);
+ case MESA_FORMAT_CI8:
+ return R300_EASY_TX_FORMAT(X, X, X, X, X8);
+ case MESA_FORMAT_YCBCR:
+ return R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE;
+ case MESA_FORMAT_YCBCR_REV:
+ return R300_EASY_TX_FORMAT(X, Y, Z, ONE, G8R8_G8B8) | R300_TX_FORMAT_YUV_MODE;
+ case MESA_FORMAT_RGB_DXT1:
+ return R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1);
+ case MESA_FORMAT_RGBA_DXT1:
+ return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1);
+ case MESA_FORMAT_RGBA_DXT3:
+ return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3);
+ case MESA_FORMAT_RGBA_DXT5:
+ return R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5);
+ case MESA_FORMAT_RGBA_FLOAT32:
+ return R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R32G32B32A32);
+ case MESA_FORMAT_RGBA_FLOAT16:
+ return R300_EASY_TX_FORMAT(Z, Y, X, W, FL_R16G16B16A16);
+ case MESA_FORMAT_ALPHA_FLOAT32:
+ return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I32);
+ case MESA_FORMAT_ALPHA_FLOAT16:
+ return R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, FL_I16);
+ case MESA_FORMAT_LUMINANCE_FLOAT32:
+ return R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I32);
+ case MESA_FORMAT_LUMINANCE_FLOAT16:
+ return R300_EASY_TX_FORMAT(X, X, X, ONE, FL_I16);
+ case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT32:
+ return R300_EASY_TX_FORMAT(X, X, X, Y, FL_I32A32);
+ case MESA_FORMAT_LUMINANCE_ALPHA_FLOAT16:
+ return R300_EASY_TX_FORMAT(X, X, X, Y, FL_I16A16);
+ case MESA_FORMAT_INTENSITY_FLOAT32:
+ return R300_EASY_TX_FORMAT(X, X, X, X, FL_I32);
+ case MESA_FORMAT_INTENSITY_FLOAT16:
+ return R300_EASY_TX_FORMAT(X, X, X, X, FL_I16);
+ case MESA_FORMAT_Z16:
+ return R300_EASY_TX_FORMAT(X, X, X, X, X16);
+ case MESA_FORMAT_Z24_S8:
+ return R300_EASY_TX_FORMAT(X, X, X, X, X24_Y8);
+ case MESA_FORMAT_S8_Z24:
+ return R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8);
+ case MESA_FORMAT_Z32:
+ return R300_EASY_TX_FORMAT(X, X, X, X, X32);
+ /* EXT_texture_sRGB */
+ case MESA_FORMAT_SRGBA8:
+ return R300_EASY_TX_FORMAT(Y, Z, W, X, W8Z8Y8X8) | R300_TX_FORMAT_GAMMA;
+ case MESA_FORMAT_SLA8:
+ return R300_EASY_TX_FORMAT(X, X, X, Y, Y8X8) | R300_TX_FORMAT_GAMMA;
+ case MESA_FORMAT_SL8:
+ return R300_EASY_TX_FORMAT(X, X, X, ONE, X8) | R300_TX_FORMAT_GAMMA;
+ case MESA_FORMAT_SRGB_DXT1:
+ return R300_EASY_TX_FORMAT(X, Y, Z, ONE, DXT1) | R300_TX_FORMAT_GAMMA;
+ case MESA_FORMAT_SRGBA_DXT1:
+ return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT1) | R300_TX_FORMAT_GAMMA;
+ case MESA_FORMAT_SRGBA_DXT3:
+ return R300_EASY_TX_FORMAT(X, Y, Z, W, DXT3) | R300_TX_FORMAT_GAMMA;
+ case MESA_FORMAT_SRGBA_DXT5:
+ return R300_EASY_TX_FORMAT(Y, Z, W, X, DXT5) | R300_TX_FORMAT_GAMMA;
+ default:
+ return -1;
+ }
+};
+
+void r300SetDepthTexMode(struct gl_texture_object *tObj)
+{
+ static const GLuint formats[3][3] = {
+ {
+ R300_EASY_TX_FORMAT(X, X, X, ONE, X16),
+ R300_EASY_TX_FORMAT(X, X, X, X, X16),
+ R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X16),
+ },
+ {
+ R300_EASY_TX_FORMAT(Y, Y, Y, ONE, X24_Y8),
+ R300_EASY_TX_FORMAT(Y, Y, Y, Y, X24_Y8),
+ R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, Y, X24_Y8),
+ },
+ {
+ R300_EASY_TX_FORMAT(X, X, X, ONE, X32),
+ R300_EASY_TX_FORMAT(X, X, X, X, X32),
+ R300_EASY_TX_FORMAT(ZERO, ZERO, ZERO, X, X32),
+ },
+ };
+ const GLuint *format;
+ radeonTexObjPtr t;
+
+ if (!tObj)
+ return;
+
+ t = radeon_tex_obj(tObj);
+
+ switch (tObj->Image[0][tObj->BaseLevel]->TexFormat) {
+ case MESA_FORMAT_Z16:
+ format = formats[0];
+ break;
+ case MESA_FORMAT_S8_Z24:
+ format = formats[1];
+ break;
+ case MESA_FORMAT_Z32:
+ format = formats[2];
+ break;
+ default:
+ /* Error...which should have already been caught by higher
+ * levels of Mesa.
+ */
+ ASSERT(0);
+ return;
+ }
+
+ switch (tObj->DepthMode) {
+ case GL_LUMINANCE:
+ t->pp_txformat = format[0];
+ break;
+ case GL_INTENSITY:
+ t->pp_txformat = format[1];
+ break;
+ case GL_ALPHA:
+ t->pp_txformat = format[2];
+ break;
+ default:
+ /* Error...which should have already been caught by higher
+ * levels of Mesa.
+ */
+ ASSERT(0);
+ return;
+ }
+}
+
+
+/**
+ * Compute the cached hardware register values for the given texture object.
+ *
+ * \param rmesa Context pointer
+ * \param t the r300 texture object
+ */
+static void setup_hardware_state(r300ContextPtr rmesa, radeonTexObj *t)
+{
+ const struct gl_texture_image *firstImage;
+ firstImage = t->base.Image[0][t->minLod];
+
+ if (!t->image_override) {
+ if (firstImage->_BaseFormat == GL_DEPTH_COMPONENT) {
+ r300SetDepthTexMode(&t->base);
+ } else {
+ int32_t txformat = r300TranslateTexFormat(firstImage->TexFormat);
+ if (txformat < 0) {
+ _mesa_problem(rmesa->radeon.glCtx, "%s: Invalid format %s",
+ __FUNCTION__, _mesa_get_format_name(firstImage->TexFormat));
+ exit(1);
+ }
+ t->pp_txformat = (uint32_t) txformat;
+ }
+ }
+
+ if (t->image_override && t->bo)
+ return;
+
+ t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((firstImage->Width - 1) << R300_TX_WIDTHMASK_SHIFT)))
+ | ((R300_TX_HEIGHTMASK_MASK & ((firstImage->Height - 1) << R300_TX_HEIGHTMASK_SHIFT)))
+ | ((R300_TX_DEPTHMASK_MASK & ((firstImage->DepthLog2) << R300_TX_DEPTHMASK_SHIFT)))
+ | ((R300_TX_MAX_MIP_LEVEL_MASK & ((t->maxLod - t->minLod) << R300_TX_MAX_MIP_LEVEL_SHIFT))));
+
+ t->tile_bits = 0;
+
+ if (t->base.Target == GL_TEXTURE_CUBE_MAP)
+ t->pp_txformat |= R300_TX_FORMAT_CUBIC_MAP;
+ if (t->base.Target == GL_TEXTURE_3D)
+ t->pp_txformat |= R300_TX_FORMAT_3D;
+
+
+ if (t->base.Target == GL_TEXTURE_RECTANGLE_NV) {
+ unsigned int align = (64 / _mesa_get_format_bytes(firstImage->TexFormat)) - 1;
+ t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
+ if (!t->image_override)
+ t->pp_txpitch = ((firstImage->Width + align) & ~align) - 1;
+ }
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ if (firstImage->Width > 2048)
+ t->pp_txpitch |= R500_TXWIDTH_BIT11;
+ else
+ t->pp_txpitch &= ~R500_TXWIDTH_BIT11;
+ if (firstImage->Height > 2048)
+ t->pp_txpitch |= R500_TXHEIGHT_BIT11;
+ else
+ t->pp_txpitch &= ~R500_TXHEIGHT_BIT11;
+ }
+}
+
+/**
+ * Ensure the given texture is ready for rendering.
+ *
+ * Mostly this means populating the texture object's mipmap tree.
+ */
+static GLboolean r300_validate_texture(GLcontext * ctx, struct gl_texture_object *texObj)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ radeonTexObj *t = radeon_tex_obj(texObj);
+
+ if (!radeon_validate_texture_miptree(ctx, texObj))
+ return GL_FALSE;
+
+ /* Configure the hardware registers (more precisely, the cached version
+ * of the hardware registers). */
+ setup_hardware_state(rmesa, t);
+
+ t->validated = GL_TRUE;
+ return GL_TRUE;
+}
+
+/**
+ * Ensure all enabled and complete textures are uploaded along with any buffers being used.
+ */
+GLboolean r300ValidateBuffers(GLcontext * ctx)
+{
+ r300ContextPtr rmesa = R300_CONTEXT(ctx);
+ struct radeon_renderbuffer *rrb;
+ int i;
+ int ret;
+
+ radeon_cs_space_reset_bos(rmesa->radeon.cmdbuf.cs);
+
+ rrb = radeon_get_colorbuffer(&rmesa->radeon);
+ /* color buffer */
+ if (rrb && rrb->bo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ rrb->bo, 0,
+ RADEON_GEM_DOMAIN_VRAM);
+ }
+
+ /* depth buffer */
+ rrb = radeon_get_depthbuffer(&rmesa->radeon);
+ if (rrb && rrb->bo) {
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ rrb->bo, 0,
+ RADEON_GEM_DOMAIN_VRAM);
+ }
+
+ for (i = 0; i < ctx->Const.MaxTextureImageUnits; ++i) {
+ radeonTexObj *t;
+
+ if (!ctx->Texture.Unit[i]._ReallyEnabled)
+ continue;
+
+ if (!r300_validate_texture(ctx, ctx->Texture.Unit[i]._Current)) {
+ _mesa_warning(ctx,
+ "failed to validate texture for unit %d.\n",
+ i);
+ }
+ t = radeon_tex_obj(ctx->Texture.Unit[i]._Current);
+ if (t->image_override && t->bo)
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ t->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+ else if (t->mt->bo)
+ radeon_cs_space_add_persistent_bo(rmesa->radeon.cmdbuf.cs,
+ t->mt->bo,
+ RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0);
+ }
+
+ ret = radeon_cs_space_check_with_bo(rmesa->radeon.cmdbuf.cs, first_elem(&rmesa->radeon.dma.reserved)->bo, RADEON_GEM_DOMAIN_GTT, 0);
+ if (ret)
+ return GL_FALSE;
+ return GL_TRUE;
+}
+
+void r300SetTexOffset(__DRIcontext * pDRICtx, GLint texname,
+ unsigned long long offset, GLint depth, GLuint pitch)
+{
+ r300ContextPtr rmesa = pDRICtx->driverPrivate;
+ struct gl_texture_object *tObj =
+ _mesa_lookup_texture(rmesa->radeon.glCtx, texname);
+ radeonTexObjPtr t = radeon_tex_obj(tObj);
+ uint32_t pitch_val;
+
+ if (!tObj)
+ return;
+
+ t->image_override = GL_TRUE;
+
+ if (!offset)
+ return;
+
+ t->bo = NULL;
+ t->override_offset = offset;
+ t->pp_txpitch &= (1 << 13) -1;
+ pitch_val = pitch;
+
+ switch (depth) {
+ case 32:
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+ pitch_val /= 4;
+ break;
+ case 24:
+ default:
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+ pitch_val /= 4;
+ break;
+ case 16:
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
+ pitch_val /= 2;
+ break;
+ }
+ pitch_val--;
+
+ t->pp_txpitch |= pitch_val;
+}
+
+void r300SetTexBuffer2(__DRIcontext *pDRICtx, GLint target, GLint texture_format, __DRIdrawable *dPriv)
+{
+ struct gl_texture_unit *texUnit;
+ struct gl_texture_object *texObj;
+ struct gl_texture_image *texImage;
+ struct radeon_renderbuffer *rb;
+ radeon_texture_image *rImage;
+ radeonContextPtr radeon;
+ r300ContextPtr rmesa;
+ struct radeon_framebuffer *rfb;
+ radeonTexObjPtr t;
+ uint32_t pitch_val;
+ uint32_t internalFormat, type, format;
+
+ type = GL_BGRA;
+ format = GL_UNSIGNED_BYTE;
+ internalFormat = (texture_format == __DRI_TEXTURE_FORMAT_RGB ? 3 : 4);
+
+ radeon = pDRICtx->driverPrivate;
+ rmesa = pDRICtx->driverPrivate;
+
+ rfb = dPriv->driverPrivate;
+ texUnit = &radeon->glCtx->Texture.Unit[radeon->glCtx->Texture.CurrentUnit];
+ texObj = _mesa_select_tex_object(radeon->glCtx, texUnit, target);
+ texImage = _mesa_get_tex_image(radeon->glCtx, texObj, target, 0);
+
+ rImage = get_radeon_texture_image(texImage);
+ t = radeon_tex_obj(texObj);
+ if (t == NULL) {
+ return;
+ }
+
+ radeon_update_renderbuffers(pDRICtx, dPriv, GL_TRUE);
+ rb = rfb->color_rb[0];
+ if (rb->bo == NULL) {
+ /* Failed to BO for the buffer */
+ return;
+ }
+
+ _mesa_lock_texture(radeon->glCtx, texObj);
+ if (t->bo) {
+ radeon_bo_unref(t->bo);
+ t->bo = NULL;
+ }
+ if (rImage->bo) {
+ radeon_bo_unref(rImage->bo);
+ rImage->bo = NULL;
+ }
+
+ radeon_miptree_unreference(&t->mt);
+ radeon_miptree_unreference(&rImage->mt);
+
+ _mesa_init_teximage_fields(radeon->glCtx, target, texImage,
+ rb->base.Width, rb->base.Height, 1, 0, rb->cpp);
+ texImage->RowStride = rb->pitch / rb->cpp;
+ rImage->bo = rb->bo;
+ radeon_bo_ref(rImage->bo);
+ t->bo = rb->bo;
+ radeon_bo_ref(t->bo);
+ t->tile_bits = 0;
+ t->image_override = GL_TRUE;
+ t->override_offset = 0;
+ t->pp_txpitch &= (1 << 13) -1;
+ pitch_val = rb->pitch;
+ switch (rb->cpp) {
+ case 4:
+ if (texture_format == __DRI_TEXTURE_FORMAT_RGB)
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+ else
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, W, W8Z8Y8X8);
+ pitch_val /= 4;
+ break;
+ case 3:
+ default:
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, W8Z8Y8X8);
+ pitch_val /= 4;
+ break;
+ case 2:
+ t->pp_txformat = R300_EASY_TX_FORMAT(X, Y, Z, ONE, Z5Y6X5);
+ pitch_val /= 2;
+ break;
+ }
+ pitch_val--;
+ t->pp_txsize = (((R300_TX_WIDTHMASK_MASK & ((rb->base.Width - 1) << R300_TX_WIDTHMASK_SHIFT)))
+ | ((R300_TX_HEIGHTMASK_MASK & ((rb->base.Height - 1) << R300_TX_HEIGHTMASK_SHIFT))));
+ t->pp_txsize |= R300_TX_SIZE_TXPITCH_EN;
+ t->pp_txpitch |= pitch_val;
+
+ if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515) {
+ if (rb->base.Width > 2048)
+ t->pp_txpitch |= R500_TXWIDTH_BIT11;
+ else
+ t->pp_txpitch &= ~R500_TXWIDTH_BIT11;
+ if (rb->base.Height > 2048)
+ t->pp_txpitch |= R500_TXHEIGHT_BIT11;
+ else
+ t->pp_txpitch &= ~R500_TXHEIGHT_BIT11;
+ }
+ t->validated = GL_TRUE;
+ _mesa_unlock_texture(radeon->glCtx, texObj);
+ return;
+}
+
+void r300SetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv)
+{
+ r300SetTexBuffer2(pDRICtx, target, __DRI_TEXTURE_FORMAT_RGBA, dPriv);
+}
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
new file mode 100644
index 0000000000..a1fe378029
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -0,0 +1,414 @@
+/**************************************************************************
+
+Copyright (C) 2005 Aapo Tahkola <aet@rasterburn.org>
+Copyright (C) 2008 Oliver McFadden <z3ro.geek@gmail.com>
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/* Radeon R5xx Acceleration, Revision 1.2 */
+
+#include "main/glheader.h"
+#include "main/macros.h"
+#include "main/enums.h"
+#include "shader/program.h"
+#include "shader/programopt.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+#include "shader/prog_print.h"
+#include "shader/prog_statevars.h"
+#include "tnl/tnl.h"
+
+#include "compiler/radeon_compiler.h"
+#include "radeon_mesa_to_rc.h"
+#include "r300_context.h"
+#include "r300_fragprog_common.h"
+#include "r300_state.h"
+
+/**
+ * Write parameter array for the given vertex program into dst.
+ * Return the total number of components written.
+ */
+static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst)
+{
+ int i;
+
+ if (vp->Base->IsNVProgram) {
+ _mesa_load_tracked_matrices(ctx);
+ } else {
+ if (vp->Base->Base.Parameters) {
+ _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
+ }
+ }
+
+ for(i = 0; i < vp->code.constants.Count; ++i) {
+ const float * src = 0;
+ const struct rc_constant * constant = &vp->code.constants.Constants[i];
+
+ switch(constant->Type) {
+ case RC_CONSTANT_EXTERNAL:
+ if (vp->Base->IsNVProgram) {
+ src = ctx->VertexProgram.Parameters[constant->u.External];
+ } else {
+ src = vp->Base->Base.Parameters->ParameterValues[constant->u.External];
+ }
+ break;
+
+ case RC_CONSTANT_IMMEDIATE:
+ src = constant->u.Immediate;
+ break;
+ }
+
+ assert(src);
+ dst[4*i] = src[0];
+ dst[4*i + 1] = src[1];
+ dst[4*i + 2] = src[2];
+ dst[4*i + 3] = src[3];
+ }
+
+ return 4 * vp->code.constants.Count;
+}
+
+static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads)
+{
+ GLbitfield outputs = 0;
+ int i;
+
+#define ADD_OUTPUT(fp_attr, vp_result) \
+ do { \
+ if (fpreads & (1 << (fp_attr))) \
+ outputs |= (1 << (vp_result)); \
+ } while (0)
+
+ ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
+ ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
+
+ for (i = 0; i <= 7; ++i) {
+ ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
+ }
+
+#undef ADD_OUTPUT
+
+ if ((fpreads & (1 << FRAG_ATTRIB_COL0)) &&
+ (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0)))
+ outputs |= 1 << VERT_RESULT_BFC0;
+ if ((fpreads & (1 << FRAG_ATTRIB_COL1)) &&
+ (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1)))
+ outputs |= 1 << VERT_RESULT_BFC1;
+
+ outputs |= 1 << VERT_RESULT_HPOS;
+ if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ))
+ outputs |= 1 << VERT_RESULT_PSIZ;
+
+ return outputs;
+}
+
+
+static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
+{
+ int i;
+ int cur_reg;
+ GLuint OutputsWritten, InputsRead;
+
+ OutputsWritten = c->Base.Program.OutputsWritten;
+ InputsRead = c->Base.Program.InputsRead;
+
+ cur_reg = -1;
+ for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (InputsRead & (1 << i))
+ c->code->inputs[i] = ++cur_reg;
+ else
+ c->code->inputs[i] = -1;
+ }
+
+ cur_reg = 0;
+ for (i = 0; i < VERT_RESULT_MAX; i++)
+ c->code->outputs[i] = -1;
+
+ assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
+
+ if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
+ c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
+ c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
+ }
+
+ /* If we're writing back facing colors we need to send
+ * four colors to make front/back face colors selection work.
+ * If the vertex program doesn't write all 4 colors, lets
+ * pretend it does by skipping output index reg so the colors
+ * get written into appropriate output vectors.
+ */
+ if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
+ c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
+ c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
+ OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ cur_reg++;
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
+ c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
+ } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
+ cur_reg++;
+ }
+
+ for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
+ if (OutputsWritten & (1 << i)) {
+ c->code->outputs[i] = cur_reg++;
+ }
+ }
+
+ if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
+ c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
+ }
+}
+
+/**
+ * The NV_vertex_program spec mandates that all registers be
+ * initialized to zero. We do this here unconditionally.
+ *
+ * \note We rely on dead-code elimination in the compiler.
+ */
+static void initialize_NV_registers(struct radeon_compiler * compiler)
+{
+ unsigned int reg;
+ struct rc_instruction * inst;
+
+ for(reg = 0; reg < 12; ++reg) {
+ inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
+ inst->U.I.Opcode = RC_OPCODE_MOV;
+ inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
+ inst->U.I.DstReg.Index = reg;
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+ }
+
+ inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
+ inst->U.I.Opcode = RC_OPCODE_ARL;
+ inst->U.I.DstReg.File = RC_FILE_ADDRESS;
+ inst->U.I.DstReg.Index = 0;
+ inst->U.I.DstReg.WriteMask = WRITEMASK_X;
+ inst->U.I.SrcReg[0].File = RC_FILE_NONE;
+ inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
+}
+
+static struct r300_vertex_program *build_program(GLcontext *ctx,
+ struct r300_vertex_program_key *wanted_key,
+ const struct gl_vertex_program *mesa_vp)
+{
+ struct r300_vertex_program *vp;
+ struct r300_vertex_program_compiler compiler;
+
+ vp = calloc(1, sizeof(*vp));
+ vp->Base = _mesa_clone_vertex_program(ctx, mesa_vp);
+ memcpy(&vp->key, wanted_key, sizeof(vp->key));
+
+ rc_init(&compiler.Base);
+ compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE;
+
+ compiler.code = &vp->code;
+ compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
+ compiler.SetHwInputOutput = &t_inputs_outputs;
+
+ if (compiler.Base.Debug) {
+ fprintf(stderr, "Initial vertex program:\n");
+ _mesa_print_program(&vp->Base->Base);
+ fflush(stderr);
+ }
+
+ if (mesa_vp->IsPositionInvariant) {
+ _mesa_insert_mvp_code(ctx, vp->Base);
+ }
+
+ radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
+
+ if (mesa_vp->IsNVProgram)
+ initialize_NV_registers(&compiler.Base);
+
+ rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
+
+ if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
+ unsigned int vp_wpos_attr = vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
+
+ /* Set empty writemask for instructions writing to vp_wpos_attr
+ * before moving the wpos attr there.
+ * Such instructions will be removed by DCE.
+ */
+ rc_move_output(&compiler.Base, vp_wpos_attr, vp->key.WPosAttr, 0);
+ rc_copy_output(&compiler.Base, VERT_RESULT_HPOS, vp_wpos_attr);
+ }
+
+ if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
+ unsigned int vp_fog_attr = vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
+
+ /* Set empty writemask for instructions writing to vp_fog_attr
+ * before moving the fog attr there.
+ * Such instructions will be removed by DCE.
+ */
+ rc_move_output(&compiler.Base, vp_fog_attr, vp->key.FogAttr, 0);
+ rc_move_output(&compiler.Base, VERT_RESULT_FOGC, vp_fog_attr, WRITEMASK_X);
+ }
+
+ r3xx_compile_vertex_program(&compiler);
+
+ if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) {
+ rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n");
+ }
+
+ vp->error = compiler.Base.Error;
+
+ vp->Base->Base.InputsRead = vp->code.InputsRead;
+ vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;
+
+ rc_destroy(&compiler.Base);
+
+ return vp;
+}
+
+struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
+{
+ r300ContextPtr r300 = R300_CONTEXT(ctx);
+ struct r300_vertex_program_key wanted_key = { 0 };
+ struct r300_vertex_program_cont *vpc;
+ struct r300_vertex_program *vp;
+
+ vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
+
+ if (!r300->selected_fp) {
+ /* This can happen when GetProgramiv is called to check
+ * whether the program runs natively.
+ *
+ * To be honest, this is not a very good solution,
+ * but solving the problem of reporting good values
+ * for those queries is tough anyway considering that
+ * we recompile vertex programs based on the precise
+ * fragment program that is in use.
+ */
+ r300SelectAndTranslateFragmentShader(ctx);
+ }
+
+ assert(r300->selected_fp);
+ wanted_key.FpReads = r300->selected_fp->InputsRead;
+ wanted_key.FogAttr = r300->selected_fp->fog_attr;
+ wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
+
+ for (vp = vpc->progs; vp; vp = vp->next) {
+ if (memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) {
+ return r300->selected_vp = vp;
+ }
+ }
+
+ vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
+ vp->next = vpc->progs;
+ vpc->progs = vp;
+
+ return r300->selected_vp = vp;
+}
+
+#define bump_vpu_count(ptr, new_count) do { \
+ drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
+ int _nc=(new_count)/4; \
+ if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
+ } while(0)
+
+static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code)
+{
+ int i;
+
+ assert((code->length > 0) && (code->length % 4 == 0));
+
+ switch ((dest >> 8) & 0xf) {
+ case 0:
+ R300_STATECHANGE(r300, vpi);
+ for (i = 0; i < code->length; i++)
+ r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
+ bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
+ break;
+ case 2:
+ R300_STATECHANGE(r300, vpp);
+ for (i = 0; i < code->length; i++)
+ r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
+ bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
+ break;
+ case 4:
+ R300_STATECHANGE(r300, vps);
+ for (i = 0; i < code->length; i++)
+ r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
+ bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
+ break;
+ default:
+ fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
+ exit(-1);
+ }
+}
+
+void r300SetupVertexProgram(r300ContextPtr rmesa)
+{
+ GLcontext *ctx = rmesa->radeon.glCtx;
+ struct r300_vertex_program *prog = rmesa->selected_vp;
+ int inst_count = 0;
+ int param_count = 0;
+
+ /* Reset state, in case we don't use something */
+ ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
+ ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
+ ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
+
+ R300_STATECHANGE(rmesa, vap_cntl);
+ R300_STATECHANGE(rmesa, vpp);
+ param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
+ if (!rmesa->radeon.radeonScreen->kernel_mm && param_count > 255 * 4) {
+ WARN_ONCE("Too many VP params, expect rendering errors\n");
+ }
+ /* Prevent the overflow (vpu.count is u8) */
+ bump_vpu_count(rmesa->hw.vpp.cmd, MIN2(255 * 4, param_count));
+ param_count /= 4;
+
+ r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code));
+ inst_count = (prog->code.length / 4) - 1;
+
+ r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead),
+ _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries);
+
+ R300_STATECHANGE(rmesa, pvs);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
+ (inst_count << R300_PVS_LAST_INST_SHIFT);
+
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | ((param_count - 1) << R300_PVS_MAX_CONST_ADDR_SHIFT);
+ rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
+}
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.h b/src/mesa/drivers/dri/r300/r300_vertprog.h
new file mode 100644
index 0000000000..ccec896be4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.h
@@ -0,0 +1,11 @@
+#ifndef __R300_VERTPROG_H_
+#define __R300_VERTPROG_H_
+
+#include "r300_reg.h"
+
+
+void r300SetupVertexProgram(r300ContextPtr rmesa);
+
+struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx);
+
+#endif
diff --git a/src/mesa/drivers/dri/r300/radeon_bo.c b/src/mesa/drivers/dri/r300/radeon_bo.c
new file mode 120000
index 0000000000..9448ffee54
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_bo.c
@@ -0,0 +1 @@
+../radeon/radeon_bo.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_bo_int_drm.h b/src/mesa/drivers/dri/r300/radeon_bo_int_drm.h
new file mode 120000
index 0000000000..029450928b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_bo_int_drm.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_int_drm.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_bo_legacy.c b/src/mesa/drivers/dri/r300/radeon_bo_legacy.c
new file mode 120000
index 0000000000..79ad050e6b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_bo_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_bo_legacy.h b/src/mesa/drivers/dri/r300/radeon_bo_legacy.h
new file mode 120000
index 0000000000..83b0f7ffab
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_bo_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_bo_legacy.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h b/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h
new file mode 120000
index 0000000000..ca894b2443
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_bocs_wrapper.h
@@ -0,0 +1 @@
+../radeon/radeon_bocs_wrapper.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_buffer_objects.c b/src/mesa/drivers/dri/r300/radeon_buffer_objects.c
new file mode 120000
index 0000000000..f6a5f66470
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_buffer_objects.c
@@ -0,0 +1 @@
+../radeon/radeon_buffer_objects.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_buffer_objects.h b/src/mesa/drivers/dri/r300/radeon_buffer_objects.h
new file mode 120000
index 0000000000..2f134fd17b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_buffer_objects.h
@@ -0,0 +1 @@
+../radeon/radeon_buffer_objects.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_chipset.h b/src/mesa/drivers/dri/r300/radeon_chipset.h
new file mode 120000
index 0000000000..eba99001ff
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_chipset.h
@@ -0,0 +1 @@
+../radeon/radeon_chipset.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cmdbuf.h b/src/mesa/drivers/dri/r300/radeon_cmdbuf.h
new file mode 120000
index 0000000000..a799e1dc6d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cmdbuf.h
@@ -0,0 +1 @@
+../radeon/radeon_cmdbuf.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_common.c b/src/mesa/drivers/dri/r300/radeon_common.c
new file mode 120000
index 0000000000..67b19ba940
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_common.c
@@ -0,0 +1 @@
+../radeon/radeon_common.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_common.h b/src/mesa/drivers/dri/r300/radeon_common.h
new file mode 120000
index 0000000000..5bcb696a9f
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_common.h
@@ -0,0 +1 @@
+../radeon/radeon_common.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_common_context.c b/src/mesa/drivers/dri/r300/radeon_common_context.c
new file mode 120000
index 0000000000..86800f3819
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_common_context.c
@@ -0,0 +1 @@
+../radeon/radeon_common_context.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_common_context.h b/src/mesa/drivers/dri/r300/radeon_common_context.h
new file mode 120000
index 0000000000..4d66312550
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_common_context.h
@@ -0,0 +1 @@
+../radeon/radeon_common_context.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_context.h b/src/mesa/drivers/dri/r300/radeon_context.h
new file mode 100644
index 0000000000..da4812d323
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_context.h
@@ -0,0 +1,62 @@
+/**************************************************************************
+
+Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
+ VA Linux Systems Inc., Fremont, California.
+Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/*
+ * Authors:
+ * Gareth Hughes <gareth@valinux.com>
+ * Keith Whitwell <keith@tungstengraphics.com>
+ * Kevin E. Martin <martin@valinux.com>
+ * Nicolai Haehnle <prefect_@gmx.net>
+ */
+
+#ifndef __RADEON_CONTEXT_H__
+#define __RADEON_CONTEXT_H__
+
+#include "main/mtypes.h"
+#include "main/colormac.h"
+#include "radeon_screen.h"
+#include "drm.h"
+#include "dri_util.h"
+
+#include "radeon_screen.h"
+
+#define FALLBACK( radeon, bit, mode ) fprintf(stderr, "%s:%s\n", __LINE__, __FILE__);
+
+/* TCL fallbacks */
+extern void radeonTclFallback(GLcontext * ctx, GLuint bit, GLboolean mode);
+
+#define TCL_FALLBACK( ctx, bit, mode ) ;
+
+
+#endif /* __RADEON_CONTEXT_H__ */
diff --git a/src/mesa/drivers/dri/r300/radeon_cs.c b/src/mesa/drivers/dri/r300/radeon_cs.c
new file mode 120000
index 0000000000..66b7ad1eb0
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cs.c
@@ -0,0 +1 @@
+../radeon/radeon_cs.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cs_int_drm.h b/src/mesa/drivers/dri/r300/radeon_cs_int_drm.h
new file mode 120000
index 0000000000..462f5245d0
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cs_int_drm.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_int_drm.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cs_legacy.c b/src/mesa/drivers/dri/r300/radeon_cs_legacy.c
new file mode 120000
index 0000000000..006720f8a4
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cs_legacy.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cs_legacy.h b/src/mesa/drivers/dri/r300/radeon_cs_legacy.h
new file mode 120000
index 0000000000..a5f95e0a3d
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cs_legacy.h
@@ -0,0 +1 @@
+../radeon/radeon_cs_legacy.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c b/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c
new file mode 120000
index 0000000000..c248ea7d1a
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_cs_space_drm.c
@@ -0,0 +1 @@
+../radeon/radeon_cs_space_drm.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_debug.c b/src/mesa/drivers/dri/r300/radeon_debug.c
new file mode 120000
index 0000000000..c98c2e074c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_debug.c
@@ -0,0 +1 @@
+../radeon/radeon_debug.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_debug.h b/src/mesa/drivers/dri/r300/radeon_debug.h
new file mode 120000
index 0000000000..bd8aa28e89
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_debug.h
@@ -0,0 +1 @@
+../radeon/radeon_debug.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_dma.c b/src/mesa/drivers/dri/r300/radeon_dma.c
new file mode 120000
index 0000000000..43be000625
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_dma.c
@@ -0,0 +1 @@
+../radeon/radeon_dma.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_dma.h b/src/mesa/drivers/dri/r300/radeon_dma.h
new file mode 120000
index 0000000000..82e50634e3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_dma.h
@@ -0,0 +1 @@
+../radeon/radeon_dma.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_fbo.c b/src/mesa/drivers/dri/r300/radeon_fbo.c
new file mode 120000
index 0000000000..0d738d8d78
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_fbo.c
@@ -0,0 +1 @@
+../radeon/radeon_fbo.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_lock.c b/src/mesa/drivers/dri/r300/radeon_lock.c
new file mode 120000
index 0000000000..af4108a8e3
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_lock.c
@@ -0,0 +1 @@
+../radeon/radeon_lock.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_lock.h b/src/mesa/drivers/dri/r300/radeon_lock.h
new file mode 120000
index 0000000000..64bdf94ee7
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_lock.h
@@ -0,0 +1 @@
+../radeon/radeon_lock.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c
new file mode 100644
index 0000000000..9f9dec840b
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "radeon_mesa_to_rc.h"
+
+#include "main/mtypes.h"
+#include "shader/prog_instruction.h"
+#include "shader/prog_parameter.h"
+
+#include "compiler/radeon_compiler.h"
+#include "compiler/radeon_program.h"
+
+
+static rc_opcode translate_opcode(gl_inst_opcode opcode)
+{
+ switch(opcode) {
+ case OPCODE_NOP: return RC_OPCODE_NOP;
+ case OPCODE_ABS: return RC_OPCODE_ABS;
+ case OPCODE_ADD: return RC_OPCODE_ADD;
+ case OPCODE_ARL: return RC_OPCODE_ARL;
+ case OPCODE_CMP: return RC_OPCODE_CMP;
+ case OPCODE_COS: return RC_OPCODE_COS;
+ case OPCODE_DDX: return RC_OPCODE_DDX;
+ case OPCODE_DDY: return RC_OPCODE_DDY;
+ case OPCODE_DP3: return RC_OPCODE_DP3;
+ case OPCODE_DP4: return RC_OPCODE_DP4;
+ case OPCODE_DPH: return RC_OPCODE_DPH;
+ case OPCODE_DST: return RC_OPCODE_DST;
+ case OPCODE_EX2: return RC_OPCODE_EX2;
+ case OPCODE_EXP: return RC_OPCODE_EXP;
+ case OPCODE_FLR: return RC_OPCODE_FLR;
+ case OPCODE_FRC: return RC_OPCODE_FRC;
+ case OPCODE_KIL: return RC_OPCODE_KIL;
+ case OPCODE_LG2: return RC_OPCODE_LG2;
+ case OPCODE_LIT: return RC_OPCODE_LIT;
+ case OPCODE_LOG: return RC_OPCODE_LOG;
+ case OPCODE_LRP: return RC_OPCODE_LRP;
+ case OPCODE_MAD: return RC_OPCODE_MAD;
+ case OPCODE_MAX: return RC_OPCODE_MAX;
+ case OPCODE_MIN: return RC_OPCODE_MIN;
+ case OPCODE_MOV: return RC_OPCODE_MOV;
+ case OPCODE_MUL: return RC_OPCODE_MUL;
+ case OPCODE_POW: return RC_OPCODE_POW;
+ case OPCODE_RCP: return RC_OPCODE_RCP;
+ case OPCODE_RSQ: return RC_OPCODE_RSQ;
+ case OPCODE_SCS: return RC_OPCODE_SCS;
+ case OPCODE_SEQ: return RC_OPCODE_SEQ;
+ case OPCODE_SFL: return RC_OPCODE_SFL;
+ case OPCODE_SGE: return RC_OPCODE_SGE;
+ case OPCODE_SGT: return RC_OPCODE_SGT;
+ case OPCODE_SIN: return RC_OPCODE_SIN;
+ case OPCODE_SLE: return RC_OPCODE_SLE;
+ case OPCODE_SLT: return RC_OPCODE_SLT;
+ case OPCODE_SNE: return RC_OPCODE_SNE;
+ case OPCODE_SUB: return RC_OPCODE_SUB;
+ case OPCODE_SWZ: return RC_OPCODE_SWZ;
+ case OPCODE_TEX: return RC_OPCODE_TEX;
+ case OPCODE_TXB: return RC_OPCODE_TXB;
+ case OPCODE_TXD: return RC_OPCODE_TXD;
+ case OPCODE_TXL: return RC_OPCODE_TXL;
+ case OPCODE_TXP: return RC_OPCODE_TXP;
+ case OPCODE_XPD: return RC_OPCODE_XPD;
+ default: return RC_OPCODE_ILLEGAL_OPCODE;
+ }
+}
+
+static rc_saturate_mode translate_saturate(unsigned int saturate)
+{
+ switch(saturate) {
+ default:
+ case SATURATE_OFF: return RC_SATURATE_NONE;
+ case SATURATE_ZERO_ONE: return RC_SATURATE_ZERO_ONE;
+ }
+}
+
+static rc_register_file translate_register_file(unsigned int file)
+{
+ switch(file) {
+ case PROGRAM_TEMPORARY: return RC_FILE_TEMPORARY;
+ case PROGRAM_INPUT: return RC_FILE_INPUT;
+ case PROGRAM_OUTPUT: return RC_FILE_OUTPUT;
+ case PROGRAM_LOCAL_PARAM:
+ case PROGRAM_ENV_PARAM:
+ case PROGRAM_STATE_VAR:
+ case PROGRAM_NAMED_PARAM:
+ case PROGRAM_CONSTANT:
+ case PROGRAM_UNIFORM: return RC_FILE_CONSTANT;
+ case PROGRAM_ADDRESS: return RC_FILE_ADDRESS;
+ default: return RC_FILE_NONE;
+ }
+}
+
+static void translate_srcreg(struct rc_src_register * dest, struct prog_src_register * src)
+{
+ dest->File = translate_register_file(src->File);
+ dest->Index = src->Index;
+ dest->RelAddr = src->RelAddr;
+ dest->Swizzle = src->Swizzle;
+ dest->Abs = src->Abs;
+ dest->Negate = src->Negate;
+}
+
+static void translate_dstreg(struct rc_dst_register * dest, struct prog_dst_register * src)
+{
+ dest->File = translate_register_file(src->File);
+ dest->Index = src->Index;
+ dest->RelAddr = src->RelAddr;
+ dest->WriteMask = src->WriteMask;
+}
+
+static rc_texture_target translate_tex_target(gl_texture_index target)
+{
+ switch(target) {
+ case TEXTURE_2D_ARRAY_INDEX: return RC_TEXTURE_2D_ARRAY;
+ case TEXTURE_1D_ARRAY_INDEX: return RC_TEXTURE_1D_ARRAY;
+ case TEXTURE_CUBE_INDEX: return RC_TEXTURE_CUBE;
+ case TEXTURE_3D_INDEX: return RC_TEXTURE_3D;
+ case TEXTURE_RECT_INDEX: return RC_TEXTURE_RECT;
+ default:
+ case TEXTURE_2D_INDEX: return RC_TEXTURE_2D;
+ case TEXTURE_1D_INDEX: return RC_TEXTURE_1D;
+ }
+}
+
+static void translate_instruction(struct radeon_compiler * c,
+ struct rc_instruction * dest, struct prog_instruction * src)
+{
+ const struct rc_opcode_info * opcode;
+ unsigned int i;
+
+ dest->U.I.Opcode = translate_opcode(src->Opcode);
+ if (dest->U.I.Opcode == RC_OPCODE_ILLEGAL_OPCODE) {
+ rc_error(c, "Unsupported opcode %i\n", src->Opcode);
+ return;
+ }
+ dest->U.I.SaturateMode = translate_saturate(src->SaturateMode);
+
+ opcode = rc_get_opcode_info(dest->U.I.Opcode);
+
+ for(i = 0; i < opcode->NumSrcRegs; ++i)
+ translate_srcreg(&dest->U.I.SrcReg[i], &src->SrcReg[i]);
+
+ if (opcode->HasDstReg)
+ translate_dstreg(&dest->U.I.DstReg, &src->DstReg);
+
+ if (opcode->HasTexture) {
+ dest->U.I.TexSrcUnit = src->TexSrcUnit;
+ dest->U.I.TexSrcTarget = translate_tex_target(src->TexSrcTarget);
+ dest->U.I.TexShadow = src->TexShadow;
+ }
+}
+
+void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program)
+{
+ struct prog_instruction *source;
+ unsigned int i;
+
+ for(source = program->Instructions; source->Opcode != OPCODE_END; ++source) {
+ struct rc_instruction * dest = rc_insert_new_instruction(c, c->Program.Instructions.Prev);
+ translate_instruction(c, dest, source);
+ }
+
+ c->Program.ShadowSamplers = program->ShadowSamplers;
+ c->Program.InputsRead = program->InputsRead;
+ c->Program.OutputsWritten = program->OutputsWritten;
+
+ int isNVProgram = 0;
+
+ if (program->Target == GL_VERTEX_PROGRAM_ARB) {
+ struct gl_vertex_program * vp = (struct gl_vertex_program *) program;
+ isNVProgram = vp->IsNVProgram;
+ }
+
+ if (isNVProgram) {
+ /* NV_vertex_program has a fixed-sized constant environment.
+ * This could be handled more efficiently for programs that
+ * do not use relative addressing.
+ */
+ for(i = 0; i < 96; ++i) {
+ struct rc_constant constant;
+
+ constant.Type = RC_CONSTANT_EXTERNAL;
+ constant.Size = 4;
+ constant.u.External = i;
+
+ rc_constants_add(&c->Program.Constants, &constant);
+ }
+ } else {
+ for(i = 0; i < program->Parameters->NumParameters; ++i) {
+ struct rc_constant constant;
+
+ constant.Type = RC_CONSTANT_EXTERNAL;
+ constant.Size = 4;
+ constant.u.External = i;
+
+ rc_constants_add(&c->Program.Constants, &constant);
+ }
+ }
+}
diff --git a/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h
new file mode 100644
index 0000000000..9511a04f36
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_mesa_to_rc.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (C) 2009 Nicolai Haehnle.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef RADEON_MESA_TO_RC_H
+#define RADEON_MESA_TO_RC_H
+
+struct gl_program;
+struct radeon_compiler;
+
+void radeon_mesa_to_rc_program(struct radeon_compiler * c, struct gl_program * program);
+
+#endif /* RADEON_MESA_TO_RC_H */
diff --git a/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c
new file mode 120000
index 0000000000..31c0cfbe94
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.c
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h
new file mode 120000
index 0000000000..254d50cf8c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_mipmap_tree.h
@@ -0,0 +1 @@
+../radeon/radeon_mipmap_tree.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_pixel_read.c b/src/mesa/drivers/dri/r300/radeon_pixel_read.c
new file mode 120000
index 0000000000..3b03803126
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_pixel_read.c
@@ -0,0 +1 @@
+../radeon/radeon_pixel_read.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_queryobj.c b/src/mesa/drivers/dri/r300/radeon_queryobj.c
new file mode 120000
index 0000000000..1d6ebc1c48
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_queryobj.c
@@ -0,0 +1 @@
+../radeon/radeon_queryobj.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_queryobj.h b/src/mesa/drivers/dri/r300/radeon_queryobj.h
new file mode 120000
index 0000000000..8f6f842b0a
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_queryobj.h
@@ -0,0 +1 @@
+../radeon/radeon_queryobj.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_screen.c b/src/mesa/drivers/dri/r300/radeon_screen.c
new file mode 120000
index 0000000000..86161118dd
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_screen.c
@@ -0,0 +1 @@
+../radeon/radeon_screen.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_screen.h b/src/mesa/drivers/dri/r300/radeon_screen.h
new file mode 120000
index 0000000000..23bb6bd459
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_screen.h
@@ -0,0 +1 @@
+../radeon/radeon_screen.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_span.c b/src/mesa/drivers/dri/r300/radeon_span.c
new file mode 120000
index 0000000000..232868c4c9
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_span.c
@@ -0,0 +1 @@
+../radeon/radeon_span.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_span.h b/src/mesa/drivers/dri/r300/radeon_span.h
new file mode 120000
index 0000000000..f9d634508c
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_span.h
@@ -0,0 +1 @@
+../radeon/radeon_span.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_tex_copy.c b/src/mesa/drivers/dri/r300/radeon_tex_copy.c
new file mode 120000
index 0000000000..dfa5ba34e6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_tex_copy.c
@@ -0,0 +1 @@
+../radeon/radeon_tex_copy.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_tex_getimage.c b/src/mesa/drivers/dri/r300/radeon_tex_getimage.c
new file mode 120000
index 0000000000..d9836d7326
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_tex_getimage.c
@@ -0,0 +1 @@
+../radeon/radeon_tex_getimage.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_texture.c b/src/mesa/drivers/dri/r300/radeon_texture.c
new file mode 120000
index 0000000000..a822710915
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_texture.c
@@ -0,0 +1 @@
+../radeon/radeon_texture.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_texture.h b/src/mesa/drivers/dri/r300/radeon_texture.h
new file mode 120000
index 0000000000..17fac3d5ea
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_texture.h
@@ -0,0 +1 @@
+../radeon/radeon_texture.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_tile.c b/src/mesa/drivers/dri/r300/radeon_tile.c
new file mode 120000
index 0000000000..d4bfe27da6
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_tile.c
@@ -0,0 +1 @@
+../radeon/radeon_tile.c \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/radeon_tile.h b/src/mesa/drivers/dri/r300/radeon_tile.h
new file mode 120000
index 0000000000..31074c581e
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/radeon_tile.h
@@ -0,0 +1 @@
+../radeon/radeon_tile.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon.h b/src/mesa/drivers/dri/r300/server/radeon.h
new file mode 120000
index 0000000000..81274a54f1
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon.h
@@ -0,0 +1 @@
+../../radeon/server/radeon.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon_dri.h b/src/mesa/drivers/dri/r300/server/radeon_dri.h
new file mode 120000
index 0000000000..27c591d3c9
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon_dri.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_dri.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon_macros.h b/src/mesa/drivers/dri/r300/server/radeon_macros.h
new file mode 120000
index 0000000000..c56cd735b8
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon_macros.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_macros.h \ No newline at end of file
diff --git a/src/mesa/drivers/dri/r300/server/radeon_reg.h b/src/mesa/drivers/dri/r300/server/radeon_reg.h
new file mode 120000
index 0000000000..e2349dcb68
--- /dev/null
+++ b/src/mesa/drivers/dri/r300/server/radeon_reg.h
@@ -0,0 +1 @@
+../../radeon/server/radeon_reg.h \ No newline at end of file