summaryrefslogtreecommitdiff
path: root/src/gallium/drivers/r300
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium/drivers/r300')
-rw-r--r--src/gallium/drivers/r300/Makefile43
-rw-r--r--src/gallium/drivers/r300/SConscript43
-rw-r--r--src/gallium/drivers/r300/r300_blit.c252
-rw-r--r--src/gallium/drivers/r300/r300_cb.h142
-rw-r--r--src/gallium/drivers/r300/r300_chipset.c380
-rw-r--r--src/gallium/drivers/r300/r300_chipset.h103
-rw-r--r--src/gallium/drivers/r300/r300_context.c329
-rw-r--r--src/gallium/drivers/r300/r300_context.h591
-rw-r--r--src/gallium/drivers/r300/r300_cs.h152
-rw-r--r--src/gallium/drivers/r300/r300_debug.c132
-rw-r--r--src/gallium/drivers/r300/r300_defines.h47
-rw-r--r--src/gallium/drivers/r300/r300_emit.c1042
-rw-r--r--src/gallium/drivers/r300/r300_emit.h106
-rw-r--r--src/gallium/drivers/r300/r300_flush.c89
-rw-r--r--src/gallium/drivers/r300/r300_fs.c506
-rw-r--r--src/gallium/drivers/r300/r300_fs.h84
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.c122
-rw-r--r--src/gallium/drivers/r300/r300_hyperz.h30
-rw-r--r--src/gallium/drivers/r300/r300_query.c231
-rw-r--r--src/gallium/drivers/r300/r300_reg.h3491
-rw-r--r--src/gallium/drivers/r300/r300_render.c1073
-rw-r--r--src/gallium/drivers/r300/r300_render_stencilref.c158
-rw-r--r--src/gallium/drivers/r300/r300_render_translate.c322
-rw-r--r--src/gallium/drivers/r300/r300_resource.c73
-rw-r--r--src/gallium/drivers/r300/r300_screen.c431
-rw-r--r--src/gallium/drivers/r300/r300_screen.h104
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.c314
-rw-r--r--src/gallium/drivers/r300/r300_screen_buffer.h119
-rw-r--r--src/gallium/drivers/r300/r300_shader_semantics.h66
-rw-r--r--src/gallium/drivers/r300/r300_state.c1750
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.c684
-rw-r--r--src/gallium/drivers/r300/r300_state_derived.h30
-rw-r--r--src/gallium/drivers/r300/r300_state_inlines.h454
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.c111
-rw-r--r--src/gallium/drivers/r300/r300_state_invariant.h31
-rw-r--r--src/gallium/drivers/r300/r300_texture.c1149
-rw-r--r--src/gallium/drivers/r300/r300_texture.h78
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.c383
-rw-r--r--src/gallium/drivers/r300/r300_tgsi_to_rc.h54
-rw-r--r--src/gallium/drivers/r300/r300_transfer.c271
-rw-r--r--src/gallium/drivers/r300/r300_transfer.h51
-rw-r--r--src/gallium/drivers/r300/r300_vs.c252
-rw-r--r--src/gallium/drivers/r300/r300_vs.h67
-rw-r--r--src/gallium/drivers/r300/r300_vs_draw.c358
-rw-r--r--src/gallium/drivers/r300/r300_winsys.h168
45 files changed, 16466 insertions, 0 deletions
diff --git a/src/gallium/drivers/r300/Makefile b/src/gallium/drivers/r300/Makefile
new file mode 100644
index 0000000000..dd897f6072
--- /dev/null
+++ b/src/gallium/drivers/r300/Makefile
@@ -0,0 +1,43 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = r300
+
+C_SOURCES = \
+ r300_blit.c \
+ r300_chipset.c \
+ r300_context.c \
+ r300_debug.c \
+ r300_emit.c \
+ r300_flush.c \
+ r300_fs.c \
+ r300_hyperz.c \
+ r300_query.c \
+ r300_render.c \
+ r300_render_stencilref.c \
+ r300_render_translate.c \
+ r300_resource.c \
+ r300_screen.c \
+ r300_screen_buffer.c \
+ r300_state.c \
+ r300_state_derived.c \
+ r300_state_invariant.c \
+ r300_vs.c \
+ r300_vs_draw.c \
+ r300_texture.c \
+ r300_tgsi_to_rc.c \
+ r300_transfer.c
+
+LIBRARY_INCLUDES = \
+ -I$(TOP)/src/mesa/drivers/dri/r300/compiler \
+ -I$(TOP)/src/gallium/winsys/drm/radeon/core
+
+COMPILER_ARCHIVE = $(TOP)/src/mesa/drivers/dri/r300/compiler/libr300compiler.a
+
+EXTRA_OBJECTS = \
+ $(COMPILER_ARCHIVE)
+
+include ../../Makefile.template
+
+$(COMPILER_ARCHIVE):
+ $(MAKE) -C $(TOP)/src/mesa/drivers/dri/r300/compiler
diff --git a/src/gallium/drivers/r300/SConscript b/src/gallium/drivers/r300/SConscript
new file mode 100644
index 0000000000..ee19e9d278
--- /dev/null
+++ b/src/gallium/drivers/r300/SConscript
@@ -0,0 +1,43 @@
+Import('*')
+
+r300compiler = SConscript('#/src/mesa/drivers/dri/r300/compiler/SConscript')
+
+env = env.Clone()
+# add the paths for r300compiler
+env.Append(CPPPATH = [
+ '#/src/mesa/drivers/dri/r300/compiler',
+ '#/src/gallium/winsys/drm/radeon/core',
+ '#/include',
+ '#/src/mesa',
+])
+
+r300 = env.ConvenienceLibrary(
+ target = 'r300',
+ source = [
+ 'r300_blit.c',
+ 'r300_chipset.c',
+ 'r300_context.c',
+ 'r300_debug.c',
+ 'r300_emit.c',
+ 'r300_flush.c',
+ 'r300_fs.c',
+ 'r300_hyperz.c',
+ 'r300_query.c',
+ 'r300_render.c',
+ 'r300_render_stencilref.c',
+ 'r300_render_translate.c',
+ 'r300_resource.c',
+ 'r300_screen.c',
+ 'r300_screen_buffer.c',
+ 'r300_state.c',
+ 'r300_state_derived.c',
+ 'r300_state_invariant.c',
+ 'r300_vs.c',
+ 'r300_vs_draw.c',
+ 'r300_texture.c',
+ 'r300_tgsi_to_rc.c',
+ 'r300_transfer.c',
+ ] + r300compiler) + r300compiler
+
+Export('r300')
+
diff --git a/src/gallium/drivers/r300/r300_blit.c b/src/gallium/drivers/r300/r300_blit.c
new file mode 100644
index 0000000000..2a47701291
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_context.h"
+#include "r300_texture.h"
+
+#include "util/u_format.h"
+
+enum r300_blitter_op
+{
+ R300_CLEAR,
+ R300_CLEAR_SURFACE,
+ R300_COPY
+};
+
+static void r300_blitter_begin(struct r300_context* r300, enum r300_blitter_op op)
+{
+ if (r300->query_current) {
+ r300->blitter_saved_query = r300->query_current;
+ r300_stop_query(r300);
+ }
+
+ /* Yeah we have to save all those states to ensure the blitter operation
+ * is really transparent. The states will be restored by the blitter once
+ * copying is done. */
+ util_blitter_save_blend(r300->blitter, r300->blend_state.state);
+ util_blitter_save_depth_stencil_alpha(r300->blitter, r300->dsa_state.state);
+ util_blitter_save_stencil_ref(r300->blitter, &(r300->stencil_ref));
+ util_blitter_save_rasterizer(r300->blitter, r300->rs_state.state);
+ util_blitter_save_fragment_shader(r300->blitter, r300->fs.state);
+ util_blitter_save_vertex_shader(r300->blitter, r300->vs_state.state);
+ util_blitter_save_viewport(r300->blitter, &r300->viewport);
+ util_blitter_save_clip(r300->blitter, (struct pipe_clip_state*)r300->clip_state.state);
+ util_blitter_save_vertex_elements(r300->blitter, r300->velems);
+ util_blitter_save_vertex_buffers(r300->blitter, r300->vertex_buffer_count,
+ r300->vertex_buffer);
+
+ if (op & (R300_CLEAR_SURFACE | R300_COPY))
+ util_blitter_save_framebuffer(r300->blitter, r300->fb_state.state);
+
+ if (op & R300_COPY) {
+ struct r300_textures_state* state =
+ (struct r300_textures_state*)r300->textures_state.state;
+
+ util_blitter_save_fragment_sampler_states(
+ r300->blitter, state->sampler_state_count,
+ (void**)state->sampler_states);
+
+ util_blitter_save_fragment_sampler_views(
+ r300->blitter, state->sampler_view_count,
+ (struct pipe_sampler_view**)state->sampler_views);
+ }
+}
+
+static void r300_blitter_end(struct r300_context *r300)
+{
+ if (r300->blitter_saved_query) {
+ r300_resume_query(r300, r300->blitter_saved_query);
+ r300->blitter_saved_query = NULL;
+ }
+}
+
+/* Clear currently bound buffers. */
+static void r300_clear(struct pipe_context* pipe,
+ unsigned buffers,
+ const float* rgba,
+ double depth,
+ unsigned stencil)
+{
+ /* XXX Implement fastfill.
+ *
+ * If fastfill is enabled, a few facts should be considered:
+ *
+ * 1) Zbuffer must be micro-tiled and whole microtiles must be
+ * written.
+ *
+ * 2) ZB_DEPTHCLEARVALUE is used to clear a zbuffer and Z Mask must be
+ * equal to 0.
+ *
+ * 3) For 16-bit integer buffering, compression causes a hung with one or
+ * two samples and should not be used.
+ *
+ * 4) Fastfill must not be used if reading of compressed Z data is disabled
+ * and writing of compressed Z data is enabled (RD/WR_COMP_ENABLE),
+ * i.e. it cannot be used to compress the zbuffer.
+ * (what the hell does that mean and how does it fit in clearing
+ * the buffers?)
+ *
+ * - Marek
+ */
+
+ struct r300_context* r300 = r300_context(pipe);
+ struct pipe_framebuffer_state* fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+
+ r300_blitter_begin(r300, R300_CLEAR);
+ util_blitter_clear(r300->blitter,
+ fb->width,
+ fb->height,
+ fb->nr_cbufs,
+ buffers, rgba, depth, stencil);
+ r300_blitter_end(r300);
+}
+
+/* Clear a region of a color surface to a constant value. */
+static void r300_clear_render_target(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ const float *rgba,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct r300_context *r300 = r300_context(pipe);
+
+ r300_blitter_begin(r300, R300_CLEAR_SURFACE);
+ util_blitter_clear_render_target(r300->blitter, dst, rgba,
+ dstx, dsty, width, height);
+ r300_blitter_end(r300);
+}
+
+/* Clear a region of a depth stencil surface. */
+static void r300_clear_depth_stencil(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct r300_context *r300 = r300_context(pipe);
+
+ r300_blitter_begin(r300, R300_CLEAR_SURFACE);
+ util_blitter_clear_depth_stencil(r300->blitter, dst, clear_flags, depth, stencil,
+ dstx, dsty, width, height);
+ r300_blitter_end(r300);
+}
+
+/* Copy a block of pixels from one surface to another using HW. */
+static void r300_hw_copy_region(struct pipe_context* pipe,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned srcx, unsigned srcy, unsigned srcz,
+ unsigned width, unsigned height)
+{
+ struct r300_context* r300 = r300_context(pipe);
+
+ r300_blitter_begin(r300, R300_COPY);
+ util_blitter_copy_region(r300->blitter, dst, subdst, dstx, dsty, dstz,
+ src, subsrc, srcx, srcy, srcz, width, height,
+ TRUE);
+ r300_blitter_end(r300);
+}
+
+/* Copy a block of pixels from one surface to another. */
+static void r300_resource_copy_region(struct pipe_context *pipe,
+ struct pipe_resource *dst,
+ struct pipe_subresource subdst,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src,
+ struct pipe_subresource subsrc,
+ unsigned srcx, unsigned srcy, unsigned srcz,
+ unsigned width, unsigned height)
+{
+ enum pipe_format old_format = dst->format;
+ enum pipe_format new_format = old_format;
+
+ if (dst->format != src->format) {
+ debug_printf("r300: Implementation error: Format mismatch in %s\n"
+ " : src: %s dst: %s\n", __FUNCTION__,
+ util_format_short_name(src->format),
+ util_format_short_name(dst->format));
+ debug_assert(0);
+ }
+
+ if (!pipe->screen->is_format_supported(pipe->screen,
+ old_format, src->target,
+ src->nr_samples,
+ PIPE_BIND_RENDER_TARGET |
+ PIPE_BIND_SAMPLER_VIEW, 0) &&
+ util_format_is_plain(old_format)) {
+ switch (util_format_get_blocksize(old_format)) {
+ case 1:
+ new_format = PIPE_FORMAT_I8_UNORM;
+ break;
+ case 2:
+ new_format = PIPE_FORMAT_B4G4R4A4_UNORM;
+ break;
+ case 4:
+ new_format = PIPE_FORMAT_B8G8R8A8_UNORM;
+ break;
+ case 8:
+ new_format = PIPE_FORMAT_R16G16B16A16_UNORM;
+ break;
+ default:
+ debug_printf("r300: surface_copy: Unhandled format: %s. Falling back to software.\n"
+ "r300: surface_copy: Software fallback doesn't work for tiled textures.\n",
+ util_format_short_name(old_format));
+ }
+ }
+
+ if (old_format != new_format) {
+ dst->format = new_format;
+ src->format = new_format;
+
+ r300_texture_reinterpret_format(pipe->screen,
+ dst, new_format);
+ r300_texture_reinterpret_format(pipe->screen,
+ src, new_format);
+ }
+
+ r300_hw_copy_region(pipe, dst, subdst, dstx, dsty, dstz,
+ src, subsrc, srcx, srcy, srcz, width, height);
+
+ if (old_format != new_format) {
+ dst->format = old_format;
+ src->format = old_format;
+
+ r300_texture_reinterpret_format(pipe->screen,
+ dst, old_format);
+ r300_texture_reinterpret_format(pipe->screen,
+ src, old_format);
+ }
+}
+
+void r300_init_blit_functions(struct r300_context *r300)
+{
+ r300->context.clear = r300_clear;
+ r300->context.clear_render_target = r300_clear_render_target;
+ r300->context.clear_depth_stencil = r300_clear_depth_stencil;
+ r300->context.resource_copy_region = r300_resource_copy_region;
+}
diff --git a/src/gallium/drivers/r300/r300_cb.h b/src/gallium/drivers/r300/r300_cb.h
new file mode 100644
index 0000000000..6987471244
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_cb.h
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/**
+ * This file contains macros for building command buffers in memory.
+ *
+ * Use NEW_CB for buffers with a varying size and it will also allocate
+ * the buffer.
+ * Use BEGIN_CB for arrays with a static size.
+ *
+ * Example:
+ *
+ * uint32_t cb[3];
+ * CB_LOCALS;
+ *
+ * BEGIN_CB(cb, 3);
+ * OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);
+ * OUT_CB(blend_color_red_alpha);
+ * OUT_CB(blend_color_green_blue);
+ * END_CB;
+ *
+ * And later:
+ *
+ * CS_LOCALS;
+ * WRITE_CS_TABLE(cb, 3);
+ *
+ * Or using a little slower variant:
+ *
+ * CS_LOCALS;
+ * BEGIN_CS(cb, 3);
+ * OUT_CS_TABLE(cb, 3);
+ * END_CS;
+ */
+
+#ifndef R300_CB_H
+#define R300_CB_H
+
+#include "r300_reg.h"
+
+/* Yes, I know macros are ugly. However, they are much prettier than the code
+ * that they neatly hide away, and don't have the cost of function setup, so
+ * we're going to use them. */
+
+#ifdef DEBUG
+#define CB_DEBUG(x) x
+#else
+#define CB_DEBUG(x)
+#endif
+
+
+/**
+ * Command buffer setup.
+ */
+
+#define CB_LOCALS \
+ CB_DEBUG(int cs_count = 0;) \
+ uint32_t *cs_ptr = NULL; \
+ CB_DEBUG((void) cs_count;) (void) cs_ptr;
+
+#define NEW_CB(ptr, size) do { \
+ assert(sizeof(*ptr) == sizeof(uint32_t)); \
+ cs_ptr = (ptr) = (uint32_t*)malloc((size) * sizeof(uint32_t)); \
+ CB_DEBUG(cs_count = size;) \
+} while (0)
+
+#define BEGIN_CB(ptr, size) do { \
+ assert(sizeof(*ptr) == sizeof(uint32_t)); \
+ cs_ptr = ptr; \
+ CB_DEBUG(cs_count = size;) \
+} while (0)
+
+#define BEGIN_CS_AS_CB(r300, size) \
+ BEGIN_CB(r300->rws->get_cs_pointer(r300->rws, dwords), dwords)
+
+#define END_CB do { \
+ CB_DEBUG(if (cs_count != 0) \
+ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
+ cs_count, __FUNCTION__, __FILE__, __LINE__);) \
+} while (0)
+
+
+/**
+ * Storing pure DWORDs.
+ */
+
+#define OUT_CB(value) do { \
+ *cs_ptr = (value); \
+ cs_ptr++; \
+ CB_DEBUG(cs_count--;) \
+} while (0)
+
+#define OUT_CB_TABLE(values, count) do { \
+ memcpy(cs_ptr, values, count * sizeof(uint32_t)); \
+ cs_ptr += count; \
+ CB_DEBUG(cs_count -= count;) \
+} while (0)
+
+#define OUT_CB_32F(value) \
+ OUT_CB(fui(value));
+
+#define OUT_CB_REG(register, value) do { \
+ assert(register); \
+ OUT_CB(CP_PACKET0(register, 0)); \
+ OUT_CB(value); \
+} while (0)
+
+/* Note: This expects count to be the number of registers,
+ * not the actual packet0 count! */
+#define OUT_CB_REG_SEQ(register, count) do { \
+ assert(register); \
+ OUT_CB(CP_PACKET0(register, (count) - 1)); \
+} while (0)
+
+#define OUT_CB_ONE_REG(register, count) do { \
+ assert(register); \
+ OUT_CB(CP_PACKET0(register, (count) - 1) | RADEON_ONE_REG_WR); \
+} while (0)
+
+#define OUT_CB_PKT3(op, count) \
+ OUT_CB(CP_PACKET3(op, count))
+
+#endif /* R300_CB_H */
diff --git a/src/gallium/drivers/r300/r300_chipset.c b/src/gallium/drivers/r300/r300_chipset.c
new file mode 100644
index 0000000000..e6dca66d4a
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_chipset.c
@@ -0,0 +1,380 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_chipset.h"
+
+#include "util/u_debug.h"
+
+#include <stdio.h>
+
+/* r300_chipset: A file all to itself for deducing the various properties of
+ * Radeons. */
+
+/* Parse a PCI ID and fill an r300_capabilities struct with information. */
+void r300_parse_chipset(struct r300_capabilities* caps)
+{
+ /* Reasonable defaults */
+ caps->num_vert_fpus = 2;
+ caps->num_tex_units = 16;
+ caps->has_tcl = debug_get_bool_option("RADEON_NO_TCL", FALSE) ? FALSE : TRUE;
+ caps->is_r400 = FALSE;
+ caps->is_r500 = FALSE;
+ caps->high_second_pipe = FALSE;
+
+ /* Note: These are not ordered by PCI ID. I leave that task to GCC,
+ * which will perform the ordering while collating jump tables. Instead,
+ * I've tried to group them according to capabilities and age. */
+ switch (caps->pci_id) {
+ case 0x4144:
+ caps->family = CHIP_FAMILY_R300;
+ caps->high_second_pipe = TRUE;
+ caps->num_vert_fpus = 4;
+ break;
+
+ case 0x4145:
+ case 0x4146:
+ case 0x4147:
+ case 0x4E44:
+ case 0x4E45:
+ case 0x4E46:
+ case 0x4E47:
+ caps->family = CHIP_FAMILY_R300;
+ caps->high_second_pipe = TRUE;
+ caps->num_vert_fpus = 4;
+ break;
+
+ case 0x4150:
+ case 0x4151:
+ case 0x4152:
+ case 0x4153:
+ case 0x4154:
+ case 0x4155:
+ case 0x4156:
+ case 0x4E50:
+ case 0x4E51:
+ case 0x4E52:
+ case 0x4E53:
+ case 0x4E54:
+ case 0x4E56:
+ caps->family = CHIP_FAMILY_RV350;
+ caps->high_second_pipe = TRUE;
+ break;
+
+ case 0x4148:
+ case 0x4149:
+ case 0x414A:
+ case 0x414B:
+ case 0x4E48:
+ case 0x4E49:
+ case 0x4E4B:
+ caps->family = CHIP_FAMILY_R350;
+ caps->high_second_pipe = TRUE;
+ caps->num_vert_fpus = 4;
+ break;
+
+ case 0x4E4A:
+ caps->family = CHIP_FAMILY_R360;
+ caps->high_second_pipe = TRUE;
+ caps->num_vert_fpus = 4;
+ break;
+
+ case 0x5460:
+ case 0x5462:
+ case 0x5464:
+ case 0x5B60:
+ case 0x5B62:
+ case 0x5B63:
+ case 0x5B64:
+ case 0x5B65:
+ caps->family = CHIP_FAMILY_RV370;
+ caps->high_second_pipe = TRUE;
+ break;
+
+ case 0x3150:
+ case 0x3152:
+ case 0x3154:
+ case 0x3155:
+ case 0x3E50:
+ case 0x3E54:
+ caps->family = CHIP_FAMILY_RV380;
+ caps->high_second_pipe = TRUE;
+ break;
+
+ case 0x4A48:
+ case 0x4A49:
+ case 0x4A4A:
+ case 0x4A4B:
+ case 0x4A4C:
+ case 0x4A4D:
+ case 0x4A4E:
+ case 0x4A4F:
+ case 0x4A50:
+ case 0x4A54:
+ caps->family = CHIP_FAMILY_R420;
+ caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
+ break;
+
+ case 0x5548:
+ case 0x5549:
+ case 0x554A:
+ case 0x554B:
+ case 0x5550:
+ case 0x5551:
+ case 0x5552:
+ case 0x5554:
+ case 0x5D57:
+ caps->family = CHIP_FAMILY_R423;
+ caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
+ break;
+
+ case 0x554C:
+ case 0x554D:
+ case 0x554E:
+ case 0x554F:
+ case 0x5D48:
+ case 0x5D49:
+ case 0x5D4A:
+ caps->family = CHIP_FAMILY_R430;
+ caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
+ break;
+
+ case 0x5D4C:
+ case 0x5D4D:
+ case 0x5D4E:
+ case 0x5D4F:
+ case 0x5D50:
+ case 0x5D52:
+ caps->family = CHIP_FAMILY_R480;
+ caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
+ break;
+
+ case 0x4B48:
+ case 0x4B49:
+ case 0x4B4A:
+ case 0x4B4B:
+ case 0x4B4C:
+ caps->family = CHIP_FAMILY_R481;
+ caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
+ break;
+
+ case 0x5E4C:
+ case 0x5E4F:
+ case 0x564A:
+ case 0x564B:
+ case 0x564F:
+ case 0x5652:
+ case 0x5653:
+ case 0x5657:
+ case 0x5E48:
+ case 0x5E4A:
+ case 0x5E4B:
+ case 0x5E4D:
+ caps->family = CHIP_FAMILY_RV410;
+ caps->num_vert_fpus = 6;
+ caps->is_r400 = TRUE;
+ break;
+
+ case 0x5954:
+ case 0x5955:
+ caps->family = CHIP_FAMILY_RS480;
+ caps->has_tcl = FALSE;
+ break;
+
+ case 0x5974:
+ case 0x5975:
+ caps->family = CHIP_FAMILY_RS482;
+ caps->has_tcl = FALSE;
+ break;
+
+ case 0x5A41:
+ case 0x5A42:
+ caps->family = CHIP_FAMILY_RS400;
+ caps->has_tcl = FALSE;
+ break;
+
+ case 0x5A61:
+ case 0x5A62:
+ caps->family = CHIP_FAMILY_RC410;
+ caps->has_tcl = FALSE;
+ break;
+
+ case 0x791E:
+ case 0x791F:
+ caps->family = CHIP_FAMILY_RS690;
+ caps->has_tcl = FALSE;
+ caps->is_r400 = TRUE;
+ break;
+
+ case 0x793F:
+ case 0x7941:
+ case 0x7942:
+ caps->family = CHIP_FAMILY_RS600;
+ caps->has_tcl = FALSE;
+ caps->is_r400 = TRUE;
+ break;
+
+ case 0x796C:
+ case 0x796D:
+ case 0x796E:
+ case 0x796F:
+ caps->family = CHIP_FAMILY_RS740;
+ caps->has_tcl = FALSE;
+ caps->is_r400 = TRUE;
+ break;
+
+ case 0x7100:
+ case 0x7101:
+ case 0x7102:
+ case 0x7103:
+ case 0x7104:
+ case 0x7105:
+ case 0x7106:
+ case 0x7108:
+ case 0x7109:
+ case 0x710A:
+ case 0x710B:
+ case 0x710C:
+ case 0x710E:
+ case 0x710F:
+ caps->family = CHIP_FAMILY_R520;
+ caps->num_vert_fpus = 8;
+ caps->is_r500 = TRUE;
+ break;
+
+ case 0x7140:
+ case 0x7141:
+ case 0x7142:
+ case 0x7143:
+ case 0x7144:
+ case 0x7145:
+ case 0x7146:
+ case 0x7147:
+ case 0x7149:
+ case 0x714A:
+ case 0x714B:
+ case 0x714C:
+ case 0x714D:
+ case 0x714E:
+ case 0x714F:
+ case 0x7151:
+ case 0x7152:
+ case 0x7153:
+ case 0x715E:
+ case 0x715F:
+ case 0x7180:
+ case 0x7181:
+ case 0x7183:
+ case 0x7186:
+ case 0x7187:
+ case 0x7188:
+ case 0x718A:
+ case 0x718B:
+ case 0x718C:
+ case 0x718D:
+ case 0x718F:
+ case 0x7193:
+ case 0x7196:
+ case 0x719B:
+ case 0x719F:
+ case 0x7200:
+ case 0x7210:
+ case 0x7211:
+ caps->family = CHIP_FAMILY_RV515;
+ caps->num_vert_fpus = 2;
+ caps->is_r500 = TRUE;
+ break;
+
+ case 0x71C0:
+ case 0x71C1:
+ case 0x71C2:
+ case 0x71C3:
+ case 0x71C4:
+ case 0x71C5:
+ case 0x71C6:
+ case 0x71C7:
+ case 0x71CD:
+ case 0x71CE:
+ case 0x71D2:
+ case 0x71D4:
+ case 0x71D5:
+ case 0x71D6:
+ case 0x71DA:
+ case 0x71DE:
+ caps->family = CHIP_FAMILY_RV530;
+ caps->num_vert_fpus = 5;
+ caps->is_r500 = TRUE;
+ break;
+
+ case 0x7240:
+ case 0x7243:
+ case 0x7244:
+ case 0x7245:
+ case 0x7246:
+ case 0x7247:
+ case 0x7248:
+ case 0x7249:
+ case 0x724A:
+ case 0x724B:
+ case 0x724C:
+ case 0x724D:
+ case 0x724E:
+ case 0x724F:
+ case 0x7284:
+ caps->family = CHIP_FAMILY_R580;
+ caps->num_vert_fpus = 8;
+ caps->is_r500 = TRUE;
+ break;
+
+ case 0x7280:
+ caps->family = CHIP_FAMILY_RV570;
+ caps->num_vert_fpus = 8;
+ caps->is_r500 = TRUE;
+ break;
+
+ case 0x7281:
+ case 0x7283:
+ case 0x7287:
+ case 0x7288:
+ case 0x7289:
+ case 0x728B:
+ case 0x728C:
+ case 0x7290:
+ case 0x7291:
+ case 0x7293:
+ case 0x7297:
+ caps->family = CHIP_FAMILY_RV560;
+ caps->num_vert_fpus = 8;
+ caps->is_r500 = TRUE;
+ break;
+
+ default:
+ fprintf(stderr, "r300: Warning: Unknown chipset 0x%x\n",
+ caps->pci_id);
+ }
+
+ caps->is_rv350 = caps->family >= CHIP_FAMILY_RV350;
+}
diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h
new file mode 100644
index 0000000000..ab649c3857
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_chipset.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_CHIPSET_H
+#define R300_CHIPSET_H
+
+#include "pipe/p_compiler.h"
+
+/* Structure containing all the possible information about a specific Radeon
+ * in the R3xx, R4xx, and R5xx families. */
+struct r300_capabilities {
+ /* PCI ID */
+ uint32_t pci_id;
+ /* Chipset family */
+ int family;
+ /* The number of vertex floating-point units */
+ unsigned num_vert_fpus;
+ /* The number of fragment pipes */
+ unsigned num_frag_pipes;
+ /* The number of z pipes */
+ unsigned num_z_pipes;
+ /* The number of texture units. */
+ unsigned num_tex_units;
+ /* Whether or not TCL is physically present */
+ boolean has_tcl;
+ /* Whether or not this is RV350 or newer, including all r400 and r500
+ * chipsets. The differences compared to the oldest r300 chips are:
+ * - Blend LTE/GTE thresholds
+ * - Better MACRO_SWITCH in texture tiling
+ * - Half float vertex
+ * - More HyperZ optimizations */
+ boolean is_rv350;
+ /* Whether or not this is R400. The differences compared their rv350
+ * cousins are:
+ * - Extended fragment shader registers
+ * - 3DC texture compression (RGTC2) */
+ boolean is_r400;
+ /* Whether or not this is an RV515 or newer; R500s have many differences
+ * that require extra consideration, compared to their rv350 cousins:
+ * - Extra bit of width and height on texture sizes
+ * - Blend color is split across two registers
+ * - Universal Shader (US) block used for fragment shaders
+ * - FP16 blending and multisampling
+ * - Full RGTC texture compression
+ * - 24-bit depth textures
+ * - Stencil back-face reference value
+ * - Ability to render up to 2^24 - 1 vertices with signed index offset */
+ boolean is_r500;
+ /* Whether or not the second pixel pipe is accessed with the high bit */
+ boolean high_second_pipe;
+};
+
+/* Enumerations for legibility and telling which card we're running on. */
+enum {
+ CHIP_FAMILY_R300 = 0,
+ CHIP_FAMILY_R350,
+ CHIP_FAMILY_R360,
+ CHIP_FAMILY_RV350,
+ CHIP_FAMILY_RV370,
+ CHIP_FAMILY_RV380,
+ CHIP_FAMILY_R420,
+ CHIP_FAMILY_R423,
+ CHIP_FAMILY_R430,
+ CHIP_FAMILY_R480,
+ CHIP_FAMILY_R481,
+ CHIP_FAMILY_RV410,
+ CHIP_FAMILY_RS400,
+ CHIP_FAMILY_RC410,
+ CHIP_FAMILY_RS480,
+ CHIP_FAMILY_RS482,
+ CHIP_FAMILY_RS600,
+ CHIP_FAMILY_RS690,
+ CHIP_FAMILY_RS740,
+ CHIP_FAMILY_RV515,
+ CHIP_FAMILY_R520,
+ CHIP_FAMILY_RV530,
+ CHIP_FAMILY_R580,
+ CHIP_FAMILY_RV560,
+ CHIP_FAMILY_RV570
+};
+
+void r300_parse_chipset(struct r300_capabilities* caps);
+
+#endif /* R300_CHIPSET_H */
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
new file mode 100644
index 0000000000..16a75aa612
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -0,0 +1,329 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "draw/draw_context.h"
+
+#include "util/u_memory.h"
+#include "util/u_sampler.h"
+#include "util/u_simple_list.h"
+#include "util/u_upload_mgr.h"
+
+#include "r300_cb.h"
+#include "r300_context.h"
+#include "r300_emit.h"
+#include "r300_screen.h"
+#include "r300_screen_buffer.h"
+#include "r300_state_invariant.h"
+#include "r300_winsys.h"
+
+#include <inttypes.h>
+
+static void r300_destroy_context(struct pipe_context* context)
+{
+ struct r300_context* r300 = r300_context(context);
+ struct r300_query *query, *temp;
+ struct r300_atom *atom;
+
+ if (r300->texkill_sampler) {
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view**)&r300->texkill_sampler,
+ NULL);
+ }
+
+ util_blitter_destroy(r300->blitter);
+ draw_destroy(r300->draw);
+
+ /* Print stats, if enabled. */
+ if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) {
+ fprintf(stderr, "r300: Stats for context %p:\n", r300);
+ fprintf(stderr, " : Flushes: %" PRIu64 "\n", r300->flush_counter);
+ foreach(atom, &r300->atom_list) {
+ fprintf(stderr, " : %s: %" PRIu64 " emits\n",
+ atom->name, atom->counter);
+ }
+ }
+
+ /* If there are any queries pending or not destroyed, remove them now. */
+ foreach_s(query, temp, &r300->query_list) {
+ remove_from_list(query);
+ FREE(query);
+ }
+
+ u_upload_destroy(r300->upload_vb);
+ u_upload_destroy(r300->upload_ib);
+
+ translate_cache_destroy(r300->tran.translate_cache);
+
+ FREE(r300->blend_color_state.state);
+ FREE(r300->clip_state.state);
+ FREE(r300->fb_state.state);
+ FREE(r300->rs_block_state.state);
+ FREE(r300->scissor_state.state);
+ FREE(r300->textures_state.state);
+ FREE(r300->viewport_state.state);
+ FREE(r300->ztop_state.state);
+ FREE(r300->fs_constants.state);
+ FREE(r300->vs_constants.state);
+ if (!r300->screen->caps.has_tcl) {
+ FREE(r300->vertex_stream_state.state);
+ }
+ FREE(r300);
+}
+
+static void r300_flush_cb(void *data)
+{
+ struct r300_context* const cs_context_copy = data;
+
+ cs_context_copy->context.flush(&cs_context_copy->context, 0, NULL);
+}
+
+#define R300_INIT_ATOM(atomname, atomsize) \
+ r300->atomname.name = #atomname; \
+ r300->atomname.state = NULL; \
+ r300->atomname.size = atomsize; \
+ r300->atomname.emit = r300_emit_##atomname; \
+ r300->atomname.dirty = FALSE; \
+ insert_at_tail(&r300->atom_list, &r300->atomname);
+
+static void r300_setup_atoms(struct r300_context* r300)
+{
+ boolean is_r500 = r300->screen->caps.is_r500;
+ boolean has_tcl = r300->screen->caps.has_tcl;
+
+ /* Create the actual atom list.
+ *
+ * Each atom is examined and emitted in the order it appears here, which
+ * can affect performance and conformance if not handled with care.
+ *
+ * Some atoms never change size, others change every emit - those have
+ * the size of 0 here. */
+ make_empty_list(&r300->atom_list);
+ R300_INIT_ATOM(invariant_state, 71);
+ R300_INIT_ATOM(ztop_state, 2);
+ R300_INIT_ATOM(query_start, 4);
+ R300_INIT_ATOM(blend_state, 8);
+ R300_INIT_ATOM(blend_color_state, is_r500 ? 3 : 2);
+ R300_INIT_ATOM(clip_state, has_tcl ? 5 + (6 * 4) : 2);
+ R300_INIT_ATOM(dsa_state, is_r500 ? 8 : 6);
+ R300_INIT_ATOM(fb_state, 0);
+ R300_INIT_ATOM(rs_state, 0);
+ R300_INIT_ATOM(scissor_state, 3);
+ R300_INIT_ATOM(viewport_state, 9);
+ R300_INIT_ATOM(rs_block_state, 0);
+ R300_INIT_ATOM(vertex_stream_state, 0);
+ R300_INIT_ATOM(pvs_flush, 2);
+ R300_INIT_ATOM(vs_state, 0);
+ R300_INIT_ATOM(vs_constants, 0);
+ R300_INIT_ATOM(texture_cache_inval, 2);
+ R300_INIT_ATOM(textures_state, 0);
+ R300_INIT_ATOM(fs, 0);
+ R300_INIT_ATOM(fs_rc_constant_state, 0);
+ R300_INIT_ATOM(fs_constants, 0);
+
+ /* Replace emission functions for r500. */
+ if (r300->screen->caps.is_r500) {
+ r300->fs.emit = r500_emit_fs;
+ r300->fs_rc_constant_state.emit = r500_emit_fs_rc_constant_state;
+ r300->fs_constants.emit = r500_emit_fs_constants;
+ }
+
+ /* Some non-CSO atoms need explicit space to store the state locally. */
+ r300->blend_color_state.state = CALLOC_STRUCT(r300_blend_color_state);
+ r300->clip_state.state = CALLOC_STRUCT(r300_clip_state);
+ r300->fb_state.state = CALLOC_STRUCT(pipe_framebuffer_state);
+ r300->rs_block_state.state = CALLOC_STRUCT(r300_rs_block);
+ r300->scissor_state.state = CALLOC_STRUCT(pipe_scissor_state);
+ r300->textures_state.state = CALLOC_STRUCT(r300_textures_state);
+ r300->viewport_state.state = CALLOC_STRUCT(r300_viewport_state);
+ r300->ztop_state.state = CALLOC_STRUCT(r300_ztop_state);
+ r300->fs_constants.state = CALLOC_STRUCT(r300_constant_buffer);
+ r300->vs_constants.state = CALLOC_STRUCT(r300_constant_buffer);
+ if (!r300->screen->caps.has_tcl) {
+ r300->vertex_stream_state.state = CALLOC_STRUCT(r300_vertex_stream_state);
+ }
+
+ /* Some non-CSO atoms don't use the state pointer. */
+ r300->invariant_state.allow_null_state = TRUE;
+ r300->fs_rc_constant_state.allow_null_state = TRUE;
+ r300->pvs_flush.allow_null_state = TRUE;
+ r300->query_start.allow_null_state = TRUE;
+ r300->texture_cache_inval.allow_null_state = TRUE;
+}
+
+/* Not every state tracker calls every driver function before the first draw
+ * call and we must initialize the command buffers somehow. */
+static void r300_init_states(struct pipe_context *pipe)
+{
+ struct pipe_blend_color bc = {{0}};
+ struct pipe_clip_state cs = {{{0}}};
+ struct pipe_scissor_state ss = {0};
+ struct r300_clip_state *clip =
+ (struct r300_clip_state*)r300_context(pipe)->clip_state.state;
+ CB_LOCALS;
+
+ pipe->set_blend_color(pipe, &bc);
+ pipe->set_scissor_state(pipe, &ss);
+
+ if (r300_context(pipe)->screen->caps.has_tcl) {
+ pipe->set_clip_state(pipe, &cs);
+ } else {
+ BEGIN_CB(clip->cb, 2);
+ OUT_CB_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
+ END_CB;
+ }
+}
+
+struct pipe_context* r300_create_context(struct pipe_screen* screen,
+ void *priv)
+{
+ struct r300_context* r300 = CALLOC_STRUCT(r300_context);
+ struct r300_screen* r300screen = r300_screen(screen);
+ struct r300_winsys_screen *rws = r300screen->rws;
+
+ if (!r300)
+ return NULL;
+
+ r300->rws = rws;
+ r300->screen = r300screen;
+
+ r300->context.winsys = (struct pipe_winsys*)rws;
+ r300->context.screen = screen;
+ r300->context.priv = priv;
+
+ r300->context.destroy = r300_destroy_context;
+
+ if (!r300screen->caps.has_tcl) {
+ /* Create a Draw. This is used for SW TCL. */
+ r300->draw = draw_create(&r300->context);
+ /* Enable our renderer. */
+ draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300));
+ /* Enable Draw's clipping. */
+ draw_set_driver_clipping(r300->draw, FALSE);
+ /* Disable converting points/lines to triangles. */
+ draw_wide_line_threshold(r300->draw, 10000000.f);
+ draw_wide_point_threshold(r300->draw, 10000000.f);
+ }
+
+ r300_setup_atoms(r300);
+
+ make_empty_list(&r300->query_list);
+
+ r300_init_blit_functions(r300);
+ r300_init_flush_functions(r300);
+ r300_init_query_functions(r300);
+ r300_init_render_functions(r300);
+ r300_init_state_functions(r300);
+ r300_init_resource_functions(r300);
+
+ r300->invariant_state.dirty = TRUE;
+
+ rws->set_flush_cb(r300->rws, r300_flush_cb, r300);
+ r300->dirty_hw++;
+
+ r300->blitter = util_blitter_create(&r300->context);
+
+ r300->upload_ib = u_upload_create(&r300->context,
+ 32 * 1024, 16,
+ PIPE_BIND_INDEX_BUFFER);
+
+ if (r300->upload_ib == NULL)
+ goto no_upload_ib;
+
+ r300->upload_vb = u_upload_create(&r300->context,
+ 128 * 1024, 16,
+ PIPE_BIND_VERTEX_BUFFER);
+ if (r300->upload_vb == NULL)
+ goto no_upload_vb;
+
+ r300->tran.translate_cache = translate_cache_create();
+
+ r300_init_states(&r300->context);
+
+ /* The KIL opcode needs the first texture unit to be enabled
+ * on r3xx-r4xx. In order to calm down the CS checker, we bind this
+ * dummy texture there. */
+ if (!r300->screen->caps.is_r500) {
+ struct pipe_resource *tex;
+ struct pipe_resource rtempl = {{0}};
+ struct pipe_sampler_view vtempl = {{0}};
+
+ rtempl.target = PIPE_TEXTURE_2D;
+ rtempl.format = PIPE_FORMAT_I8_UNORM;
+ rtempl.bind = PIPE_BIND_SAMPLER_VIEW;
+ rtempl.width0 = 1;
+ rtempl.height0 = 1;
+ rtempl.depth0 = 1;
+ tex = screen->resource_create(screen, &rtempl);
+
+ u_sampler_view_default_template(&vtempl, tex, tex->format);
+
+ r300->texkill_sampler = (struct r300_sampler_view*)
+ r300->context.create_sampler_view(&r300->context, tex, &vtempl);
+
+ pipe_resource_reference(&tex, NULL);
+
+ /* This will make sure that the dummy texture is set up
+ * from the beginning even if an application does not use
+ * textures. */
+ r300->textures_state.dirty = TRUE;
+ }
+
+ return &r300->context;
+
+ no_upload_ib:
+ u_upload_destroy(r300->upload_ib);
+ no_upload_vb:
+ FREE(r300);
+ return NULL;
+}
+
+boolean r300_check_cs(struct r300_context *r300, unsigned size)
+{
+ return size <= r300->rws->get_cs_free_dwords(r300->rws);
+}
+
+void r300_finish(struct r300_context *r300)
+{
+ struct pipe_framebuffer_state *fb;
+ unsigned i;
+
+ /* This is a preliminary implementation of glFinish.
+ *
+ * The ideal implementation should use something like EmitIrqLocked and
+ * WaitIrq, or better, real fences.
+ */
+ if (r300->fb_state.state) {
+ fb = r300->fb_state.state;
+
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ if (fb->cbufs[i]->texture) {
+ r300->rws->buffer_wait(r300->rws,
+ r300_texture(fb->cbufs[i]->texture)->buffer);
+ return;
+ }
+ }
+ if (fb->zsbuf && fb->zsbuf->texture) {
+ r300->rws->buffer_wait(r300->rws,
+ r300_texture(fb->zsbuf->texture)->buffer);
+ }
+ }
+}
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
new file mode 100644
index 0000000000..8d0b4bb3d3
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -0,0 +1,591 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_CONTEXT_H
+#define R300_CONTEXT_H
+
+#include "draw/draw_vertex.h"
+
+#include "util/u_blitter.h"
+
+#include "pipe/p_context.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
+
+#include "translate/translate_cache.h"
+
+#include "r300_defines.h"
+#include "r300_screen.h"
+
+struct u_upload_mgr;
+struct r300_context;
+struct r300_fragment_shader;
+struct r300_vertex_shader;
+struct r300_stencilref_context;
+
+struct r300_atom {
+ /* List pointers. */
+ struct r300_atom *prev, *next;
+ /* Name, for debugging. */
+ const char* name;
+ /* Stat counter. */
+ uint64_t counter;
+ /* Opaque state. */
+ void* state;
+ /* Emit the state to the context. */
+ void (*emit)(struct r300_context*, unsigned, void*);
+ /* Upper bound on number of dwords to emit. */
+ unsigned size;
+ /* Whether this atom should be emitted. */
+ boolean dirty;
+ /* Whether this atom may be emitted with state == NULL. */
+ boolean allow_null_state;
+};
+
+struct r300_blend_state {
+ uint32_t cb[8];
+ uint32_t cb_no_readwrite[8];
+};
+
+struct r300_blend_color_state {
+ uint32_t cb[3];
+};
+
+struct r300_clip_state {
+ struct pipe_clip_state clip;
+
+ uint32_t cb[29];
+};
+
+struct r300_dsa_state {
+ struct pipe_depth_stencil_alpha_state dsa;
+
+ /* This is actually a command buffer with named dwords. */
+ uint32_t cb_begin;
+ uint32_t alpha_function; /* R300_FG_ALPHA_FUNC: 0x4bd4 */
+ uint32_t cb_reg_seq;
+ uint32_t z_buffer_control; /* R300_ZB_CNTL: 0x4f00 */
+ uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */
+ uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */
+ uint32_t cb_reg;
+ uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */
+
+ /* The second command buffer disables zbuffer reads and writes. */
+ uint32_t cb_no_readwrite[8];
+
+ /* Whether a two-sided stencil is enabled. */
+ boolean two_sided;
+ /* Whether a fallback should be used for a two-sided stencil ref value. */
+ boolean two_sided_stencil_ref;
+};
+
+struct r300_rs_state {
+ /* Original rasterizer state. */
+ struct pipe_rasterizer_state rs;
+ /* Draw-specific rasterizer state. */
+ struct pipe_rasterizer_state rs_draw;
+
+ uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */
+ uint32_t multisample_position_0;/* R300_GB_MSPOS0: 0x4010 */
+ uint32_t multisample_position_1;/* R300_GB_MSPOS1: 0x4014 */
+ uint32_t antialiasing_config; /* R300_GB_AA_CONFIG: 0x4020 */
+ uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */
+ uint32_t point_minmax; /* R300_GA_POINT_MINMAX: 0x4230 */
+ uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */
+ float depth_scale; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */
+ /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */
+ float depth_offset; /* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */
+ /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */
+ uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */
+ uint32_t cull_mode; /* R300_SU_CULL_MODE: 0x42b8 */
+ uint32_t line_stipple_config; /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
+ uint32_t line_stipple_value; /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
+ uint32_t color_control; /* R300_GA_COLOR_CONTROL: 0x4278 */
+ uint32_t polygon_mode; /* R300_GA_POLY_MODE: 0x4288 */
+ uint32_t clip_rule; /* R300_SC_CLIP_RULE: 0x43D0 */
+
+ /* Specifies top of Raster pipe specific enable controls,
+ * i.e. texture coordinates stuffing for points, lines, triangles */
+ uint32_t stuffing_enable; /* R300_GB_ENABLE: 0x4008 */
+
+ /* Point sprites texture coordinates, 0: lower left, 1: upper right */
+ float point_texcoord_left; /* R300_GA_POINT_S0: 0x4200 */
+ float point_texcoord_bottom; /* R300_GA_POINT_T0: 0x4204 */
+ float point_texcoord_right; /* R300_GA_POINT_S1: 0x4208 */
+ float point_texcoord_top; /* R300_GA_POINT_T1: 0x420c */
+};
+
+struct r300_rs_block {
+ uint32_t vap_vtx_state_cntl; /* R300_VAP_VTX_STATE_CNTL: 0x2180 */
+ uint32_t vap_vsm_vtx_assm; /* R300_VAP_VSM_VTX_ASSM: 0x2184 */
+ uint32_t vap_out_vtx_fmt[2]; /* R300_VAP_OUTPUT_VTX_FMT_[0-1]: 0x2090 */
+
+ uint32_t ip[8]; /* R300_RS_IP_[0-7], R500_RS_IP_[0-7] */
+ uint32_t count; /* R300_RS_COUNT */
+ uint32_t inst_count; /* R300_RS_INST_COUNT */
+ uint32_t inst[8]; /* R300_RS_INST_[0-7] */
+};
+
+struct r300_sampler_state {
+ struct pipe_sampler_state state;
+
+ uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */
+ uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */
+ uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */
+
+ /* Min/max LOD must be clamped to [0, last_level], thus
+ * it's dependent on a currently bound texture */
+ unsigned min_lod, max_lod;
+};
+
+struct r300_texture_format_state {
+ uint32_t format0; /* R300_TX_FORMAT0: 0x4480 */
+ uint32_t format1; /* R300_TX_FORMAT1: 0x44c0 */
+ uint32_t format2; /* R300_TX_FORMAT2: 0x4500 */
+ uint32_t tile_config; /* R300_TX_OFFSET (subset thereof) */
+};
+
+struct r300_sampler_view {
+ struct pipe_sampler_view base;
+
+ /* Swizzles in the UTIL_FORMAT_SWIZZLE_* representation,
+ * derived from base. */
+ unsigned char swizzle[4];
+
+ /* Copy of r300_texture::texture_format_state with format-specific bits
+ * added. */
+ struct r300_texture_format_state format;
+
+ /* The texture cache region for this texture. */
+ uint32_t texcache_region;
+};
+
+struct r300_texture_fb_state {
+ uint32_t pitch[R300_MAX_TEXTURE_LEVELS]; /* COLORPITCH or DEPTHPITCH. */
+ uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT */
+};
+
+struct r300_texture_sampler_state {
+ struct r300_texture_format_state format;
+ uint32_t filter0; /* R300_TX_FILTER0: 0x4400 */
+ uint32_t filter1; /* R300_TX_FILTER1: 0x4440 */
+ uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */
+};
+
+struct r300_textures_state {
+ /* Textures. */
+ struct r300_sampler_view *sampler_views[16];
+ int sampler_view_count;
+ /* Sampler states. */
+ struct r300_sampler_state *sampler_states[16];
+ int sampler_state_count;
+
+ /* This is the merge of the texture and sampler states. */
+ unsigned count;
+ uint32_t tx_enable; /* R300_TX_ENABLE: 0x4101 */
+ struct r300_texture_sampler_state regs[16];
+};
+
+struct r300_vertex_stream_state {
+ /* R300_VAP_PROG_STREAK_CNTL_[0-7] */
+ uint32_t vap_prog_stream_cntl[8];
+ /* R300_VAP_PROG_STREAK_CNTL_EXT_[0-7] */
+ uint32_t vap_prog_stream_cntl_ext[8];
+
+ unsigned count;
+};
+
+struct r300_viewport_state {
+ float xscale; /* R300_VAP_VPORT_XSCALE: 0x2098 */
+ float xoffset; /* R300_VAP_VPORT_XOFFSET: 0x209c */
+ float yscale; /* R300_VAP_VPORT_YSCALE: 0x20a0 */
+ float yoffset; /* R300_VAP_VPORT_YOFFSET: 0x20a4 */
+ float zscale; /* R300_VAP_VPORT_ZSCALE: 0x20a8 */
+ float zoffset; /* R300_VAP_VPORT_ZOFFSET: 0x20ac */
+ uint32_t vte_control; /* R300_VAP_VTE_CNTL: 0x20b0 */
+};
+
+struct r300_ztop_state {
+ uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */
+};
+
+/* The next several objects are not pure Radeon state; they inherit from
+ * various Gallium classes. */
+
+struct r300_constant_buffer {
+ /* Buffer of constants */
+ uint32_t constants[256][4];
+ /* Total number of constants */
+ unsigned count;
+};
+
+/* Query object.
+ *
+ * This is not a subclass of pipe_query because pipe_query is never
+ * actually fully defined. So, rather than have it as a member, and do
+ * subclass-style casting, we treat pipe_query as an opaque, and just
+ * trust that our state tracker does not ever mess up query objects.
+ */
+struct r300_query {
+ /* The kind of query. Currently only OQ is supported. */
+ unsigned type;
+ /* The number of pipes where query results are stored. */
+ unsigned num_pipes;
+ /* How many results have been written, in dwords. It's incremented
+ * after end_query and flush. */
+ unsigned num_results;
+ /* if we've flushed the query */
+ boolean flushed;
+ /* if begin has been emitted */
+ boolean begin_emitted;
+
+ /* The buffer where query results are stored. */
+ struct r300_winsys_buffer *buffer;
+ /* The size of the buffer. */
+ unsigned buffer_size;
+ /* The domain of the buffer. */
+ enum r300_buffer_domain domain;
+
+ /* Linked list members. */
+ struct r300_query* prev;
+ struct r300_query* next;
+};
+
+/* Fence object.
+ *
+ * This is a fake fence. Instead of syncing with the fence, we sync
+ * with the context, which is inefficient but compliant.
+ *
+ * This is not a subclass of pipe_fence_handle because pipe_fence_handle is
+ * never actually fully defined. So, rather than have it as a member, and do
+ * subclass-style casting, we treat pipe_fence_handle as an opaque, and just
+ * trust that our state tracker does not ever mess up fence objects.
+ */
+struct r300_fence {
+ struct pipe_reference reference;
+ struct r300_context *ctx;
+ boolean signalled;
+};
+
+struct r300_surface {
+ struct pipe_surface base;
+
+ /* Winsys buffer backing the texture. */
+ struct r300_winsys_buffer *buffer;
+
+ enum r300_buffer_domain domain;
+
+ uint32_t offset;
+ uint32_t pitch; /* COLORPITCH or DEPTHPITCH. */
+ uint32_t format; /* US_OUT_FMT or R300_ZB_FORMAT. */
+};
+
+struct r300_texture {
+ /* Parent class */
+ struct u_resource b;
+
+ enum r300_buffer_domain domain;
+
+ /* Offsets into the buffer. */
+ unsigned offset[R300_MAX_TEXTURE_LEVELS];
+
+ /* A pitch for each mip-level */
+ unsigned pitch[R300_MAX_TEXTURE_LEVELS];
+
+ /* A pitch multiplied by blockwidth as hardware wants
+ * the number of pixels instead of the number of blocks. */
+ unsigned hwpitch[R300_MAX_TEXTURE_LEVELS];
+
+ /* Size of one zslice or face based on the texture target */
+ unsigned layer_size[R300_MAX_TEXTURE_LEVELS];
+
+ /* Whether the mipmap level is macrotiled. */
+ enum r300_buffer_tiling mip_macrotile[R300_MAX_TEXTURE_LEVELS];
+
+ /**
+ * If non-zero, override the natural texture layout with
+ * a custom stride (in bytes).
+ *
+ * \note Mipmapping fails for textures with a non-natural layout!
+ *
+ * \sa r300_texture_get_stride
+ */
+ unsigned stride_override;
+
+ /* Total size of this texture, in bytes. */
+ unsigned size;
+
+ /* Whether this texture has non-power-of-two dimensions
+ * or a user-specified pitch.
+ * It can be either a regular texture or a rectangle one.
+ */
+ boolean uses_pitch;
+
+ /* Pipe buffer backing this texture. */
+ struct r300_winsys_buffer *buffer;
+
+ /* Registers carrying texture format data. */
+ /* Only format-independent bits should be filled in. */
+ struct r300_texture_format_state tx_format;
+ /* All bits should be filled in. */
+ struct r300_texture_fb_state fb_state;
+
+ /* Buffer tiling */
+ enum r300_buffer_tiling microtile, macrotile;
+};
+
+struct r300_vertex_element_state {
+ unsigned count;
+ struct pipe_vertex_element velem[PIPE_MAX_ATTRIBS];
+
+ /* If (velem[i].src_format != hw_format[i]), the vertex buffer
+ * referenced by this vertex element cannot be used for rendering and
+ * its vertex data must be translated to hw_format[i]. */
+ enum pipe_format hw_format[PIPE_MAX_ATTRIBS];
+ unsigned hw_format_size[PIPE_MAX_ATTRIBS];
+
+ /* The size of the vertex, in dwords. */
+ unsigned vertex_size_dwords;
+
+ /* This might mean two things:
+ * - src_format != hw_format, as discussed above.
+ * - src_offset % 4 != 0. */
+ boolean incompatible_layout;
+
+ struct r300_vertex_stream_state vertex_stream;
+};
+
+struct r300_translate_context {
+ /* Translate cache for incompatible vertex offset/stride/format fallback. */
+ struct translate_cache *translate_cache;
+
+ /* The vertex buffer slot containing the translated buffer. */
+ unsigned vb_slot;
+
+ /* Saved and new vertex element state. */
+ void *saved_velems, *new_velems;
+};
+
+struct r300_context {
+ /* Parent class */
+ struct pipe_context context;
+
+ /* The interface to the windowing system, etc. */
+ struct r300_winsys_screen *rws;
+ /* Screen. */
+ struct r300_screen *screen;
+ /* Draw module. Used mostly for SW TCL. */
+ struct draw_context* draw;
+ /* Accelerated blit support. */
+ struct blitter_context* blitter;
+ /* Stencil two-sided reference value fallback. */
+ struct r300_stencilref_context *stencilref_fallback;
+ /* For translating vertex buffers having incompatible vertex layout. */
+ struct r300_translate_context tran;
+
+ /* Vertex buffer for rendering. */
+ struct pipe_resource* vbo;
+ /* The KIL opcode needs the first texture unit to be enabled
+ * on r3xx-r4xx. In order to calm down the CS checker, we bind this
+ * dummy texture there. */
+ struct r300_sampler_view *texkill_sampler;
+ /* Offset into the VBO. */
+ size_t vbo_offset;
+
+ /* The currently active query. */
+ struct r300_query *query_current;
+ /* The saved query for blitter operations. */
+ struct r300_query *blitter_saved_query;
+ /* Query list. */
+ struct r300_query query_list;
+
+ /* Various CSO state objects. */
+ /* Beginning of atom list. */
+ struct r300_atom atom_list;
+ /* Blend state. */
+ struct r300_atom blend_state;
+ /* Blend color state. */
+ struct r300_atom blend_color_state;
+ /* User clip planes. */
+ struct r300_atom clip_state;
+ /* Depth, stencil, and alpha state. */
+ struct r300_atom dsa_state;
+ /* Fragment shader. */
+ struct r300_atom fs;
+ /* Fragment shader RC_CONSTANT_STATE variables. */
+ struct r300_atom fs_rc_constant_state;
+ /* Fragment shader constant buffer. */
+ struct r300_atom fs_constants;
+ /* Framebuffer state. */
+ struct r300_atom fb_state;
+ /* Occlusion query. */
+ struct r300_atom query_start;
+ /* Rasterizer state. */
+ struct r300_atom rs_state;
+ /* RS block state + VAP (vertex shader) output mapping state. */
+ struct r300_atom rs_block_state;
+ /* Scissor state. */
+ struct r300_atom scissor_state;
+ /* Textures state. */
+ struct r300_atom textures_state;
+ /* Vertex stream formatting state. */
+ struct r300_atom vertex_stream_state;
+ /* Vertex shader. */
+ struct r300_atom vs_state;
+ /* Vertex shader constant buffer. */
+ struct r300_atom vs_constants;
+ /* Viewport state. */
+ struct r300_atom viewport_state;
+ /* ZTOP state. */
+ struct r300_atom ztop_state;
+ /* PVS flush. */
+ struct r300_atom pvs_flush;
+ /* Texture cache invalidate. */
+ struct r300_atom texture_cache_inval;
+
+ /* Invariant state. This must be emitted to get the engine started. */
+ struct r300_atom invariant_state;
+
+ /* Vertex buffers for Gallium. */
+ struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
+ int vertex_buffer_count;
+ int vertex_buffer_max_index;
+ /* Vertex elements for Gallium. */
+ struct r300_vertex_element_state *velems;
+ bool any_user_vbs;
+
+ /* Vertex info for Draw. */
+ struct vertex_info vertex_info;
+
+ struct pipe_stencil_ref stencil_ref;
+ struct pipe_viewport_state viewport;
+
+ /* Stream locations for SWTCL. */
+ int stream_loc_notcl[16];
+
+ /* Flag indicating whether or not the HW is dirty. */
+ uint32_t dirty_hw;
+ /* Whether polygon offset is enabled. */
+ boolean polygon_offset_enabled;
+ /* Z buffer bit depth. */
+ uint32_t zbuffer_bpp;
+ /* Whether rendering is conditional and should be skipped. */
+ boolean skip_rendering;
+ /* Point sprites texcoord index, 1 bit per texcoord */
+ int sprite_coord_enable;
+ /* Whether two-sided color selection is enabled (AKA light_twoside). */
+ boolean two_sided_color;
+ /* Incompatible vertex buffer layout? (misaligned stride or buffer_offset) */
+ boolean incompatible_vb_layout;
+
+ /* upload managers */
+ struct u_upload_mgr *upload_vb;
+ struct u_upload_mgr *upload_ib;
+
+ /* Stat counter. */
+ uint64_t flush_counter;
+};
+
+/* Convenience cast wrappers. */
+static INLINE struct r300_query* r300_query(struct pipe_query* q)
+{
+ return (struct r300_query*)q;
+}
+
+static INLINE struct r300_surface* r300_surface(struct pipe_surface* surf)
+{
+ return (struct r300_surface*)surf;
+}
+
+static INLINE struct r300_texture* r300_texture(struct pipe_resource* tex)
+{
+ return (struct r300_texture*)tex;
+}
+
+static INLINE struct r300_context* r300_context(struct pipe_context* context)
+{
+ return (struct r300_context*)context;
+}
+
+static INLINE struct r300_fragment_shader *r300_fs(struct r300_context *r300)
+{
+ return (struct r300_fragment_shader*)r300->fs.state;
+}
+
+struct pipe_context* r300_create_context(struct pipe_screen* screen,
+ void *priv);
+
+boolean r300_check_cs(struct r300_context *r300, unsigned size);
+void r300_finish(struct r300_context *r300);
+
+/* Context initialization. */
+struct draw_stage* r300_draw_stage(struct r300_context* r300);
+void r300_init_blit_functions(struct r300_context *r300);
+void r300_init_flush_functions(struct r300_context* r300);
+void r300_init_query_functions(struct r300_context* r300);
+void r300_init_render_functions(struct r300_context *r300);
+void r300_init_state_functions(struct r300_context* r300);
+void r300_init_resource_functions(struct r300_context* r300);
+
+/* r300_query.c */
+void r300_resume_query(struct r300_context *r300,
+ struct r300_query *query);
+void r300_stop_query(struct r300_context *r300);
+
+/* r300_render_translate.c */
+void r300_begin_vertex_translate(struct r300_context *r300);
+void r300_end_vertex_translate(struct r300_context *r300);
+void r300_translate_index_buffer(struct r300_context *r300,
+ struct pipe_resource **index_buffer,
+ unsigned *index_size, unsigned index_offset,
+ unsigned *start, unsigned count);
+
+/* r300_render_stencilref.c */
+void r300_plug_in_stencil_ref_fallback(struct r300_context *r300);
+
+/* r300_state.c */
+void r300_mark_fs_code_dirty(struct r300_context *r300);
+
+/* r300_debug.c */
+void r500_dump_rs_block(struct r300_rs_block *rs);
+
+
+static INLINE boolean CTX_DBG_ON(struct r300_context * ctx, unsigned flags)
+{
+ return SCREEN_DBG_ON(ctx->screen, flags);
+}
+
+static INLINE void CTX_DBG(struct r300_context * ctx, unsigned flags,
+ const char * fmt, ...)
+{
+ if (CTX_DBG_ON(ctx, flags)) {
+ va_list va;
+ va_start(va, fmt);
+ vfprintf(stderr, fmt, va);
+ va_end(va);
+ }
+}
+
+#define DBG_ON CTX_DBG_ON
+#define DBG CTX_DBG
+
+#endif /* R300_CONTEXT_H */
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
new file mode 100644
index 0000000000..1db7da642b
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/**
+ * This file contains macros for immediate command submission.
+ */
+
+#ifndef R300_CS_H
+#define R300_CS_H
+
+#include "r300_reg.h"
+#include "r300_context.h"
+#include "r300_winsys.h"
+
+/* Yes, I know macros are ugly. However, they are much prettier than the code
+ * that they neatly hide away, and don't have the cost of function setup,so
+ * we're going to use them. */
+
+#ifdef DEBUG
+#define CS_DEBUG(x) x
+#else
+#define CS_DEBUG(x)
+#endif
+
+/**
+ * Command submission setup.
+ */
+
+#define CS_LOCALS(context) \
+ struct r300_context* const cs_context_copy = (context); \
+ struct r300_winsys_screen *cs_winsys = cs_context_copy->rws; \
+ CS_DEBUG(int cs_count = 0; (void) cs_count;)
+
+#define BEGIN_CS(size) do { \
+ assert(r300_check_cs(cs_context_copy, (size))); \
+ CS_DEBUG(cs_count = size;) \
+} while (0)
+
+#ifdef DEBUG
+#define END_CS do { \
+ if (cs_count != 0) \
+ debug_printf("r300: Warning: cs_count off by %d at (%s, %s:%i)\n", \
+ cs_count, __FUNCTION__, __FILE__, __LINE__); \
+ cs_count = 0; \
+} while (0)
+#else
+#define END_CS
+#endif
+
+/**
+ * Writing pure DWORDs.
+ */
+
+#define OUT_CS(value) do { \
+ cs_winsys->write_cs_dword(cs_winsys, (value)); \
+ CS_DEBUG(cs_count--;) \
+} while (0)
+
+#define OUT_CS_32F(value) do { \
+ cs_winsys->write_cs_dword(cs_winsys, fui(value)); \
+ CS_DEBUG(cs_count--;) \
+} while (0)
+
+#define OUT_CS_REG(register, value) do { \
+ assert(register); \
+ cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0(register, 0)); \
+ cs_winsys->write_cs_dword(cs_winsys, value); \
+ CS_DEBUG(cs_count -= 2;) \
+} while (0)
+
+/* Note: This expects count to be the number of registers,
+ * not the actual packet0 count! */
+#define OUT_CS_REG_SEQ(register, count) do { \
+ assert(register); \
+ cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1))); \
+ CS_DEBUG(cs_count--;) \
+} while (0)
+
+#define OUT_CS_TABLE(values, count) do { \
+ cs_winsys->write_cs_table(cs_winsys, values, count); \
+ CS_DEBUG(cs_count -= count;) \
+} while (0)
+
+#define OUT_CS_ONE_REG(register, count) do { \
+ assert(register); \
+ cs_winsys->write_cs_dword(cs_winsys, CP_PACKET0((register), ((count) - 1)) | RADEON_ONE_REG_WR); \
+ CS_DEBUG(cs_count--;) \
+} while (0)
+
+#define OUT_CS_PKT3(op, count) do { \
+ cs_winsys->write_cs_dword(cs_winsys, CP_PACKET3(op, count)); \
+ CS_DEBUG(cs_count--;) \
+} while (0)
+
+
+/**
+ * Writing relocations.
+ */
+
+#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \
+ assert(bo); \
+ cs_winsys->write_cs_dword(cs_winsys, offset); \
+ cs_winsys->write_cs_reloc(cs_winsys, bo, rd, wd, flags); \
+ CS_DEBUG(cs_count -= 3;) \
+} while (0)
+
+#define OUT_CS_BUF_RELOC(bo, offset, rd, wd, flags) do { \
+ assert(bo); \
+ OUT_CS_RELOC(r300_buffer(bo)->buf, offset, rd, wd, flags); \
+} while (0)
+
+#define OUT_CS_TEX_RELOC(tex, offset, rd, wd, flags) do { \
+ assert(tex); \
+ OUT_CS_RELOC(tex->buffer, offset, rd, wd, flags); \
+} while (0)
+
+#define OUT_CS_BUF_RELOC_NO_OFFSET(bo, rd, wd, flags) do { \
+ assert(bo); \
+ cs_winsys->write_cs_reloc(cs_winsys, r300_buffer(bo)->buf, rd, wd, flags); \
+ CS_DEBUG(cs_count -= 2;) \
+} while (0)
+
+
+/**
+ * Command buffer emission.
+ */
+
+#define WRITE_CS_TABLE(values, count) do { \
+ CS_DEBUG(assert(cs_count == 0);) \
+ cs_winsys->write_cs_table(cs_winsys, values, count); \
+} while (0)
+
+#endif /* R300_CS_H */
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
new file mode 100644
index 0000000000..a6cd86e392
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright 2009 Nicolai Haehnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_context.h"
+
+#include "util/u_debug.h"
+
+#include <stdio.h>
+
+static const struct debug_named_value debug_options[] = {
+ { "fp", DBG_FP, "Fragment program handling (for debugging)" },
+ { "vp", DBG_VP, "Vertex program handling (for debugging)" },
+ { "draw", DBG_DRAW, "Draw and emit (for debugging)" },
+ { "tex", DBG_TEX, "Textures (for debugging)" },
+ { "texalloc", DBG_TEXALLOC, "Texture allocation (for debugging)" },
+ { "fall", DBG_FALL, "Fallbacks (for debugging)" },
+ { "rs", DBG_RS, "Rasterizer (for debugging)" },
+ { "fb", DBG_FB, "Framebuffer (for debugging)" },
+ { "anisohq", DBG_ANISOHQ, "High quality anisotropic filtering (for benchmarking)" },
+ { "notiling", DBG_NO_TILING, "Disable tiling (for benchmarking)" },
+ { "noimmd", DBG_NO_IMMD, "Disable immediate mode (for benchmarking)" },
+ { "fakeocc", DBG_FAKE_OCC, "Use fake occlusion queries (for lulz)" },
+ { "stats", DBG_STATS, "Gather statistics (for lulz)" },
+
+ /* must be last */
+ DEBUG_NAMED_VALUE_END
+};
+
+void r300_init_debug(struct r300_screen * screen)
+{
+ screen->debug = debug_get_flags_option("RADEON_DEBUG", debug_options, 0);
+}
+
+void r500_dump_rs_block(struct r300_rs_block *rs)
+{
+ unsigned count, ip, it_count, ic_count, i, j;
+ unsigned tex_ptr;
+ unsigned col_ptr, col_fmt;
+
+ count = rs->inst_count & 0xf;
+ count++;
+
+ it_count = rs->count & 0x7f;
+ ic_count = (rs->count >> 7) & 0xf;
+
+ fprintf(stderr, "RS Block: %d texcoords (linear), %d colors (perspective)\n",
+ it_count, ic_count);
+ fprintf(stderr, "%d instructions\n", count);
+
+ for (i = 0; i < count; i++) {
+ if (rs->inst[i] & 0x10) {
+ ip = rs->inst[i] & 0xf;
+ fprintf(stderr, "texture: ip %d to psf %d\n",
+ ip, (rs->inst[i] >> 5) & 0x7f);
+
+ tex_ptr = rs->ip[ip] & 0xffffff;
+ fprintf(stderr, " : ");
+
+ j = 3;
+ do {
+ if ((tex_ptr & 0x3f) == 63) {
+ fprintf(stderr, "1.0");
+ } else if ((tex_ptr & 0x3f) == 62) {
+ fprintf(stderr, "0.0");
+ } else {
+ fprintf(stderr, "[%d]", tex_ptr & 0x3f);
+ }
+ } while (j-- && fprintf(stderr, "/"));
+ fprintf(stderr, "\n");
+ }
+
+ if (rs->inst[i] & 0x10000) {
+ ip = (rs->inst[i] >> 12) & 0xf;
+ fprintf(stderr, "color: ip %d to psf %d\n",
+ ip, (rs->inst[i] >> 18) & 0x7f);
+
+ col_ptr = (rs->ip[ip] >> 24) & 0x7;
+ col_fmt = (rs->ip[ip] >> 27) & 0xf;
+ fprintf(stderr, " : offset %d ", col_ptr);
+
+ switch (col_fmt) {
+ case 0:
+ fprintf(stderr, "(R/G/B/A)");
+ break;
+ case 1:
+ fprintf(stderr, "(R/G/B/0)");
+ break;
+ case 2:
+ fprintf(stderr, "(R/G/B/1)");
+ break;
+ case 4:
+ fprintf(stderr, "(0/0/0/A)");
+ break;
+ case 5:
+ fprintf(stderr, "(0/0/0/0)");
+ break;
+ case 6:
+ fprintf(stderr, "(0/0/0/1)");
+ break;
+ case 8:
+ fprintf(stderr, "(1/1/1/A)");
+ break;
+ case 9:
+ fprintf(stderr, "(1/1/1/0)");
+ break;
+ case 10:
+ fprintf(stderr, "(1/1/1/1)");
+ break;
+ }
+ fprintf(stderr, "\n");
+ }
+ }
+}
diff --git a/src/gallium/drivers/r300/r300_defines.h b/src/gallium/drivers/r300/r300_defines.h
new file mode 100644
index 0000000000..d510d80a7b
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_defines.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_DEFINES_H
+#define R300_DEFINES_H
+
+#include "pipe/p_defines.h"
+
+#define R300_MAX_TEXTURE_LEVELS 13
+#define R300_MAX_DRAW_VBO_SIZE (1024 * 1024)
+
+#define R300_RESOURCE_FLAG_TRANSFER PIPE_RESOURCE_FLAG_DRV_PRIV
+
+#define R300_INVALID_FORMAT 0xffff
+
+/* Tiling flags. */
+enum r300_buffer_tiling {
+ R300_BUFFER_LINEAR = 0,
+ R300_BUFFER_TILED,
+ R300_BUFFER_SQUARETILED
+};
+
+enum r300_buffer_domain { /* bitfield */
+ R300_DOMAIN_GTT = 1,
+ R300_DOMAIN_VRAM = 2
+};
+
+#endif
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
new file mode 100644
index 0000000000..e2c40d823d
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -0,0 +1,1042 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* r300_emit: Functions for emitting state. */
+
+#include "util/u_format.h"
+#include "util/u_math.h"
+#include "util/u_simple_list.h"
+
+#include "r300_context.h"
+#include "r300_cs.h"
+#include "r300_emit.h"
+#include "r300_fs.h"
+#include "r300_screen.h"
+#include "r300_screen_buffer.h"
+#include "r300_vs.h"
+
+void r300_emit_blend_state(struct r300_context* r300,
+ unsigned size, void* state)
+{
+ struct r300_blend_state* blend = (struct r300_blend_state*)state;
+ struct pipe_framebuffer_state* fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ CS_LOCALS(r300);
+
+ if (fb->nr_cbufs) {
+ WRITE_CS_TABLE(blend->cb, size);
+ } else {
+ WRITE_CS_TABLE(blend->cb_no_readwrite, size);
+ }
+}
+
+void r300_emit_blend_color_state(struct r300_context* r300,
+ unsigned size, void* state)
+{
+ struct r300_blend_color_state* bc = (struct r300_blend_color_state*)state;
+ CS_LOCALS(r300);
+
+ WRITE_CS_TABLE(bc->cb, size);
+}
+
+void r300_emit_clip_state(struct r300_context* r300,
+ unsigned size, void* state)
+{
+ struct r300_clip_state* clip = (struct r300_clip_state*)state;
+ CS_LOCALS(r300);
+
+ WRITE_CS_TABLE(clip->cb, size);
+}
+
+void r300_emit_dsa_state(struct r300_context* r300, unsigned size, void* state)
+{
+ struct r300_dsa_state* dsa = (struct r300_dsa_state*)state;
+ struct pipe_framebuffer_state* fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ CS_LOCALS(r300);
+
+ if (fb->zsbuf) {
+ WRITE_CS_TABLE(&dsa->cb_begin, size);
+ } else {
+ WRITE_CS_TABLE(dsa->cb_no_readwrite, size);
+ }
+}
+
+static const float * get_rc_constant_state(
+ struct r300_context * r300,
+ struct rc_constant * constant)
+{
+ struct r300_textures_state* texstate = r300->textures_state.state;
+ static float vec[4] = { 0.0, 0.0, 0.0, 1.0 };
+ struct pipe_resource *tex;
+
+ assert(constant->Type == RC_CONSTANT_STATE);
+
+ switch (constant->u.State[0]) {
+ /* Factor for converting rectangle coords to
+ * normalized coords. Should only show up on non-r500. */
+ case RC_STATE_R300_TEXRECT_FACTOR:
+ tex = texstate->sampler_views[constant->u.State[1]]->base.texture;
+ vec[0] = 1.0 / tex->width0;
+ vec[1] = 1.0 / tex->height0;
+ break;
+
+ case RC_STATE_R300_VIEWPORT_SCALE:
+ vec[0] = r300->viewport.scale[0];
+ vec[1] = r300->viewport.scale[1];
+ vec[2] = r300->viewport.scale[2];
+ break;
+
+ case RC_STATE_R300_VIEWPORT_OFFSET:
+ vec[0] = r300->viewport.translate[0];
+ vec[1] = r300->viewport.translate[1];
+ vec[2] = r300->viewport.translate[2];
+ break;
+
+ default:
+ fprintf(stderr, "r300: Implementation error: "
+ "Unknown RC_CONSTANT type %d\n", constant->u.State[0]);
+ }
+
+ /* This should either be (0, 0, 0, 1), which should be a relatively safe
+ * RGBA or STRQ value, or it could be one of the RC_CONSTANT_STATE
+ * state factors. */
+ return vec;
+}
+
+/* Convert a normal single-precision float into the 7.16 format
+ * used by the R300 fragment shader.
+ */
+uint32_t pack_float24(float f)
+{
+ union {
+ float fl;
+ uint32_t u;
+ } u;
+ float mantissa;
+ int exponent;
+ uint32_t float24 = 0;
+
+ if (f == 0.0)
+ return 0;
+
+ u.fl = f;
+
+ mantissa = frexpf(f, &exponent);
+
+ /* Handle -ve */
+ if (mantissa < 0) {
+ float24 |= (1 << 23);
+ mantissa = mantissa * -1.0;
+ }
+ /* Handle exponent, bias of 63 */
+ exponent += 62;
+ float24 |= (exponent << 16);
+ /* Kill 7 LSB of mantissa */
+ float24 |= (u.u & 0x7FFFFF) >> 7;
+
+ return float24;
+}
+
+void r300_emit_fs(struct r300_context* r300, unsigned size, void *state)
+{
+ struct r300_fragment_shader *fs = r300_fs(r300);
+ CS_LOCALS(r300);
+
+ WRITE_CS_TABLE(fs->shader->cb_code, fs->shader->cb_code_size);
+}
+
+void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state)
+{
+ struct r300_fragment_shader *fs = r300_fs(r300);
+ struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
+ unsigned count = fs->shader->externals_count * 4;
+ CS_LOCALS(r300);
+
+ if (count == 0)
+ return;
+
+ BEGIN_CS(size);
+ OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X, count);
+ OUT_CS_TABLE(buf->constants, count);
+ END_CS;
+}
+
+void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state)
+{
+ struct r300_fragment_shader *fs = r300_fs(r300);
+ struct rc_constant_list *constants = &fs->shader->code.constants;
+ unsigned i;
+ unsigned count = fs->shader->rc_state_count;
+ unsigned first = fs->shader->externals_count;
+ unsigned end = constants->Count;
+ uint32_t cdata[4];
+ unsigned j;
+ CS_LOCALS(r300);
+
+ if (count == 0)
+ return;
+
+ BEGIN_CS(size);
+ for(i = first; i < end; ++i) {
+ if (constants->Constants[i].Type == RC_CONSTANT_STATE) {
+ const float *data =
+ get_rc_constant_state(r300, &constants->Constants[i]);
+
+ for (j = 0; j < 4; j++)
+ cdata[j] = pack_float24(data[j]);
+
+ OUT_CS_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4);
+ OUT_CS_TABLE(cdata, 4);
+ }
+ }
+ END_CS;
+}
+
+void r500_emit_fs(struct r300_context* r300, unsigned size, void *state)
+{
+ struct r300_fragment_shader *fs = r300_fs(r300);
+ CS_LOCALS(r300);
+
+ WRITE_CS_TABLE(fs->shader->cb_code, fs->shader->cb_code_size);
+}
+
+void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state)
+{
+ struct r300_fragment_shader *fs = r300_fs(r300);
+ struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
+ unsigned count = fs->shader->externals_count * 4;
+ CS_LOCALS(r300);
+
+ if (count == 0)
+ return;
+
+ BEGIN_CS(size);
+ OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_CONST);
+ OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, count);
+ OUT_CS_TABLE(buf->constants, count);
+ END_CS;
+}
+
+void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state)
+{
+ struct r300_fragment_shader *fs = r300_fs(r300);
+ struct rc_constant_list *constants = &fs->shader->code.constants;
+ unsigned i;
+ unsigned count = fs->shader->rc_state_count;
+ unsigned first = fs->shader->externals_count;
+ unsigned end = constants->Count;
+ CS_LOCALS(r300);
+
+ if (count == 0)
+ return;
+
+ BEGIN_CS(size);
+ for(i = first; i < end; ++i) {
+ if (constants->Constants[i].Type == RC_CONSTANT_STATE) {
+ const float *data =
+ get_rc_constant_state(r300, &constants->Constants[i]);
+
+ OUT_CS_REG(R500_GA_US_VECTOR_INDEX,
+ R500_GA_US_VECTOR_INDEX_TYPE_CONST |
+ (i & R500_GA_US_VECTOR_INDEX_MASK));
+ OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA, 4);
+ OUT_CS_TABLE(data, 4);
+ }
+ }
+ END_CS;
+}
+
+void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state)
+{
+ struct pipe_framebuffer_state* fb = (struct pipe_framebuffer_state*)state;
+ struct r300_surface* surf;
+ unsigned i;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
+
+ /* Set up scissors.
+ * By writing to the SC registers, SC & US assert idle. */
+ OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
+ if (r300->screen->caps.is_r500) {
+ OUT_CS(0);
+ OUT_CS(((fb->width - 1) << R300_SCISSORS_X_SHIFT) |
+ ((fb->height - 1) << R300_SCISSORS_Y_SHIFT));
+ } else {
+ OUT_CS((1440 << R300_SCISSORS_X_SHIFT) |
+ (1440 << R300_SCISSORS_Y_SHIFT));
+ OUT_CS(((fb->width + 1440-1) << R300_SCISSORS_X_SHIFT) |
+ ((fb->height + 1440-1) << R300_SCISSORS_Y_SHIFT));
+ }
+
+ /* Flush and free renderbuffer caches. */
+ OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+ R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+ OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+ R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+
+ /* Wait until the GPU is idle.
+ * This fixes random pixels sometimes appearing probably caused
+ * by incomplete rendering. */
+ OUT_CS_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+
+ /* NUM_MULTIWRITES replicates COLOR[0] to all colorbuffers, which is not
+ * what we usually want. */
+ if (r300->screen->caps.is_r500) {
+ OUT_CS_REG(R300_RB3D_CCTL,
+ R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE);
+ } else {
+ OUT_CS_REG(R300_RB3D_CCTL, 0);
+ }
+
+ /* Set up colorbuffers. */
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ surf = r300_surface(fb->cbufs[i]);
+
+ OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
+ OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0);
+
+ OUT_CS_REG_SEQ(R300_RB3D_COLORPITCH0 + (4 * i), 1);
+ OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0);
+
+ OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), surf->format);
+ }
+ for (; i < 4; i++) {
+ OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), R300_US_OUT_FMT_UNUSED);
+ }
+
+ /* Set up a zbuffer. */
+ if (fb->zsbuf) {
+ surf = r300_surface(fb->zsbuf);
+
+ OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
+ OUT_CS_RELOC(surf->buffer, surf->offset, 0, surf->domain, 0);
+
+ OUT_CS_REG(R300_ZB_FORMAT, surf->format);
+
+ OUT_CS_REG_SEQ(R300_ZB_DEPTHPITCH, 1);
+ OUT_CS_RELOC(surf->buffer, surf->pitch, 0, surf->domain, 0);
+ }
+ END_CS;
+}
+
+void r300_emit_query_start(struct r300_context *r300, unsigned size, void*state)
+{
+ struct r300_query *query = r300->query_current;
+ CS_LOCALS(r300);
+
+ if (!query)
+ return;
+
+ BEGIN_CS(size);
+ if (r300->screen->caps.family == CHIP_FAMILY_RV530) {
+ OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+ } else {
+ OUT_CS_REG(R300_SU_REG_DEST, R300_RASTER_PIPE_SELECT_ALL);
+ }
+ OUT_CS_REG(R300_ZB_ZPASS_DATA, 0);
+ END_CS;
+ query->begin_emitted = TRUE;
+ query->flushed = FALSE;
+}
+
+static void r300_emit_query_end_frag_pipes(struct r300_context *r300,
+ struct r300_query *query)
+{
+ struct r300_capabilities* caps = &r300->screen->caps;
+ struct r300_winsys_buffer *buf = r300->query_current->buffer;
+ CS_LOCALS(r300);
+
+ assert(caps->num_frag_pipes);
+
+ BEGIN_CS(6 * caps->num_frag_pipes + 2);
+ /* I'm not so sure I like this switch, but it's hard to be elegant
+ * when there's so many special cases...
+ *
+ * So here's the basic idea. For each pipe, enable writes to it only,
+ * then put out the relocation for ZPASS_ADDR, taking into account a
+ * 4-byte offset for each pipe. RV380 and older are special; they have
+ * only two pipes, and the second pipe's enable is on bit 3, not bit 1,
+ * so there's a chipset cap for that. */
+ switch (caps->num_frag_pipes) {
+ case 4:
+ /* pipe 3 only */
+ OUT_CS_REG(R300_SU_REG_DEST, 1 << 3);
+ OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_CS_RELOC(buf, (query->num_results + 3) * 4,
+ 0, query->domain, 0);
+ case 3:
+ /* pipe 2 only */
+ OUT_CS_REG(R300_SU_REG_DEST, 1 << 2);
+ OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_CS_RELOC(buf, (query->num_results + 2) * 4,
+ 0, query->domain, 0);
+ case 2:
+ /* pipe 1 only */
+ /* As mentioned above, accomodate RV380 and older. */
+ OUT_CS_REG(R300_SU_REG_DEST,
+ 1 << (caps->high_second_pipe ? 3 : 1));
+ OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_CS_RELOC(buf, (query->num_results + 1) * 4,
+ 0, query->domain, 0);
+ case 1:
+ /* pipe 0 only */
+ OUT_CS_REG(R300_SU_REG_DEST, 1 << 0);
+ OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_CS_RELOC(buf, (query->num_results + 0) * 4,
+ 0, query->domain, 0);
+ break;
+ default:
+ fprintf(stderr, "r300: Implementation error: Chipset reports %d"
+ " pixel pipes!\n", caps->num_frag_pipes);
+ abort();
+ }
+
+ /* And, finally, reset it to normal... */
+ OUT_CS_REG(R300_SU_REG_DEST, 0xF);
+ END_CS;
+}
+
+static void rv530_emit_query_end_single_z(struct r300_context *r300,
+ struct r300_query *query)
+{
+ struct r300_winsys_buffer *buf = r300->query_current->buffer;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(8);
+ OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
+ OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_CS_RELOC(buf, query->num_results * 4, 0, query->domain, 0);
+ OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+ END_CS;
+}
+
+static void rv530_emit_query_end_double_z(struct r300_context *r300,
+ struct r300_query *query)
+{
+ struct r300_winsys_buffer *buf = r300->query_current->buffer;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(14);
+ OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_0);
+ OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_CS_RELOC(buf, (query->num_results + 0) * 4, 0, query->domain, 0);
+ OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_1);
+ OUT_CS_REG_SEQ(R300_ZB_ZPASS_ADDR, 1);
+ OUT_CS_RELOC(buf, (query->num_results + 1) * 4, 0, query->domain, 0);
+ OUT_CS_REG(RV530_FG_ZBREG_DEST, RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL);
+ END_CS;
+}
+
+void r300_emit_query_end(struct r300_context* r300)
+{
+ struct r300_capabilities *caps = &r300->screen->caps;
+ struct r300_query *query = r300->query_current;
+
+ if (!query)
+ return;
+
+ if (query->begin_emitted == FALSE)
+ return;
+
+ if (caps->family == CHIP_FAMILY_RV530) {
+ if (caps->num_z_pipes == 2)
+ rv530_emit_query_end_double_z(r300, query);
+ else
+ rv530_emit_query_end_single_z(r300, query);
+ } else
+ r300_emit_query_end_frag_pipes(r300, query);
+
+ query->begin_emitted = FALSE;
+ query->num_results += query->num_pipes;
+
+ /* XXX grab all the results and reset the counter. */
+ if (query->num_results >= query->buffer_size / 4 - 4) {
+ query->num_results = (query->buffer_size / 4) / 2;
+ fprintf(stderr, "r300: Rewinding OQBO...\n");
+ }
+}
+
+void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state)
+{
+ struct r300_rs_state* rs = state;
+ struct pipe_framebuffer_state* fb = r300->fb_state.state;
+ float scale, offset;
+ unsigned mspos0, mspos1, aa_config;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
+ OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status);
+
+ /* Multisampling. Depends on framebuffer sample count. */
+ if (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0)) {
+ if (fb->nr_cbufs && fb->cbufs[0]->texture->nr_samples > 1) {
+ aa_config = R300_GB_AA_CONFIG_AA_ENABLE;
+ /* Subsample placement. These may not be optimal. */
+ switch (fb->cbufs[0]->texture->nr_samples) {
+ case 2:
+ aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2;
+ mspos0 = 0x33996633;
+ mspos1 = 0x6666663;
+ break;
+ case 3:
+ aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3;
+ mspos0 = 0x33936933;
+ mspos1 = 0x6666663;
+ break;
+ case 4:
+ aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4;
+ mspos0 = 0x33939933;
+ mspos1 = 0x3966663;
+ break;
+ case 6:
+ aa_config |= R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6;
+ mspos0 = 0x22a2aa22;
+ mspos1 = 0x2a65672;
+ break;
+ default:
+ debug_printf("r300: Bad number of multisamples!\n");
+ mspos0 = rs->multisample_position_0;
+ mspos1 = rs->multisample_position_1;
+ break;
+ }
+
+ OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
+ OUT_CS(mspos0);
+ OUT_CS(mspos1);
+
+ OUT_CS_REG(R300_GB_AA_CONFIG, aa_config);
+ } else {
+ OUT_CS_REG_SEQ(R300_GB_MSPOS0, 2);
+ OUT_CS(rs->multisample_position_0);
+ OUT_CS(rs->multisample_position_1);
+
+ OUT_CS_REG(R300_GB_AA_CONFIG, rs->antialiasing_config);
+ }
+ }
+
+ OUT_CS_REG(R300_GA_POINT_SIZE, rs->point_size);
+ OUT_CS_REG_SEQ(R300_GA_POINT_MINMAX, 2);
+ OUT_CS(rs->point_minmax);
+ OUT_CS(rs->line_control);
+
+ if (rs->polygon_offset_enable) {
+ scale = rs->depth_scale * 12;
+ offset = rs->depth_offset;
+
+ switch (r300->zbuffer_bpp) {
+ case 16:
+ offset *= 4;
+ break;
+ case 24:
+ offset *= 2;
+ break;
+ }
+
+ OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 4);
+ OUT_CS_32F(scale);
+ OUT_CS_32F(offset);
+ OUT_CS_32F(scale);
+ OUT_CS_32F(offset);
+ }
+
+ OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_ENABLE, 2);
+ OUT_CS(rs->polygon_offset_enable);
+ OUT_CS(rs->cull_mode);
+ OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config);
+ OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value);
+ OUT_CS_REG(R300_GA_POLY_MODE, rs->polygon_mode);
+ OUT_CS_REG(R300_SC_CLIP_RULE, rs->clip_rule);
+ OUT_CS_REG(R300_GB_ENABLE, rs->stuffing_enable);
+ OUT_CS_REG_SEQ(R300_GA_POINT_S0, 4);
+ OUT_CS_32F(rs->point_texcoord_left);
+ OUT_CS_32F(rs->point_texcoord_bottom);
+ OUT_CS_32F(rs->point_texcoord_right);
+ OUT_CS_32F(rs->point_texcoord_top);
+ END_CS;
+}
+
+void r300_emit_rs_block_state(struct r300_context* r300,
+ unsigned size, void* state)
+{
+ struct r300_rs_block* rs = (struct r300_rs_block*)state;
+ unsigned i;
+ /* It's the same for both INST and IP tables */
+ unsigned count = (rs->inst_count & R300_RS_INST_COUNT_MASK) + 1;
+ CS_LOCALS(r300);
+
+ if (SCREEN_DBG_ON(r300->screen, DBG_DRAW)) {
+ r500_dump_rs_block(rs);
+ }
+
+ DBG(r300, DBG_DRAW, "r300: RS emit:\n");
+
+ BEGIN_CS(size);
+ OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
+ OUT_CS(rs->vap_vtx_state_cntl);
+ OUT_CS(rs->vap_vsm_vtx_assm);
+ OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
+ OUT_CS(rs->vap_out_vtx_fmt[0]);
+ OUT_CS(rs->vap_out_vtx_fmt[1]);
+
+ if (r300->screen->caps.is_r500) {
+ OUT_CS_REG_SEQ(R500_RS_IP_0, count);
+ } else {
+ OUT_CS_REG_SEQ(R300_RS_IP_0, count);
+ }
+ OUT_CS_TABLE(rs->ip, count);
+ for (i = 0; i < count; i++) {
+ DBG(r300, DBG_DRAW, " : ip %d: 0x%08x\n", i, rs->ip[i]);
+ }
+
+ OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
+ OUT_CS(rs->count);
+ OUT_CS(rs->inst_count);
+
+ if (r300->screen->caps.is_r500) {
+ OUT_CS_REG_SEQ(R500_RS_INST_0, count);
+ } else {
+ OUT_CS_REG_SEQ(R300_RS_INST_0, count);
+ }
+ OUT_CS_TABLE(rs->inst, count);
+ for (i = 0; i < count; i++) {
+ DBG(r300, DBG_DRAW, " : inst %d: 0x%08x\n", i, rs->inst[i]);
+ }
+
+ DBG(r300, DBG_DRAW, " : count: 0x%08x inst_count: 0x%08x\n",
+ rs->count, rs->inst_count);
+
+ END_CS;
+}
+
+void r300_emit_scissor_state(struct r300_context* r300,
+ unsigned size, void* state)
+{
+ struct pipe_scissor_state* scissor = (struct pipe_scissor_state*)state;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
+ OUT_CS_REG_SEQ(R300_SC_CLIPRECT_TL_0, 2);
+ if (r300->screen->caps.is_r500) {
+ OUT_CS((scissor->minx << R300_CLIPRECT_X_SHIFT) |
+ (scissor->miny << R300_CLIPRECT_Y_SHIFT));
+ OUT_CS(((scissor->maxx - 1) << R300_CLIPRECT_X_SHIFT) |
+ ((scissor->maxy - 1) << R300_CLIPRECT_Y_SHIFT));
+ } else {
+ OUT_CS(((scissor->minx + 1440) << R300_CLIPRECT_X_SHIFT) |
+ ((scissor->miny + 1440) << R300_CLIPRECT_Y_SHIFT));
+ OUT_CS(((scissor->maxx + 1440-1) << R300_CLIPRECT_X_SHIFT) |
+ ((scissor->maxy + 1440-1) << R300_CLIPRECT_Y_SHIFT));
+ }
+ END_CS;
+}
+
+void r300_emit_textures_state(struct r300_context *r300,
+ unsigned size, void *state)
+{
+ struct r300_textures_state *allstate = (struct r300_textures_state*)state;
+ struct r300_texture_sampler_state *texstate;
+ struct r300_texture *tex;
+ unsigned i;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
+ OUT_CS_REG(R300_TX_ENABLE, allstate->tx_enable);
+
+ for (i = 0; i < allstate->count; i++) {
+ if ((1 << i) & allstate->tx_enable) {
+ texstate = &allstate->regs[i];
+ tex = r300_texture(allstate->sampler_views[i]->base.texture);
+
+ OUT_CS_REG(R300_TX_FILTER0_0 + (i * 4), texstate->filter0);
+ OUT_CS_REG(R300_TX_FILTER1_0 + (i * 4), texstate->filter1);
+ OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (i * 4),
+ texstate->border_color);
+
+ OUT_CS_REG(R300_TX_FORMAT0_0 + (i * 4), texstate->format.format0);
+ OUT_CS_REG(R300_TX_FORMAT1_0 + (i * 4), texstate->format.format1);
+ OUT_CS_REG(R300_TX_FORMAT2_0 + (i * 4), texstate->format.format2);
+
+ OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (i * 4), 1);
+ OUT_CS_TEX_RELOC(tex, texstate->format.tile_config, tex->domain,
+ 0, 0);
+ }
+ }
+ END_CS;
+}
+
+void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed)
+{
+ struct pipe_vertex_buffer *vb1, *vb2, *vbuf = r300->vertex_buffer;
+ struct pipe_vertex_element *velem = r300->velems->velem;
+ struct r300_buffer *buf;
+ int i;
+ unsigned *hw_format_size = r300->velems->hw_format_size;
+ unsigned size1, size2, aos_count = r300->velems->count;
+ unsigned packet_size = (aos_count * 3 + 1) / 2;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(2 + packet_size + aos_count * 2);
+ OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, packet_size);
+ OUT_CS(aos_count | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
+
+ for (i = 0; i < aos_count - 1; i += 2) {
+ vb1 = &vbuf[velem[i].vertex_buffer_index];
+ vb2 = &vbuf[velem[i+1].vertex_buffer_index];
+ size1 = hw_format_size[i];
+ size2 = hw_format_size[i+1];
+
+ OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride) |
+ R300_VBPNTR_SIZE1(size2) | R300_VBPNTR_STRIDE1(vb2->stride));
+ OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
+ OUT_CS(vb2->buffer_offset + velem[i+1].src_offset + offset * vb2->stride);
+ }
+
+ if (aos_count & 1) {
+ vb1 = &vbuf[velem[i].vertex_buffer_index];
+ size1 = hw_format_size[i];
+
+ OUT_CS(R300_VBPNTR_SIZE0(size1) | R300_VBPNTR_STRIDE0(vb1->stride));
+ OUT_CS(vb1->buffer_offset + velem[i].src_offset + offset * vb1->stride);
+ }
+
+ for (i = 0; i < aos_count; i++) {
+ buf = r300_buffer(vbuf[velem[i].vertex_buffer_index].buffer);
+ OUT_CS_BUF_RELOC_NO_OFFSET(&buf->b.b, buf->domain, 0, 0);
+ }
+ END_CS;
+}
+
+void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed)
+{
+ CS_LOCALS(r300);
+
+ DBG(r300, DBG_DRAW, "r300: Preparing vertex buffer %p for render, "
+ "vertex size %d\n", r300->vbo,
+ r300->vertex_info.size);
+ /* Set the pointer to our vertex buffer. The emitted values are this:
+ * PACKET3 [3D_LOAD_VBPNTR]
+ * COUNT [1]
+ * FORMAT [size | stride << 8]
+ * OFFSET [offset into BO]
+ * VBPNTR [relocated BO]
+ */
+ BEGIN_CS(7);
+ OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3);
+ OUT_CS(1 | (!indexed ? R300_VC_FORCE_PREFETCH : 0));
+ OUT_CS(r300->vertex_info.size |
+ (r300->vertex_info.size << 8));
+ OUT_CS(r300->vbo_offset);
+ OUT_CS_BUF_RELOC(r300->vbo, 0, r300_buffer(r300->vbo)->domain, 0, 0);
+ END_CS;
+}
+
+void r300_emit_vertex_stream_state(struct r300_context* r300,
+ unsigned size, void* state)
+{
+ struct r300_vertex_stream_state *streams =
+ (struct r300_vertex_stream_state*)state;
+ unsigned i;
+ CS_LOCALS(r300);
+
+ DBG(r300, DBG_DRAW, "r300: PSC emit:\n");
+
+ BEGIN_CS(size);
+ OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, streams->count);
+ OUT_CS_TABLE(streams->vap_prog_stream_cntl, streams->count);
+ for (i = 0; i < streams->count; i++) {
+ DBG(r300, DBG_DRAW, " : prog_stream_cntl%d: 0x%08x\n", i,
+ streams->vap_prog_stream_cntl[i]);
+ }
+ OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, streams->count);
+ OUT_CS_TABLE(streams->vap_prog_stream_cntl_ext, streams->count);
+ for (i = 0; i < streams->count; i++) {
+ DBG(r300, DBG_DRAW, " : prog_stream_cntl_ext%d: 0x%08x\n", i,
+ streams->vap_prog_stream_cntl_ext[i]);
+ }
+ END_CS;
+}
+
+void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state)
+{
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
+ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
+ END_CS;
+}
+
+void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state)
+{
+ struct r300_vertex_shader* vs = (struct r300_vertex_shader*)state;
+ struct r300_vertex_program_code* code = &vs->code;
+ struct r300_screen* r300screen = r300->screen;
+ unsigned instruction_count = code->length / 4;
+ unsigned i;
+
+ unsigned vtx_mem_size = r300screen->caps.is_r500 ? 128 : 72;
+ unsigned input_count = MAX2(util_bitcount(code->InputsRead), 1);
+ unsigned output_count = MAX2(util_bitcount(code->OutputsWritten), 1);
+ unsigned temp_count = MAX2(code->num_temporaries, 1);
+
+ unsigned pvs_num_slots = MIN3(vtx_mem_size / input_count,
+ vtx_mem_size / output_count, 10);
+ unsigned pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6);
+
+ unsigned imm_first = vs->externals_count;
+ unsigned imm_end = vs->code.constants.Count;
+ unsigned imm_count = vs->immediates_count;
+
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
+ /* R300_VAP_PVS_CODE_CNTL_0
+ * R300_VAP_PVS_CONST_CNTL
+ * R300_VAP_PVS_CODE_CNTL_1
+ * See the r5xx docs for instructions on how to use these. */
+ OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
+ OUT_CS(R300_PVS_FIRST_INST(0) |
+ R300_PVS_XYZW_VALID_INST(instruction_count - 1) |
+ R300_PVS_LAST_INST(instruction_count - 1));
+ OUT_CS(R300_PVS_MAX_CONST_ADDR(code->constants.Count - 1));
+ OUT_CS(instruction_count - 1);
+
+ OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
+ OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, code->length);
+ OUT_CS_TABLE(code->body.d, code->length);
+
+ OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) |
+ R300_PVS_NUM_CNTLRS(pvs_num_controllers) |
+ R300_PVS_NUM_FPUS(r300screen->caps.num_vert_fpus) |
+ R300_PVS_VF_MAX_VTX_NUM(12) |
+ (r300screen->caps.is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0));
+
+ /* Emit immediates. */
+ if (imm_count) {
+ OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
+ (r300->screen->caps.is_r500 ?
+ R500_PVS_CONST_START : R300_PVS_CONST_START) +
+ imm_first);
+ OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, imm_count * 4);
+ for (i = imm_first; i < imm_end; i++) {
+ const float *data = vs->code.constants.Constants[i].u.Immediate;
+ OUT_CS_TABLE(data, 4);
+ }
+ }
+ END_CS;
+}
+
+void r300_emit_vs_constants(struct r300_context* r300,
+ unsigned size, void *state)
+{
+ unsigned count =
+ ((struct r300_vertex_shader*)r300->vs_state.state)->externals_count;
+ struct r300_constant_buffer *buf = (struct r300_constant_buffer*)state;
+ CS_LOCALS(r300);
+
+ if (!count)
+ return;
+
+ BEGIN_CS(size);
+ OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG,
+ (r300->screen->caps.is_r500 ?
+ R500_PVS_CONST_START : R300_PVS_CONST_START));
+ OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, count * 4);
+ OUT_CS_TABLE(buf->constants, count * 4);
+ END_CS;
+}
+
+void r300_emit_viewport_state(struct r300_context* r300,
+ unsigned size, void* state)
+{
+ struct r300_viewport_state* viewport = (struct r300_viewport_state*)state;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
+ OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
+ OUT_CS_TABLE(&viewport->xscale, 6);
+ OUT_CS_REG(R300_VAP_VTE_CNTL, viewport->vte_control);
+ END_CS;
+}
+
+void r300_emit_ztop_state(struct r300_context* r300,
+ unsigned size, void* state)
+{
+ struct r300_ztop_state* ztop = (struct r300_ztop_state*)state;
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
+ OUT_CS_REG(R300_ZB_ZTOP, ztop->z_buffer_top);
+ END_CS;
+}
+
+void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state)
+{
+ CS_LOCALS(r300);
+
+ BEGIN_CS(size);
+ OUT_CS_REG(R300_TX_INVALTAGS, 0);
+ END_CS;
+}
+
+void r300_emit_buffer_validate(struct r300_context *r300,
+ boolean do_validate_vertex_buffers,
+ struct pipe_resource *index_buffer)
+{
+ struct pipe_framebuffer_state* fb =
+ (struct pipe_framebuffer_state*)r300->fb_state.state;
+ struct r300_textures_state *texstate =
+ (struct r300_textures_state*)r300->textures_state.state;
+ struct r300_texture* tex;
+ struct pipe_vertex_buffer *vbuf = r300->vertex_buffer;
+ struct pipe_vertex_element *velem = r300->velems->velem;
+ struct pipe_resource *pbuf;
+ unsigned i;
+ boolean invalid = FALSE;
+
+ /* upload buffers first */
+ if (r300->screen->caps.has_tcl && r300->any_user_vbs) {
+ r300_upload_user_buffers(r300);
+ r300->any_user_vbs = false;
+ }
+
+ /* Clean out BOs. */
+ r300->rws->reset_bos(r300->rws);
+
+validate:
+ /* Color buffers... */
+ for (i = 0; i < fb->nr_cbufs; i++) {
+ tex = r300_texture(fb->cbufs[i]->texture);
+ assert(tex && tex->buffer && "cbuf is marked, but NULL!");
+ if (!r300_add_texture(r300->rws, tex, 0, tex->domain)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ }
+ /* ...depth buffer... */
+ if (fb->zsbuf) {
+ tex = r300_texture(fb->zsbuf->texture);
+ assert(tex && tex->buffer && "zsbuf is marked, but NULL!");
+ if (!r300_add_texture(r300->rws, tex,
+ 0, tex->domain)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ }
+ /* ...textures... */
+ for (i = 0; i < texstate->count; i++) {
+ if (!(texstate->tx_enable & (1 << i))) {
+ continue;
+ }
+
+ tex = r300_texture(texstate->sampler_views[i]->base.texture);
+ if (!r300_add_texture(r300->rws, tex, tex->domain, 0)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ }
+ /* ...occlusion query buffer... */
+ if (r300->query_current) {
+ if (!r300->rws->add_buffer(r300->rws, r300->query_current->buffer,
+ 0, r300->query_current->domain)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ }
+ /* ...vertex buffer for SWTCL path... */
+ if (r300->vbo) {
+ if (!r300_add_buffer(r300->rws, r300->vbo,
+ r300_buffer(r300->vbo)->domain, 0)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ }
+ /* ...vertex buffers for HWTCL path... */
+ if (do_validate_vertex_buffers) {
+ for (i = 0; i < r300->velems->count; i++) {
+ pbuf = vbuf[velem[i].vertex_buffer_index].buffer;
+
+ if (!r300_add_buffer(r300->rws, pbuf,
+ r300_buffer(pbuf)->domain, 0)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ }
+ }
+ /* ...and index buffer for HWTCL path. */
+ if (index_buffer) {
+ if (!r300_add_buffer(r300->rws, index_buffer,
+ r300_buffer(index_buffer)->domain, 0)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ goto validate;
+ }
+ }
+ if (!r300->rws->validate(r300->rws)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ if (invalid) {
+ /* Well, hell. */
+ fprintf(stderr, "r300: Stuck in validation loop, gonna quit now.\n");
+ abort();
+ }
+ invalid = TRUE;
+ goto validate;
+ }
+}
+
+unsigned r300_get_num_dirty_dwords(struct r300_context *r300)
+{
+ struct r300_atom* atom;
+ unsigned dwords = 0;
+
+ foreach(atom, &r300->atom_list) {
+ if (atom->dirty) {
+ dwords += atom->size;
+ }
+ }
+
+ /* let's reserve some more, just in case */
+ dwords += 32;
+
+ return dwords;
+}
+
+/* Emit all dirty state. */
+void r300_emit_dirty_state(struct r300_context* r300)
+{
+ struct r300_atom* atom;
+
+ foreach(atom, &r300->atom_list) {
+ if (atom->dirty) {
+ atom->emit(r300, atom->size, atom->state);
+ if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) {
+ atom->counter++;
+ }
+ atom->dirty = FALSE;
+ }
+ }
+
+ r300->dirty_hw++;
+}
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
new file mode 100644
index 0000000000..36a29894d0
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_EMIT_H
+#define R300_EMIT_H
+
+#include "r300_context.h"
+#include "radeon_code.h"
+
+struct rX00_fragment_program_code;
+struct r300_vertex_program_code;
+
+uint32_t pack_float24(float f);
+
+void r300_emit_aos(struct r300_context* r300, int offset, boolean indexed);
+
+void r300_emit_blend_state(struct r300_context* r300,
+ unsigned size, void* state);
+
+void r300_emit_blend_color_state(struct r300_context* r300,
+ unsigned size, void* state);
+
+void r300_emit_clip_state(struct r300_context* r300,
+ unsigned size, void* state);
+
+void r300_emit_dsa_state(struct r300_context* r300,
+ unsigned size, void* state);
+
+void r300_emit_fs(struct r300_context* r300, unsigned size, void *state);
+
+void r300_emit_fs_constants(struct r300_context* r300, unsigned size, void *state);
+
+void r300_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state);
+
+void r500_emit_fs(struct r300_context* r300, unsigned size, void *state);
+
+void r500_emit_fs_constants(struct r300_context* r300, unsigned size, void *state);
+
+void r500_emit_fs_rc_constant_state(struct r300_context* r300, unsigned size, void *state);
+
+void r300_emit_fb_state(struct r300_context* r300, unsigned size, void* state);
+
+void r300_emit_query_start(struct r300_context *r300, unsigned size, void *state);
+
+void r300_emit_query_end(struct r300_context* r300);
+
+void r300_emit_rs_state(struct r300_context* r300, unsigned size, void* state);
+
+void r300_emit_rs_block_state(struct r300_context* r300,
+ unsigned size, void* state);
+
+void r300_emit_scissor_state(struct r300_context* r300,
+ unsigned size, void* state);
+
+void r300_emit_textures_state(struct r300_context *r300,
+ unsigned size, void *state);
+
+void r300_emit_aos_swtcl(struct r300_context *r300, boolean indexed);
+
+void r300_emit_vertex_stream_state(struct r300_context* r300,
+ unsigned size, void* state);
+
+void r300_emit_vs_constants(struct r300_context* r300,
+ unsigned size, void *state);
+
+void r300_emit_vs_state(struct r300_context* r300, unsigned size, void* state);
+
+void r300_emit_viewport_state(struct r300_context* r300,
+ unsigned size, void* state);
+
+void r300_emit_ztop_state(struct r300_context* r300,
+ unsigned size, void* state);
+
+void r300_emit_pvs_flush(struct r300_context* r300, unsigned size, void* state);
+
+void r300_emit_texture_cache_inval(struct r300_context* r300, unsigned size, void* state);
+
+unsigned r300_get_num_dirty_dwords(struct r300_context *r300);
+
+/* Emit all dirty state. */
+void r300_emit_dirty_state(struct r300_context* r300);
+
+void r300_emit_buffer_validate(struct r300_context *r300,
+ boolean do_validate_vertex_buffers,
+ struct pipe_resource *index_buffer);
+
+#endif /* R300_EMIT_H */
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
new file mode 100644
index 0000000000..ba840bfff8
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "draw/draw_context.h"
+#include "draw/draw_private.h"
+
+#include "util/u_simple_list.h"
+
+#include "r300_context.h"
+#include "r300_cs.h"
+#include "r300_emit.h"
+
+static void r300_flush(struct pipe_context* pipe,
+ unsigned flags,
+ struct pipe_fence_handle** fence)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_query *query;
+ struct r300_atom *atom;
+ struct r300_fence **rfence = (struct r300_fence**)fence;
+
+ /* We probably need to flush Draw, but we may have been called from
+ * within Draw. This feels kludgy, but it might be the best thing.
+ *
+ * Of course, the best thing is to kill Draw with fire. :3 */
+ if (r300->draw && !r300->draw->flushing) {
+ draw_flush(r300->draw);
+ }
+
+ if (r300->dirty_hw) {
+ r300_emit_query_end(r300);
+
+ if (SCREEN_DBG_ON(r300->screen, DBG_STATS)) {
+ r300->flush_counter++;
+ }
+ r300->rws->flush_cs(r300->rws);
+ r300->dirty_hw = 0;
+
+ /* New kitchen sink, baby. */
+ foreach(atom, &r300->atom_list) {
+ if (atom->state || atom->allow_null_state) {
+ atom->dirty = TRUE;
+ }
+ }
+
+ /* Unmark HWTCL state for SWTCL. */
+ if (!r300->screen->caps.has_tcl) {
+ r300->vs_state.dirty = FALSE;
+ r300->vs_constants.dirty = FALSE;
+ }
+ }
+
+ /* reset flushed query */
+ foreach(query, &r300->query_list) {
+ query->flushed = TRUE;
+ }
+
+ /* Create a new fence. */
+ if (rfence) {
+ *rfence = CALLOC_STRUCT(r300_fence);
+ pipe_reference_init(&(*rfence)->reference, 1);
+ (*rfence)->ctx = r300;
+ }
+}
+
+void r300_init_flush_functions(struct r300_context* r300)
+{
+ r300->context.flush = r300_flush;
+}
diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c
new file mode 100644
index 0000000000..e585394304
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -0,0 +1,506 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Joakim Sindholt <opensource@zhasha.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_ureg.h"
+
+#include "r300_cb.h"
+#include "r300_context.h"
+#include "r300_emit.h"
+#include "r300_screen.h"
+#include "r300_fs.h"
+#include "r300_reg.h"
+#include "r300_tgsi_to_rc.h"
+
+#include "radeon_code.h"
+#include "radeon_compiler.h"
+
+/* Convert info about FS input semantics to r300_shader_semantics. */
+void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
+ struct r300_shader_semantics* fs_inputs)
+{
+ int i;
+ unsigned index;
+
+ r300_shader_semantics_reset(fs_inputs);
+
+ for (i = 0; i < info->num_inputs; i++) {
+ index = info->input_semantic_index[i];
+
+ switch (info->input_semantic_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
+ assert(index < ATTR_COLOR_COUNT);
+ fs_inputs->color[index] = i;
+ break;
+
+ case TGSI_SEMANTIC_GENERIC:
+ assert(index < ATTR_GENERIC_COUNT);
+ fs_inputs->generic[index] = i;
+ break;
+
+ case TGSI_SEMANTIC_FOG:
+ assert(index == 0);
+ fs_inputs->fog = i;
+ break;
+
+ case TGSI_SEMANTIC_POSITION:
+ assert(index == 0);
+ fs_inputs->wpos = i;
+ break;
+
+ default:
+ fprintf(stderr, "r300: FP: Unknown input semantic: %i\n",
+ info->input_semantic_name[i]);
+ }
+ }
+}
+
+static void find_output_registers(struct r300_fragment_program_compiler * compiler,
+ struct r300_fragment_shader_code *shader)
+{
+ unsigned i, colorbuf_count = 0;
+
+ /* Mark the outputs as not present initially */
+ compiler->OutputColor[0] = shader->info.num_outputs;
+ compiler->OutputColor[1] = shader->info.num_outputs;
+ compiler->OutputColor[2] = shader->info.num_outputs;
+ compiler->OutputColor[3] = shader->info.num_outputs;
+ compiler->OutputDepth = shader->info.num_outputs;
+
+ /* Now see where they really are. */
+ for(i = 0; i < shader->info.num_outputs; ++i) {
+ switch(shader->info.output_semantic_name[i]) {
+ case TGSI_SEMANTIC_COLOR:
+ compiler->OutputColor[colorbuf_count] = i;
+ colorbuf_count++;
+ break;
+ case TGSI_SEMANTIC_POSITION:
+ compiler->OutputDepth = i;
+ break;
+ }
+ }
+}
+
+static void allocate_hardware_inputs(
+ struct r300_fragment_program_compiler * c,
+ void (*allocate)(void * data, unsigned input, unsigned hwreg),
+ void * mydata)
+{
+ struct r300_shader_semantics* inputs =
+ (struct r300_shader_semantics*)c->UserData;
+ int i, reg = 0;
+
+ /* Allocate input registers. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (inputs->color[i] != ATTR_UNUSED) {
+ allocate(mydata, inputs->color[i], reg++);
+ }
+ }
+ for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ if (inputs->generic[i] != ATTR_UNUSED) {
+ allocate(mydata, inputs->generic[i], reg++);
+ }
+ }
+ if (inputs->fog != ATTR_UNUSED) {
+ allocate(mydata, inputs->fog, reg++);
+ }
+ if (inputs->wpos != ATTR_UNUSED) {
+ allocate(mydata, inputs->wpos, reg++);
+ }
+}
+
+static void get_external_state(
+ struct r300_context* r300,
+ struct r300_fragment_program_external_state* state)
+{
+ struct r300_textures_state *texstate = r300->textures_state.state;
+ unsigned i;
+ unsigned char *swizzle;
+
+ for (i = 0; i < texstate->sampler_state_count; i++) {
+ struct r300_sampler_state* s = texstate->sampler_states[i];
+
+ if (!s) {
+ continue;
+ }
+
+ if (s->state.compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) {
+ state->unit[i].compare_mode_enabled = 1;
+
+ /* Pass depth texture swizzling to the compiler. */
+ if (texstate->sampler_views[i]) {
+ swizzle = texstate->sampler_views[i]->swizzle;
+
+ state->unit[i].depth_texture_swizzle =
+ RC_MAKE_SWIZZLE(swizzle[0], swizzle[1],
+ swizzle[2], swizzle[3]);
+ } else {
+ state->unit[i].depth_texture_swizzle = RC_SWIZZLE_XYZW;
+ }
+
+ /* Fortunately, no need to translate this. */
+ state->unit[i].texture_compare_func = s->state.compare_func;
+ }
+
+ state->unit[i].non_normalized_coords = !s->state.normalized_coords;
+
+ if (texstate->sampler_views[i]) {
+ struct r300_texture *t;
+ t = (struct r300_texture*)texstate->sampler_views[i]->base.texture;
+
+ /* XXX this should probably take into account STR, not just S. */
+ if (t->uses_pitch) {
+ switch (s->state.wrap_s) {
+ case PIPE_TEX_WRAP_REPEAT:
+ state->unit[i].wrap_mode = RC_WRAP_REPEAT;
+ state->unit[i].fake_npot = TRUE;
+ break;
+
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ state->unit[i].wrap_mode = RC_WRAP_MIRRORED_REPEAT;
+ state->unit[i].fake_npot = TRUE;
+ break;
+
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ state->unit[i].wrap_mode = RC_WRAP_MIRRORED_CLAMP;
+ state->unit[i].fake_npot = TRUE;
+ break;
+
+ default:
+ state->unit[i].wrap_mode = RC_WRAP_NONE;
+ break;
+ }
+ }
+ }
+ }
+}
+
+static void r300_translate_fragment_shader(
+ struct r300_context* r300,
+ struct r300_fragment_shader_code* shader,
+ const struct tgsi_token *tokens);
+
+static void r300_dummy_fragment_shader(
+ struct r300_context* r300,
+ struct r300_fragment_shader_code* shader)
+{
+ struct pipe_shader_state state;
+ struct ureg_program *ureg;
+ struct ureg_dst out;
+ struct ureg_src imm;
+
+ /* Make a simple fragment shader which outputs (0, 0, 0, 1) */
+ ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
+ out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
+ imm = ureg_imm4f(ureg, 0, 0, 0, 1);
+
+ ureg_MOV(ureg, out, imm);
+ ureg_END(ureg);
+
+ state.tokens = ureg_finalize(ureg);
+
+ shader->dummy = TRUE;
+ r300_translate_fragment_shader(r300, shader, state.tokens);
+
+ ureg_destroy(ureg);
+}
+
+static void r300_emit_fs_code_to_buffer(
+ struct r300_context *r300,
+ struct r300_fragment_shader_code *shader)
+{
+ struct rX00_fragment_program_code *generic_code = &shader->code;
+ unsigned imm_count = shader->immediates_count;
+ unsigned imm_first = shader->externals_count;
+ unsigned imm_end = generic_code->constants.Count;
+ struct rc_constant *constants = generic_code->constants.Constants;
+ unsigned i;
+ CB_LOCALS;
+
+ if (r300->screen->caps.is_r500) {
+ struct r500_fragment_program_code *code = &generic_code->code.r500;
+
+ shader->cb_code_size = 17 +
+ ((code->inst_end + 1) * 6) +
+ imm_count * 7;
+
+ NEW_CB(shader->cb_code, shader->cb_code_size);
+ OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+ OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx);
+ OUT_CB_REG(R500_US_CODE_RANGE,
+ R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end));
+ OUT_CB_REG(R500_US_CODE_OFFSET, 0);
+ OUT_CB_REG(R500_US_CODE_ADDR,
+ R500_US_CODE_START_ADDR(0) | R500_US_CODE_END_ADDR(code->inst_end));
+
+ OUT_CB_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR);
+ OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, (code->inst_end + 1) * 6);
+ for (i = 0; i <= code->inst_end; i++) {
+ OUT_CB(code->inst[i].inst0);
+ OUT_CB(code->inst[i].inst1);
+ OUT_CB(code->inst[i].inst2);
+ OUT_CB(code->inst[i].inst3);
+ OUT_CB(code->inst[i].inst4);
+ OUT_CB(code->inst[i].inst5);
+ }
+
+ /* Emit immediates. */
+ if (imm_count) {
+ for(i = imm_first; i < imm_end; ++i) {
+ if (constants[i].Type == RC_CONSTANT_IMMEDIATE) {
+ const float *data = constants[i].u.Immediate;
+
+ OUT_CB_REG(R500_GA_US_VECTOR_INDEX,
+ R500_GA_US_VECTOR_INDEX_TYPE_CONST |
+ (i & R500_GA_US_VECTOR_INDEX_MASK));
+ OUT_CB_ONE_REG(R500_GA_US_VECTOR_DATA, 4);
+ OUT_CB_TABLE(data, 4);
+ }
+ }
+ }
+ } else { /* r300 */
+ struct r300_fragment_program_code *code = &generic_code->code.r300;
+
+ shader->cb_code_size = 19 +
+ code->alu.length * 4 +
+ (code->tex.length ? (1 + code->tex.length) : 0) +
+ imm_count * 5;
+
+ NEW_CB(shader->cb_code, shader->cb_code_size);
+ OUT_CB_REG(R300_US_CONFIG, code->config);
+ OUT_CB_REG(R300_US_PIXSIZE, code->pixsize);
+ OUT_CB_REG(R300_US_CODE_OFFSET, code->code_offset);
+
+ OUT_CB_REG_SEQ(R300_US_CODE_ADDR_0, 4);
+ OUT_CB_TABLE(code->code_addr, 4);
+
+ OUT_CB_REG_SEQ(R300_US_ALU_RGB_INST_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++)
+ OUT_CB(code->alu.inst[i].rgb_inst);
+
+ OUT_CB_REG_SEQ(R300_US_ALU_RGB_ADDR_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++)
+ OUT_CB(code->alu.inst[i].rgb_addr);
+
+ OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_INST_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++)
+ OUT_CB(code->alu.inst[i].alpha_inst);
+
+ OUT_CB_REG_SEQ(R300_US_ALU_ALPHA_ADDR_0, code->alu.length);
+ for (i = 0; i < code->alu.length; i++)
+ OUT_CB(code->alu.inst[i].alpha_addr);
+
+ if (code->tex.length) {
+ OUT_CB_REG_SEQ(R300_US_TEX_INST_0, code->tex.length);
+ OUT_CB_TABLE(code->tex.inst, code->tex.length);
+ }
+
+ /* Emit immediates. */
+ if (imm_count) {
+ for(i = imm_first; i < imm_end; ++i) {
+ if (constants[i].Type == RC_CONSTANT_IMMEDIATE) {
+ const float *data = constants[i].u.Immediate;
+
+ OUT_CB_REG_SEQ(R300_PFS_PARAM_0_X + i * 16, 4);
+ OUT_CB(pack_float24(data[0]));
+ OUT_CB(pack_float24(data[1]));
+ OUT_CB(pack_float24(data[2]));
+ OUT_CB(pack_float24(data[3]));
+ }
+ }
+ }
+ }
+
+ OUT_CB_REG(R300_FG_DEPTH_SRC, shader->fg_depth_src);
+ OUT_CB_REG(R300_US_W_FMT, shader->us_out_w);
+ END_CB;
+}
+
+static void r300_translate_fragment_shader(
+ struct r300_context* r300,
+ struct r300_fragment_shader_code* shader,
+ const struct tgsi_token *tokens)
+{
+ struct r300_fragment_program_compiler compiler;
+ struct tgsi_to_rc ttr;
+ int wpos;
+ unsigned i;
+
+ tgsi_scan_shader(tokens, &shader->info);
+ r300_shader_read_fs_inputs(&shader->info, &shader->inputs);
+
+ wpos = shader->inputs.wpos;
+
+ /* Setup the compiler. */
+ memset(&compiler, 0, sizeof(compiler));
+ rc_init(&compiler.Base);
+ compiler.Base.Debug = DBG_ON(r300, DBG_FP);
+
+ compiler.code = &shader->code;
+ compiler.state = shader->compare_state;
+ compiler.Base.is_r500 = r300->screen->caps.is_r500;
+ compiler.Base.max_temp_regs = compiler.Base.is_r500 ? 128 : 32;
+ compiler.AllocateHwInputs = &allocate_hardware_inputs;
+ compiler.UserData = &shader->inputs;
+
+ find_output_registers(&compiler, shader);
+
+ if (compiler.Base.Debug) {
+ debug_printf("r300: Initial fragment program\n");
+ tgsi_dump(tokens, 0);
+ }
+
+ /* Translate TGSI to our internal representation */
+ ttr.compiler = &compiler.Base;
+ ttr.info = &shader->info;
+ ttr.use_half_swizzles = TRUE;
+
+ r300_tgsi_to_rc(&ttr, tokens);
+
+ /**
+ * Transform the program to support WPOS.
+ *
+ * Introduce a small fragment at the start of the program that will be
+ * the only code that directly reads the WPOS input.
+ * All other code pieces that reference that input will be rewritten
+ * to read from a newly allocated temporary. */
+ if (wpos != ATTR_UNUSED) {
+ /* Moving the input to some other reg is not really necessary. */
+ rc_transform_fragment_wpos(&compiler.Base, wpos, wpos, TRUE);
+ }
+
+ /* Invoke the compiler */
+ r3xx_compile_fragment_program(&compiler);
+
+ /* Shaders with zero instructions are invalid,
+ * use the dummy shader instead. */
+ if (shader->code.code.r500.inst_end == -1) {
+ rc_destroy(&compiler.Base);
+ r300_dummy_fragment_shader(r300, shader);
+ return;
+ }
+
+ if (compiler.Base.Error) {
+ fprintf(stderr, "r300 FP: Compiler Error:\n%sUsing a dummy shader"
+ " instead.\nIf there's an 'unknown opcode' message, please"
+ " file a bug report and attach this log.\n", compiler.Base.ErrorMsg);
+
+ if (shader->dummy) {
+ fprintf(stderr, "r300 FP: Cannot compile the dummy shader! "
+ "Giving up...\n");
+ abort();
+ }
+
+ rc_destroy(&compiler.Base);
+ r300_dummy_fragment_shader(r300, shader);
+ return;
+ }
+
+ /* Initialize numbers of constants for each type. */
+ shader->externals_count = ttr.immediate_offset;
+ shader->immediates_count = 0;
+ shader->rc_state_count = 0;
+
+ for (i = shader->externals_count; i < shader->code.constants.Count; i++) {
+ switch (shader->code.constants.Constants[i].Type) {
+ case RC_CONSTANT_IMMEDIATE:
+ ++shader->immediates_count;
+ break;
+ case RC_CONSTANT_STATE:
+ ++shader->rc_state_count;
+ break;
+ default:
+ assert(0);
+ }
+ }
+
+ /* Setup shader depth output. */
+ if (shader->code.writes_depth) {
+ shader->fg_depth_src = R300_FG_DEPTH_SRC_SHADER;
+ shader->us_out_w = R300_W_FMT_W24 | R300_W_SRC_US;
+ } else {
+ shader->fg_depth_src = R300_FG_DEPTH_SRC_SCAN;
+ shader->us_out_w = R300_W_FMT_W0 | R300_W_SRC_US;
+ }
+
+ /* And, finally... */
+ rc_destroy(&compiler.Base);
+
+ /* Build the command buffer. */
+ r300_emit_fs_code_to_buffer(r300, shader);
+}
+
+boolean r300_pick_fragment_shader(struct r300_context* r300)
+{
+ struct r300_fragment_shader* fs = r300_fs(r300);
+ struct r300_fragment_program_external_state state = {{{ 0 }}};
+ struct r300_fragment_shader_code* ptr;
+
+ get_external_state(r300, &state);
+
+ if (!fs->first) {
+ /* Build the fragment shader for the first time. */
+ fs->first = fs->shader = CALLOC_STRUCT(r300_fragment_shader_code);
+
+ memcpy(&fs->shader->compare_state, &state,
+ sizeof(struct r300_fragment_program_external_state));
+ r300_translate_fragment_shader(r300, fs->shader, fs->state.tokens);
+ return TRUE;
+
+ } else {
+ /* Check if the currently-bound shader has been compiled
+ * with the texture-compare state we need. */
+ if (memcmp(&fs->shader->compare_state, &state, sizeof(state)) != 0) {
+ /* Search for the right shader. */
+ ptr = fs->first;
+ while (ptr) {
+ if (memcmp(&ptr->compare_state, &state, sizeof(state)) == 0) {
+ if (fs->shader != ptr) {
+ fs->shader = ptr;
+ return TRUE;
+ }
+ /* The currently-bound one is OK. */
+ return FALSE;
+ }
+ ptr = ptr->next;
+ }
+
+ /* Not found, gotta compile a new one. */
+ ptr = CALLOC_STRUCT(r300_fragment_shader_code);
+ ptr->next = fs->first;
+ fs->first = fs->shader = ptr;
+
+ ptr->compare_state = state;
+ r300_translate_fragment_shader(r300, ptr, fs->state.tokens);
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
diff --git a/src/gallium/drivers/r300/r300_fs.h b/src/gallium/drivers/r300/r300_fs.h
new file mode 100644
index 0000000000..51bfa88c5e
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_fs.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Joakim Sindholt <opensource@zhasha.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_FS_H
+#define R300_FS_H
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+#include "radeon_code.h"
+#include "r300_shader_semantics.h"
+
+struct r300_fragment_shader_code {
+ struct tgsi_shader_info info;
+ struct r300_shader_semantics inputs;
+
+ /* Whether the shader was replaced by a dummy one due to a shader
+ * compilation failure. */
+ boolean dummy;
+
+ /* Numbers of constants for each type. */
+ unsigned externals_count;
+ unsigned immediates_count;
+ unsigned rc_state_count;
+
+ /* Registers for fragment depth output setup. */
+ uint32_t fg_depth_src; /* R300_FG_DEPTH_SRC: 0x4bd8 */
+ uint32_t us_out_w; /* R300_US_W_FMT: 0x46b4 */
+
+ struct r300_fragment_program_external_state compare_state;
+ struct rX00_fragment_program_code code;
+
+ unsigned cb_code_size;
+ uint32_t *cb_code;
+
+ struct r300_fragment_shader_code* next;
+};
+
+struct r300_fragment_shader {
+ /* Parent class */
+ struct pipe_shader_state state;
+
+ /* Currently-bound fragment shader. */
+ struct r300_fragment_shader_code* shader;
+
+ /* List of the same shaders compiled with different texture-compare
+ * states. */
+ struct r300_fragment_shader_code* first;
+};
+
+void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
+ struct r300_shader_semantics* fs_inputs);
+
+/* Return TRUE if the shader was switched and should be re-emitted. */
+boolean r300_pick_fragment_shader(struct r300_context* r300);
+
+static INLINE boolean r300_fragment_shader_writes_depth(struct r300_fragment_shader *fs)
+{
+ if (!fs)
+ return FALSE;
+ return (fs->shader->code.writes_depth) ? TRUE : FALSE;
+}
+
+#endif /* R300_FS_H */
diff --git a/src/gallium/drivers/r300/r300_hyperz.c b/src/gallium/drivers/r300/r300_hyperz.c
new file mode 100644
index 0000000000..e5c7658952
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_hyperz.c
@@ -0,0 +1,122 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+
+#include "r300_hyperz.h"
+#include "r300_context.h"
+#include "r300_reg.h"
+#include "r300_fs.h"
+
+/*****************************************************************************/
+/* The ZTOP state */
+/*****************************************************************************/
+
+static boolean r300_dsa_writes_stencil(
+ struct pipe_stencil_state *s)
+{
+ return s->enabled && s->writemask &&
+ (s->fail_op != PIPE_STENCIL_OP_KEEP ||
+ s->zfail_op != PIPE_STENCIL_OP_KEEP ||
+ s->zpass_op != PIPE_STENCIL_OP_KEEP);
+}
+
+static boolean r300_dsa_writes_depth_stencil(
+ struct pipe_depth_stencil_alpha_state *dsa)
+{
+ /* We are interested only in the cases when a depth or stencil value
+ * can be changed. */
+
+ if (dsa->depth.enabled && dsa->depth.writemask &&
+ dsa->depth.func != PIPE_FUNC_NEVER)
+ return TRUE;
+
+ if (r300_dsa_writes_stencil(&dsa->stencil[0]) ||
+ r300_dsa_writes_stencil(&dsa->stencil[1]))
+ return TRUE;
+
+ return FALSE;
+}
+
+static boolean r300_dsa_alpha_test_enabled(
+ struct pipe_depth_stencil_alpha_state *dsa)
+{
+ /* We are interested only in the cases when alpha testing can kill
+ * a fragment. */
+
+ return dsa->alpha.enabled && dsa->alpha.func != PIPE_FUNC_ALWAYS;
+}
+
+static void r300_update_ztop(struct r300_context* r300)
+{
+ struct r300_ztop_state* ztop_state =
+ (struct r300_ztop_state*)r300->ztop_state.state;
+ uint32_t old_ztop = ztop_state->z_buffer_top;
+
+ /* This is important enough that I felt it warranted a comment.
+ *
+ * According to the docs, these are the conditions where ZTOP must be
+ * disabled:
+ * 1) Alpha testing enabled
+ * 2) Texture kill instructions in fragment shader
+ * 3) Chroma key culling enabled
+ * 4) W-buffering enabled
+ *
+ * The docs claim that for the first three cases, if no ZS writes happen,
+ * then ZTOP can be used.
+ *
+ * (3) will never apply since we do not support chroma-keyed operations.
+ * (4) will need to be re-examined (and this comment updated) if/when
+ * Hyper-Z becomes supported.
+ *
+ * Additionally, the following conditions require disabled ZTOP:
+ * 5) Depth writes in fragment shader
+ * 6) Outstanding occlusion queries
+ *
+ * This register causes stalls all the way from SC to CB when changed,
+ * but it is buffered on-chip so it does not hurt to write it if it has
+ * not changed.
+ *
+ * ~C.
+ */
+
+ /* ZS writes */
+ if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) &&
+ (r300_dsa_alpha_test_enabled(r300->dsa_state.state) || /* (1) */
+ r300_fs(r300)->shader->info.uses_kill)) { /* (2) */
+ ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
+ } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */
+ ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
+ } else if (r300->query_current) { /* (6) */
+ ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
+ } else {
+ ztop_state->z_buffer_top = R300_ZTOP_ENABLE;
+ }
+
+ if (ztop_state->z_buffer_top != old_ztop)
+ r300->ztop_state.dirty = TRUE;
+}
+
+void r300_update_hyperz_state(struct r300_context* r300)
+{
+ r300_update_ztop(r300);
+}
diff --git a/src/gallium/drivers/r300/r300_hyperz.h b/src/gallium/drivers/r300/r300_hyperz.h
new file mode 100644
index 0000000000..3df5053b89
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_hyperz.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_HYPERZ_H
+#define R300_HYPERZ_H
+
+struct r300_context;
+
+void r300_update_hyperz_state(struct r300_context* r300);
+
+#endif
diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c
new file mode 100644
index 0000000000..10cb468dfc
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -0,0 +1,231 @@
+/*
+ * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "util/u_memory.h"
+#include "util/u_simple_list.h"
+
+#include "r300_context.h"
+#include "r300_screen.h"
+#include "r300_emit.h"
+#include "r300_winsys.h"
+
+#include <stdio.h>
+
+static struct pipe_query *r300_create_query(struct pipe_context *pipe,
+ unsigned query_type)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_screen *r300screen = r300->screen;
+ struct r300_query *q;
+
+ assert(query_type == PIPE_QUERY_OCCLUSION_COUNTER);
+
+ q = CALLOC_STRUCT(r300_query);
+ if (!q)
+ return NULL;
+
+ q->type = query_type;
+ q->domain = R300_DOMAIN_GTT;
+ q->buffer_size = 4096;
+
+ if (r300screen->caps.family == CHIP_FAMILY_RV530)
+ q->num_pipes = r300screen->caps.num_z_pipes;
+ else
+ q->num_pipes = r300screen->caps.num_frag_pipes;
+
+ insert_at_tail(&r300->query_list, q);
+
+ /* Open up the occlusion query buffer. */
+ q->buffer = r300->rws->buffer_create(r300->rws, 4096, 0, q->domain, q->buffer_size);
+
+ return (struct pipe_query*)q;
+}
+
+static void r300_destroy_query(struct pipe_context* pipe,
+ struct pipe_query* query)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_query* q = r300_query(query);
+
+ r300->rws->buffer_reference(r300->rws, &q->buffer, NULL);
+ remove_from_list(q);
+ FREE(query);
+}
+
+void r300_resume_query(struct r300_context *r300,
+ struct r300_query *query)
+{
+ r300->query_current = query;
+ r300->query_start.dirty = TRUE;
+}
+
+static void r300_begin_query(struct pipe_context* pipe,
+ struct pipe_query* query)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_query* q = r300_query(query);
+
+ if (r300->query_current != NULL) {
+ fprintf(stderr, "r300: begin_query: "
+ "Some other query has already been started.\n");
+ assert(0);
+ return;
+ }
+
+ q->num_results = 0;
+ r300_resume_query(r300, q);
+}
+
+void r300_stop_query(struct r300_context *r300)
+{
+ r300_emit_query_end(r300);
+ r300->query_current = NULL;
+}
+
+static void r300_end_query(struct pipe_context* pipe,
+ struct pipe_query* query)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_query *q = r300_query(query);
+
+ if (q != r300->query_current) {
+ fprintf(stderr, "r300: end_query: Got invalid query.\n");
+ assert(0);
+ return;
+ }
+
+ r300_stop_query(r300);
+}
+
+static boolean r300_get_query_result(struct pipe_context* pipe,
+ struct pipe_query* query,
+ boolean wait,
+ void* vresult)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_query *q = r300_query(query);
+ unsigned flags, i;
+ uint32_t temp, *map;
+ uint64_t *result = (uint64_t*)vresult;
+
+ if (!q->flushed)
+ pipe->flush(pipe, 0, NULL);
+
+ flags = PIPE_TRANSFER_READ | (!wait ? PIPE_TRANSFER_DONTBLOCK : 0);
+
+ map = r300->rws->buffer_map(r300->rws, q->buffer, flags);
+ if (!map)
+ return FALSE;
+
+ /* Sum up the results. */
+ temp = 0;
+ for (i = 0; i < q->num_results; i++) {
+ temp += *map;
+ map++;
+ }
+
+ r300->rws->buffer_unmap(r300->rws, q->buffer);
+
+ *result = temp;
+ return TRUE;
+}
+
+static void r300_render_condition(struct pipe_context *pipe,
+ struct pipe_query *query,
+ uint mode)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ uint64_t result;
+ boolean wait;
+
+ if (query) {
+ wait = mode == PIPE_RENDER_COND_WAIT ||
+ mode == PIPE_RENDER_COND_BY_REGION_WAIT;
+
+ if (!r300_get_query_result(pipe, query, wait, &result)) {
+ r300->skip_rendering = FALSE;
+ }
+
+ r300->skip_rendering = result == 0;
+ } else {
+ r300->skip_rendering = FALSE;
+ }
+}
+
+/***************************************************************************
+ * Fake occlusion queries (for debugging)
+ ***************************************************************************/
+
+static unsigned r300_fake_query;
+
+static struct pipe_query *r300_fake_create_query(struct pipe_context *pipe,
+ unsigned query_type)
+{
+ return (struct pipe_query*)&r300_fake_query;
+}
+
+static void r300_fake_destroy_query(struct pipe_context* pipe,
+ struct pipe_query* query)
+{
+}
+
+static void r300_fake_begin_query(struct pipe_context* pipe,
+ struct pipe_query* query)
+{
+}
+
+static void r300_fake_end_query(struct pipe_context* pipe,
+ struct pipe_query* query)
+{
+}
+
+static boolean r300_fake_get_query_result(struct pipe_context* pipe,
+ struct pipe_query* query,
+ boolean wait, void* vresult)
+{
+ uint64_t *result = (uint64_t*)vresult;
+ *result = 1000000;
+ return TRUE;
+}
+
+static void r300_fake_render_condition(struct pipe_context *pipe,
+ struct pipe_query *query, uint mode)
+{
+}
+
+void r300_init_query_functions(struct r300_context* r300) {
+ if (DBG_ON(r300, DBG_FAKE_OCC)) {
+ r300->context.create_query = r300_fake_create_query;
+ r300->context.destroy_query = r300_fake_destroy_query;
+ r300->context.begin_query = r300_fake_begin_query;
+ r300->context.end_query = r300_fake_end_query;
+ r300->context.get_query_result = r300_fake_get_query_result;
+ r300->context.render_condition = r300_fake_render_condition;
+ } else {
+ r300->context.create_query = r300_create_query;
+ r300->context.destroy_query = r300_destroy_query;
+ r300->context.begin_query = r300_begin_query;
+ r300->context.end_query = r300_end_query;
+ r300->context.get_query_result = r300_get_query_result;
+ r300->context.render_condition = r300_render_condition;
+ }
+}
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
new file mode 100644
index 0000000000..c783998c78
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -0,0 +1,3491 @@
+/**************************************************************************
+
+Copyright (C) 2004-2005 Nicolai Haehnle et al.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+**************************************************************************/
+
+/* *INDENT-OFF* */
+
+#ifndef _R300_REG_H
+#define _R300_REG_H
+
+#define R300_MC_INIT_MISC_LAT_TIMER 0x180
+# define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT 0
+# define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT 4
+# define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT 8
+# define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT 12
+# define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT 16
+# define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT 20
+# define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT 24
+# define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT 28
+
+
+#define R300_MC_INIT_GFX_LAT_TIMER 0x154
+# define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT 0
+# define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT 4
+# define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT 8
+# define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT 12
+# define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT 16
+# define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT 20
+# define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT 24
+# define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT 28
+
+/*
+ * This file contains registers and constants for the R300. They have been
+ * found mostly by examining command buffers captured using glxtest, as well
+ * as by extrapolating some known registers and constants from the R200.
+ * I am fairly certain that they are correct unless stated otherwise
+ * in comments.
+ */
+
+#define R300_SE_VPORT_XSCALE 0x1D98
+#define R300_SE_VPORT_XOFFSET 0x1D9C
+#define R300_SE_VPORT_YSCALE 0x1DA0
+#define R300_SE_VPORT_YOFFSET 0x1DA4
+#define R300_SE_VPORT_ZSCALE 0x1DA8
+#define R300_SE_VPORT_ZOFFSET 0x1DAC
+
+#define R300_VAP_PORT_IDX0 0x2040
+/*
+ * Vertex Array Processing (VAP) Control
+ */
+#define R300_VAP_CNTL 0x2080
+# define R300_PVS_NUM_SLOTS_SHIFT 0
+# define R300_PVS_NUM_CNTLRS_SHIFT 4
+# define R300_PVS_NUM_FPUS_SHIFT 8
+# define R300_VF_MAX_VTX_NUM_SHIFT 18
+# define R300_PVS_NUM_SLOTS(x) ((x) << 0)
+# define R300_PVS_NUM_CNTLRS(x) ((x) << 4)
+# define R300_PVS_NUM_FPUS(x) ((x) << 8)
+# define R300_PVS_VF_MAX_VTX_NUM(x) ((x) << 18)
+# define R300_GL_CLIP_SPACE_DEF (0 << 22)
+# define R300_DX_CLIP_SPACE_DEF (1 << 22)
+# define R500_TCL_STATE_OPTIMIZATION (1 << 23)
+
+/* This register is written directly and also starts data section
+ * in many 3d CP_PACKET3's
+ */
+#define R300_VAP_VF_CNTL 0x2084
+# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0
+# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0)
+# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0)
+# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0)
+# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0)
+# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0)
+# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0)
+# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0)
+# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0)
+# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0)
+# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0)
+# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0)
+
+# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4
+ /* State based - direct writes to registers trigger vertex
+ generation */
+# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4)
+# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4)
+# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4)
+# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4)
+
+ /* I don't think I saw these three used.. */
+# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6
+# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9
+# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10
+
+ /* index size - when not set the indices are assumed to be 16 bit */
+# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11)
+# define R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS (1<<14)
+ /* number of vertices */
+# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16
+
+#define R500_VAP_INDEX_OFFSET 0x208c
+
+#define R500_VAP_ALT_NUM_VERTICES 0x2088
+
+#define R300_VAP_OUTPUT_VTX_FMT_0 0x2090
+# define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3)
+# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4)
+# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16)
+
+#define R300_VAP_OUTPUT_VTX_FMT_1 0x2094
+ /* each of the following is 3 bits wide, specifies number
+ of components */
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18
+# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21
+# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT 0
+# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT 1
+# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2
+# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3
+# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4
+
+#define R300_VAP_VPORT_XSCALE 0x2098
+#define R300_VAP_VPORT_XOFFSET 0x209c
+#define R300_VAP_VPORT_YSCALE 0x20a0
+#define R300_VAP_VPORT_YOFFSET 0x20a4
+#define R300_VAP_VPORT_ZSCALE 0x20a8
+#define R300_VAP_VPORT_ZOFFSET 0x20ac
+
+#define R300_VAP_VTE_CNTL 0x20b0
+#define R300_SE_VTE_CNTL R300_VAP_VTE_CNTL
+# define R300_VPORT_X_SCALE_ENA (1 << 0)
+# define R300_VPORT_X_OFFSET_ENA (1 << 1)
+# define R300_VPORT_Y_SCALE_ENA (1 << 2)
+# define R300_VPORT_Y_OFFSET_ENA (1 << 3)
+# define R300_VPORT_Z_SCALE_ENA (1 << 4)
+# define R300_VPORT_Z_OFFSET_ENA (1 << 5)
+# define R300_VTX_XY_FMT (1 << 8)
+# define R300_VTX_Z_FMT (1 << 9)
+# define R300_VTX_W0_FMT (1 << 10)
+# define R300_SERIAL_PROC_ENA (1 << 11)
+
+#define R300_VAP_VTX_SIZE 0x20b4
+
+/* BEGIN: Vertex data assembly - lots of uncertainties */
+
+/* gap */
+
+/* Maximum Vertex Indx Clamp */
+#define R300_VAP_VF_MAX_VTX_INDX 0x2134
+/* Minimum Vertex Indx Clamp */
+#define R300_VAP_VF_MIN_VTX_INDX 0x2138
+
+/** Vertex assembler/processor control status */
+#define R300_VAP_CNTL_STATUS 0x2140
+/* No swap at all (default) */
+# define R300_VC_NO_SWAP (0 << 0)
+/* 16-bit swap: 0xAABBCCDD becomes 0xBBAADDCC */
+# define R300_VC_16BIT_SWAP (1 << 0)
+/* 32-bit swap: 0xAABBCCDD becomes 0xDDCCBBAA */
+# define R300_VC_32BIT_SWAP (2 << 0)
+/* Half-dword swap: 0xAABBCCDD becomes 0xCCDDAABB */
+# define R300_VC_HALF_DWORD_SWAP (3 << 0)
+/* The TCL engine will not be used (as it is logically or even physically removed) */
+# define R300_VAP_TCL_BYPASS (1 << 8)
+/* Read only flag if TCL engine is busy. */
+# define R300_VAP_PVS_BUSY (1 << 11)
+/* TODO: gap for MAX_MPS */
+/* Read only flag if the vertex store is busy. */
+# define R300_VAP_VS_BUSY (1 << 24)
+/* Read only flag if the reciprocal engine is busy. */
+# define R300_VAP_RCP_BUSY (1 << 25)
+/* Read only flag if the viewport transform engine is busy. */
+# define R300_VAP_VTE_BUSY (1 << 26)
+/* Read only flag if the memory interface unit is busy. */
+# define R300_VAP_MUI_BUSY (1 << 27)
+/* Read only flag if the vertex cache is busy. */
+# define R300_VAP_VC_BUSY (1 << 28)
+/* Read only flag if the vertex fetcher is busy. */
+# define R300_VAP_VF_BUSY (1 << 29)
+/* Read only flag if the register pipeline is busy. */
+# define R300_VAP_REGPIPE_BUSY (1 << 30)
+/* Read only flag if the VAP engine is busy. */
+# define R300_VAP_VAP_BUSY (1 << 31)
+
+/* gap */
+
+/* Where do we get our vertex data?
+ *
+ * Vertex data either comes either from immediate mode registers or from
+ * vertex arrays.
+ * There appears to be no mixed mode (though we can force the pitch of
+ * vertex arrays to 0, effectively reusing the same element over and over
+ * again).
+ *
+ * Immediate mode is controlled by the INPUT_CNTL registers. I am not sure
+ * if these registers influence vertex array processing.
+ *
+ * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3.
+ *
+ * In both cases, vertex attributes are then passed through INPUT_ROUTE.
+ *
+ * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data
+ * into the vertex processor's input registers.
+ * The first word routes the first input, the second word the second, etc.
+ * The corresponding input is routed into the register with the given index.
+ * The list is ended by a word with INPUT_ROUTE_END set.
+ *
+ * Always set COMPONENTS_4 in immediate mode.
+ */
+
+#define R300_VAP_PROG_STREAM_CNTL_0 0x2150
+# define R300_DATA_TYPE_0_SHIFT 0
+# define R300_DATA_TYPE_FLOAT_1 0
+# define R300_DATA_TYPE_FLOAT_2 1
+# define R300_DATA_TYPE_FLOAT_3 2
+# define R300_DATA_TYPE_FLOAT_4 3
+# define R300_DATA_TYPE_BYTE 4
+# define R300_DATA_TYPE_D3DCOLOR 5
+# define R300_DATA_TYPE_SHORT_2 6
+# define R300_DATA_TYPE_SHORT_4 7
+# define R300_DATA_TYPE_VECTOR_3_TTT 8
+# define R300_DATA_TYPE_VECTOR_3_EET 9
+# define R300_DATA_TYPE_FLOAT_8 10
+# define R300_DATA_TYPE_FLT16_2 11
+# define R300_DATA_TYPE_FLT16_4 12
+# define R300_SKIP_DWORDS_SHIFT 4
+# define R300_DST_VEC_LOC_SHIFT 8
+# define R300_LAST_VEC (1 << 13)
+# define R300_SIGNED (1 << 14)
+# define R300_NORMALIZE (1 << 15)
+# define R300_DATA_TYPE_1_SHIFT 16
+#define R300_VAP_PROG_STREAM_CNTL_1 0x2154
+#define R300_VAP_PROG_STREAM_CNTL_2 0x2158
+#define R300_VAP_PROG_STREAM_CNTL_3 0x215C
+#define R300_VAP_PROG_STREAM_CNTL_4 0x2160
+#define R300_VAP_PROG_STREAM_CNTL_5 0x2164
+#define R300_VAP_PROG_STREAM_CNTL_6 0x2168
+#define R300_VAP_PROG_STREAM_CNTL_7 0x216C
+/* gap */
+
+/* Notes:
+ * - always set up to produce at least two attributes:
+ * if vertex program uses only position, fglrx will set normal, too
+ * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are always equal.
+ */
+#define R300_VAP_VTX_STATE_CNTL 0x2180
+# define R300_COLOR_0_ASSEMBLY_SHIFT 0
+# define R300_SEL_COLOR 0
+# define R300_SEL_USER_COLOR_0 1
+# define R300_SEL_USER_COLOR_1 2
+# define R300_COLOR_1_ASSEMBLY_SHIFT 2
+# define R300_COLOR_2_ASSEMBLY_SHIFT 4
+# define R300_COLOR_3_ASSEMBLY_SHIFT 6
+# define R300_COLOR_4_ASSEMBLY_SHIFT 8
+# define R300_COLOR_5_ASSEMBLY_SHIFT 10
+# define R300_COLOR_6_ASSEMBLY_SHIFT 12
+# define R300_COLOR_7_ASSEMBLY_SHIFT 14
+# define R300_UPDATE_USER_COLOR_0_ENA (1 << 16)
+
+/*
+ * Each bit in this field applies to the corresponding vector in the VSM
+ * memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit
+ * is set, then the corresponding 4-Dword Vector is output into the Vertex Stream.
+ */
+#define R300_VAP_VSM_VTX_ASSM 0x2184
+# define R300_INPUT_CNTL_POS 0x00000001
+# define R300_INPUT_CNTL_NORMAL 0x00000002
+# define R300_INPUT_CNTL_COLOR 0x00000004
+# define R300_INPUT_CNTL_TC0 0x00000400
+# define R300_INPUT_CNTL_TC1 0x00000800
+# define R300_INPUT_CNTL_TC2 0x00001000 /* GUESS */
+# define R300_INPUT_CNTL_TC3 0x00002000 /* GUESS */
+# define R300_INPUT_CNTL_TC4 0x00004000 /* GUESS */
+# define R300_INPUT_CNTL_TC5 0x00008000 /* GUESS */
+# define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */
+# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */
+
+/* Programmable Stream Control Signed Normalize Control */
+#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc
+# define SGN_NORM_ZERO 0
+# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1
+# define SGN_NORM_NO_ZERO 2
+# define R300_SGN_NORM_NO_ZERO (SGN_NORM_NO_ZERO | \
+ (SGN_NORM_NO_ZERO << 2) | (SGN_NORM_NO_ZERO << 4) | \
+ (SGN_NORM_NO_ZERO << 6) | (SGN_NORM_NO_ZERO << 8) | \
+ (SGN_NORM_NO_ZERO << 10) | (SGN_NORM_NO_ZERO << 12) | \
+ (SGN_NORM_NO_ZERO << 14) | (SGN_NORM_NO_ZERO << 16) | \
+ (SGN_NORM_NO_ZERO << 18) | (SGN_NORM_NO_ZERO << 20) | \
+ (SGN_NORM_NO_ZERO << 22) | (SGN_NORM_NO_ZERO << 24) | \
+ (SGN_NORM_NO_ZERO << 26) | (SGN_NORM_NO_ZERO << 28) | \
+ (SGN_NORM_NO_ZERO << 30))
+
+/* gap */
+
+/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0
+ * are set to a swizzling bit pattern, other words are 0.
+ *
+ * In immediate mode, the pattern is always set to xyzw. In vertex array
+ * mode, the swizzling pattern is e.g. used to set zw components in texture
+ * coordinates with only tweo components.
+ */
+#define R300_VAP_PROG_STREAM_CNTL_EXT_0 0x21e0
+# define R300_SWIZZLE0_SHIFT 0
+# define R300_SWIZZLE_SELECT_X_SHIFT 0
+# define R300_SWIZZLE_SELECT_Y_SHIFT 3
+# define R300_SWIZZLE_SELECT_Z_SHIFT 6
+# define R300_SWIZZLE_SELECT_W_SHIFT 9
+
+# define R300_SWIZZLE_SELECT_X 0
+# define R300_SWIZZLE_SELECT_Y 1
+# define R300_SWIZZLE_SELECT_Z 2
+# define R300_SWIZZLE_SELECT_W 3
+# define R300_SWIZZLE_SELECT_FP_ZERO 4
+# define R300_SWIZZLE_SELECT_FP_ONE 5
+/* alternate forms for r300_emit.c */
+# define R300_INPUT_ROUTE_SELECT_X 0
+# define R300_INPUT_ROUTE_SELECT_Y 1
+# define R300_INPUT_ROUTE_SELECT_Z 2
+# define R300_INPUT_ROUTE_SELECT_W 3
+# define R300_INPUT_ROUTE_SELECT_ZERO 4
+# define R300_INPUT_ROUTE_SELECT_ONE 5
+
+# define R300_WRITE_ENA_SHIFT 12
+# define R300_WRITE_ENA_X 1
+# define R300_WRITE_ENA_Y 2
+# define R300_WRITE_ENA_Z 4
+# define R300_WRITE_ENA_W 8
+# define R300_SWIZZLE1_SHIFT 16
+
+# define R300_VAP_SWIZZLE_X001 \
+ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
+ (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Y_SHIFT) | \
+ (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \
+ (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \
+ (0xf << R300_WRITE_ENA_SHIFT))
+
+# define R300_VAP_SWIZZLE_XY01 \
+ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \
+ (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_SHIFT) | \
+ (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \
+ (0xf << R300_WRITE_ENA_SHIFT))
+
+# define R300_VAP_SWIZZLE_XYZ1 \
+ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \
+ (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | \
+ (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_SHIFT) | \
+ (0xf << R300_WRITE_ENA_SHIFT))
+
+# define R300_VAP_SWIZZLE_XYZW \
+ ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \
+ (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \
+ (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | \
+ (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | \
+ (0xf << R300_WRITE_ENA_SHIFT))
+
+#define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4
+#define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8
+#define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec
+#define R300_VAP_PROG_STREAM_CNTL_EXT_4 0x21f0
+#define R300_VAP_PROG_STREAM_CNTL_EXT_5 0x21f4
+#define R300_VAP_PROG_STREAM_CNTL_EXT_6 0x21f8
+#define R300_VAP_PROG_STREAM_CNTL_EXT_7 0x21fc
+
+/* END: Vertex data assembly */
+
+/* gap */
+
+/* BEGIN: Upload vertex program and data */
+
+/*
+ * The programmable vertex shader unit has a memory bank of unknown size
+ * that can be written to in 16 byte units by writing the address into
+ * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs).
+ *
+ * Pointers into the memory bank are always in multiples of 16 bytes.
+ *
+ * The memory bank is divided into areas with fixed meaning.
+ *
+ * Starting at address UPLOAD_PROGRAM: Vertex program instructions.
+ * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB),
+ * whereas the difference between known addresses suggests size 512.
+ *
+ * Starting at address UPLOAD_PARAMETERS: Vertex program parameters.
+ * Native reported limits and the VPI layout suggest size 256, whereas
+ * difference between known addresses suggests size 512.
+ *
+ * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the
+ * floating point pointsize. The exact purpose of this state is uncertain,
+ * as there is also the R300_RE_POINTSIZE register.
+ *
+ * Multiple vertex programs and parameter sets can be loaded at once,
+ * which could explain the size discrepancy.
+ */
+#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200
+# define R300_PVS_CODE_START 0
+# define R300_MAX_PVS_CODE_LINES 256
+# define R500_MAX_PVS_CODE_LINES 1024
+# define R300_PVS_CONST_START 512
+# define R500_PVS_CONST_START 1024
+# define R300_MAX_PVS_CONST_VECS 256
+# define R500_MAX_PVS_CONST_VECS 1024
+# define R300_PVS_UCP_START 1024
+# define R500_PVS_UCP_START 1536
+# define R300_POINT_VPORT_SCALE_OFFSET 1030
+# define R500_POINT_VPORT_SCALE_OFFSET 1542
+# define R300_POINT_GEN_TEX_OFFSET 1031
+# define R500_POINT_GEN_TEX_OFFSET 1543
+
+/*
+ * These are obsolete defines form r300_context.h, but they might give some
+ * clues when investigating the addresses further...
+ */
+#if 0
+#define VSF_DEST_PROGRAM 0x0
+#define VSF_DEST_MATRIX0 0x200
+#define VSF_DEST_MATRIX1 0x204
+#define VSF_DEST_MATRIX2 0x208
+#define VSF_DEST_VECTOR0 0x20c
+#define VSF_DEST_VECTOR1 0x20d
+#define VSF_DEST_UNKNOWN1 0x400
+#define VSF_DEST_UNKNOWN2 0x406
+#endif
+
+/* gap */
+
+#define R300_VAP_PVS_UPLOAD_DATA 0x2208
+
+/* END: Upload vertex program and data */
+
+/* gap */
+
+/* I do not know the purpose of this register. However, I do know that
+ * it is set to 221C_CLEAR for clear operations and to 221C_NORMAL
+ * for normal rendering.
+ *
+ * 2007-11-05: This register is the user clip plane control register, but there
+ * also seems to be a rendering mode control; the NORMAL/CLEAR defines.
+ *
+ * See bug #9871. http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view
+ */
+#define R300_VAP_CLIP_CNTL 0x221C
+# define R300_VAP_UCP_ENABLE_0 (1 << 0)
+# define R300_VAP_UCP_ENABLE_1 (1 << 1)
+# define R300_VAP_UCP_ENABLE_2 (1 << 2)
+# define R300_VAP_UCP_ENABLE_3 (1 << 3)
+# define R300_VAP_UCP_ENABLE_4 (1 << 4)
+# define R300_VAP_UCP_ENABLE_5 (1 << 5)
+# define R300_PS_UCP_MODE_DIST_COP (0 << 14)
+# define R300_PS_UCP_MODE_RADIUS_COP (1 << 14)
+# define R300_PS_UCP_MODE_RADIUS_COP_CLIP (2 << 14)
+# define R300_PS_UCP_MODE_CLIP_AS_TRIFAN (3 << 14)
+# define R300_CLIP_DISABLE (1 << 16)
+# define R300_UCP_CULL_ONLY_ENABLE (1 << 17)
+# define R300_BOUNDARY_EDGE_FLAG_ENABLE (1 << 18)
+# define R500_COLOR2_IS_TEXTURE (1 << 20)
+# define R500_COLOR3_IS_TEXTURE (1 << 21)
+
+/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first
+ * plane is per-pixel and the second plane is per-vertex.
+ *
+ * This was determined by experimentation alone but I believe it is correct.
+ *
+ * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest.
+ */
+#define R300_VAP_GB_VERT_CLIP_ADJ 0x2220
+#define R300_VAP_GB_VERT_DISC_ADJ 0x2224
+#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228
+#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c
+
+/* gap */
+
+/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between
+ * rendering commands and overwriting vertex program parameters.
+ * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and
+ * avoids bugs caused by still running shaders reading bad data from memory.
+ */
+#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284
+
+/* This register is used to define the number of core clocks to wait for a
+ * vertex to be received by the VAP input controller (while the primitive
+ * path is backed up) before forcing any accumulated vertices to be submitted
+ * to the vertex processing path.
+ */
+#define VAP_PVS_VTX_TIMEOUT_REG 0x2288
+# define R300_2288_R300 0x00750000 /* -- nh */
+# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */
+
+/* gap */
+
+/* Addresses are relative to the vertex program instruction area of the
+ * memory bank. PROGRAM_END points to the last instruction of the active
+ * program
+ *
+ * The meaning of the two UNKNOWN fields is obviously not known. However,
+ * experiments so far have shown that both *must* point to an instruction
+ * inside the vertex program, otherwise the GPU locks up.
+ *
+ * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and
+ * R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to
+ * position takes place.
+ *
+ * Most likely this is used to ignore rest of the program in cases
+ * where group of verts arent visible. For some reason this "section"
+ * is sometimes accepted other instruction that have no relationship with
+ * position calculations.
+ */
+#define R300_VAP_PVS_CODE_CNTL_0 0x22D0
+# define R300_PVS_FIRST_INST_SHIFT 0
+# define R300_PVS_XYZW_VALID_INST_SHIFT 10
+# define R300_PVS_LAST_INST_SHIFT 20
+# define R300_PVS_FIRST_INST(x) ((x) << 0)
+# define R300_PVS_XYZW_VALID_INST(x) ((x) << 10)
+# define R300_PVS_LAST_INST(x) ((x) << 20)
+/* Addresses are relative to the vertex program parameters area. */
+#define R300_VAP_PVS_CONST_CNTL 0x22D4
+# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0
+# define R300_PVS_MAX_CONST_ADDR_SHIFT 16
+# define R300_PVS_MAX_CONST_ADDR(x) ((x) << 16)
+#define R300_VAP_PVS_CODE_CNTL_1 0x22D8
+# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0
+#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC
+
+/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for
+ * immediate vertices
+ */
+#define R300_VAP_VTX_COLOR_R 0x2464
+#define R300_VAP_VTX_COLOR_G 0x2468
+#define R300_VAP_VTX_COLOR_B 0x246C
+#define R300_VAP_VTX_POS_0_X_1 0x2490 /* used for glVertex2*() */
+#define R300_VAP_VTX_POS_0_Y_1 0x2494
+#define R300_VAP_VTX_COLOR_PKD 0x249C /* RGBA */
+#define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */
+#define R300_VAP_VTX_POS_0_Y_2 0x24A4
+#define R300_VAP_VTX_POS_0_Z_2 0x24A8
+/* write 0 to indicate end of packet? */
+#define R300_VAP_VTX_END_OF_PKT 0x24AC
+
+/* gap */
+
+/* These are values from r300_reg/r300_reg.h - they are known to be correct
+ * and are here so we can use one register file instead of several
+ * - Vladimir
+ */
+#define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000
+# define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT (1<<2)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT (1<<3)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT (1<<4)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE (0xf<<5)
+# define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT (0x1<<16)
+
+#define R300_GB_VAP_RASTER_VTX_FMT_1 0x4004
+ /* each of the following is 3 bits wide, specifies number
+ of components */
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18
+# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21
+
+/* UNK30 seems to enables point to quad transformation on textures
+ * (or something closely related to that).
+ * This bit is rather fatal at the time being due to lackings at pixel
+ * shader side
+ * Specifies top of Raster pipe specific enable controls.
+ */
+#define R300_GB_ENABLE 0x4008
+# define R300_GB_POINT_STUFF_DISABLE (0 << 0)
+# define R300_GB_POINT_STUFF_ENABLE (1 << 0) /* Specifies if points will have stuffed texture coordinates. */
+# define R300_GB_LINE_STUFF_DISABLE (0 << 1)
+# define R300_GB_LINE_STUFF_ENABLE (1 << 1) /* Specifies if lines will have stuffed texture coordinates. */
+# define R300_GB_TRIANGLE_STUFF_DISABLE (0 << 2)
+# define R300_GB_TRIANGLE_STUFF_ENABLE (1 << 2) /* Specifies if triangles will have stuffed texture coordinates. */
+# define R300_GB_STENCIL_AUTO_DISABLE (0 << 4)
+# define R300_GB_STENCIL_AUTO_ENABLE (1 << 4) /* Enable stencil auto inc/dec based on triangle cw/ccw, force into dzy low bit. */
+# define R300_GB_STENCIL_AUTO_FORCE (2 << 4) /* Force 0 into dzy low bit. */
+
+ /* each of the following is 2 bits wide */
+#define R300_GB_TEX_REPLICATE 0 /* Replicate VAP source texture coordinates (S,T,[R,Q]). */
+#define R300_GB_TEX_ST 1 /* Stuff with source texture coordinates (S,T). */
+#define R300_GB_TEX_STR 2 /* Stuff with source texture coordinates (S,T,R). */
+# define R300_GB_TEX0_SOURCE_SHIFT 16
+# define R300_GB_TEX1_SOURCE_SHIFT 18
+# define R300_GB_TEX2_SOURCE_SHIFT 20
+# define R300_GB_TEX3_SOURCE_SHIFT 22
+# define R300_GB_TEX4_SOURCE_SHIFT 24
+# define R300_GB_TEX5_SOURCE_SHIFT 26
+# define R300_GB_TEX6_SOURCE_SHIFT 28
+# define R300_GB_TEX7_SOURCE_SHIFT 30
+
+/* MSPOS - positions for multisample antialiasing (?) */
+#define R300_GB_MSPOS0 0x4010
+ /* shifts - each of the fields is 4 bits */
+# define R300_GB_MSPOS0__MS_X0_SHIFT 0
+# define R300_GB_MSPOS0__MS_Y0_SHIFT 4
+# define R300_GB_MSPOS0__MS_X1_SHIFT 8
+# define R300_GB_MSPOS0__MS_Y1_SHIFT 12
+# define R300_GB_MSPOS0__MS_X2_SHIFT 16
+# define R300_GB_MSPOS0__MS_Y2_SHIFT 20
+# define R300_GB_MSPOS0__MSBD0_Y 24
+# define R300_GB_MSPOS0__MSBD0_X 28
+
+#define R300_GB_MSPOS1 0x4014
+# define R300_GB_MSPOS1__MS_X3_SHIFT 0
+# define R300_GB_MSPOS1__MS_Y3_SHIFT 4
+# define R300_GB_MSPOS1__MS_X4_SHIFT 8
+# define R300_GB_MSPOS1__MS_Y4_SHIFT 12
+# define R300_GB_MSPOS1__MS_X5_SHIFT 16
+# define R300_GB_MSPOS1__MS_Y5_SHIFT 20
+# define R300_GB_MSPOS1__MSBD1 24
+
+/* Specifies the graphics pipeline configuration for rasterization. */
+#define R300_GB_TILE_CONFIG 0x4018
+# define R300_GB_TILE_DISABLE (0 << 0)
+# define R300_GB_TILE_ENABLE (1 << 0)
+# define R300_GB_TILE_PIPE_COUNT_RV300 (0 << 1) /* RV350 (1 pipe, 1 ctx) */
+# define R300_GB_TILE_PIPE_COUNT_R300 (3 << 1) /* R300 (2 pipes, 1 ctx) */
+# define R300_GB_TILE_PIPE_COUNT_R420_3P (6 << 1) /* R420-3P (3 pipes, 1 ctx) */
+# define R300_GB_TILE_PIPE_COUNT_R420 (7 << 1) /* R420 (4 pipes, 1 ctx) */
+# define R300_GB_TILE_SIZE_8 (0 << 4)
+# define R300_GB_TILE_SIZE_16 (1 << 4)
+# define R300_GB_TILE_SIZE_32 (2 << 4)
+# define R300_GB_SUPER_SIZE_1 (0 << 6)
+# define R300_GB_SUPER_SIZE_2 (1 << 6)
+# define R300_GB_SUPER_SIZE_4 (2 << 6)
+# define R300_GB_SUPER_SIZE_8 (3 << 6)
+# define R300_GB_SUPER_SIZE_16 (4 << 6)
+# define R300_GB_SUPER_SIZE_32 (5 << 6)
+# define R300_GB_SUPER_SIZE_64 (6 << 6)
+# define R300_GB_SUPER_SIZE_128 (7 << 6)
+# define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */
+# define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */
+# define R300_GB_SUPER_TILE_A (0 << 15)
+# define R300_GB_SUPER_TILE_B (1 << 15)
+# define R300_GB_SUBPIXEL_1_12 (0 << 16)
+# define R300_GB_SUBPIXEL_1_16 (1 << 16)
+# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17)
+# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17)
+# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17)
+# define R300_GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17)
+# define R300_GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19)
+# define R300_GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19)
+# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20)
+# define R300_GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20)
+# define R300_GB_TILE_CONFIG_ALT_OFFSET (0 << 21)
+# define R300_GB_TILE_CONFIG_SUBPRECISION (0 << 22)
+# define R300_GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23)
+# define R300_GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23)
+# define R300_GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24)
+# define R300_GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24)
+
+/* Specifies the sizes of the various FIFO`s in the sc/rs/us. This register must be the first one written */
+#define R300_GB_FIFO_SIZE 0x4024
+ /* each of the following is 2 bits wide */
+#define R300_GB_FIFO_SIZE_32 0
+#define R300_GB_FIFO_SIZE_64 1
+#define R300_GB_FIFO_SIZE_128 2
+#define R300_GB_FIFO_SIZE_256 3
+# define R300_SC_IFIFO_SIZE_SHIFT 0
+# define R300_SC_TZFIFO_SIZE_SHIFT 2
+# define R300_SC_BFIFO_SIZE_SHIFT 4
+
+# define R300_US_OFIFO_SIZE_SHIFT 12
+# define R300_US_WFIFO_SIZE_SHIFT 14
+ /* the following use the same constants as above, but meaning is
+ is times 2 (i.e. instead of 32 words it means 64 */
+# define R300_RS_TFIFO_SIZE_SHIFT 6
+# define R300_RS_CFIFO_SIZE_SHIFT 8
+# define R300_US_RAM_SIZE_SHIFT 10
+ /* watermarks, 3 bits wide */
+# define R300_RS_HIGHWATER_COL_SHIFT 16
+# define R300_RS_HIGHWATER_TEX_SHIFT 19
+# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */
+# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24
+
+#define R300_GB_Z_PEQ_CONFIG 0x4028
+# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0)
+# define R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0)
+
+/* Specifies various polygon specific selects (fog, depth, perspective). */
+#define R300_GB_SELECT 0x401c
+# define R300_GB_FOG_SELECT_C0A (0 << 0)
+# define R300_GB_FOG_SELECT_C1A (1 << 0)
+# define R300_GB_FOG_SELECT_C2A (2 << 0)
+# define R300_GB_FOG_SELECT_C3A (3 << 0)
+# define R300_GB_FOG_SELECT_1_1_W (4 << 0)
+# define R300_GB_FOG_SELECT_Z (5 << 0)
+# define R300_GB_DEPTH_SELECT_Z (0 << 3)
+# define R300_GB_DEPTH_SELECT_1_1_W (1 << 3)
+# define R300_GB_W_SELECT_1_W (0 << 4)
+# define R300_GB_W_SELECT_1 (1 << 4)
+# define R300_GB_FOG_STUFF_DISABLE (0 << 5)
+# define R300_GB_FOG_STUFF_ENABLE (1 << 5)
+# define R300_GB_FOG_STUFF_TEX_SHIFT 6
+# define R300_GB_FOG_STUFF_TEX_MASK 0x000003c0
+# define R300_GB_FOG_STUFF_COMP_SHIFT 10
+# define R300_GB_FOG_STUFF_COMP_MASK 0x00000c00
+
+/* Specifies the graphics pipeline configuration for antialiasing. */
+#define R300_GB_AA_CONFIG 0x4020
+# define R300_GB_AA_CONFIG_AA_DISABLE (0 << 0)
+# define R300_GB_AA_CONFIG_AA_ENABLE (1 << 0)
+# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1)
+# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1)
+# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1)
+# define R300_GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1)
+
+/* Selects which of 4 pipes are active. */
+#define R300_GB_PIPE_SELECT 0x402c
+# define R300_GB_PIPE_SELECT_PIPE0_ID_SHIFT 0
+# define R300_GB_PIPE_SELECT_PIPE1_ID_SHIFT 2
+# define R300_GB_PIPE_SELECT_PIPE2_ID_SHIFT 4
+# define R300_GB_PIPE_SELECT_PIPE3_ID_SHIFT 6
+# define R300_GB_PIPE_SELECT_PIPE_MASK_SHIFT 8
+# define R300_GB_PIPE_SELECT_MAX_PIPE 12
+# define R300_GB_PIPE_SELECT_BAD_PIPES 14
+# define R300_GB_PIPE_SELECT_CONFIG_PIPES 18
+
+
+/* Specifies the sizes of the various FIFO`s in the sc/rs. */
+#define R300_GB_FIFO_SIZE1 0x4070
+/* High water mark for SC input fifo */
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f
+/* High water mark for SC input fifo (B) */
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0
+/* High water mark for RS colors' fifo */
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000
+/* High water mark for RS textures' fifo */
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18
+# define R300_GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000
+
+/* This table specifies the source location and format for up to 16 texture
+ * addresses (i[0]:i[15]) and four colors (c[0]:c[3])
+ */
+#define R500_RS_IP_0 0x4074
+#define R500_RS_IP_1 0x4078
+#define R500_RS_IP_2 0x407C
+#define R500_RS_IP_3 0x4080
+#define R500_RS_IP_4 0x4084
+#define R500_RS_IP_5 0x4088
+#define R500_RS_IP_6 0x408C
+#define R500_RS_IP_7 0x4090
+#define R500_RS_IP_8 0x4094
+#define R500_RS_IP_9 0x4098
+#define R500_RS_IP_10 0x409C
+#define R500_RS_IP_11 0x40A0
+#define R500_RS_IP_12 0x40A4
+#define R500_RS_IP_13 0x40A8
+#define R500_RS_IP_14 0x40AC
+#define R500_RS_IP_15 0x40B0
+#define R500_RS_IP_PTR_K0 62
+#define R500_RS_IP_PTR_K1 63
+#define R500_RS_IP_TEX_PTR_S_SHIFT 0
+#define R500_RS_IP_TEX_PTR_T_SHIFT 6
+#define R500_RS_IP_TEX_PTR_R_SHIFT 12
+#define R500_RS_IP_TEX_PTR_Q_SHIFT 18
+#define R500_RS_IP_COL_PTR_SHIFT 24
+#define R500_RS_IP_COL_FMT_SHIFT 27
+# define R500_RS_SEL_S(x) ((x) << 0)
+# define R500_RS_SEL_T(x) ((x) << 6)
+# define R500_RS_SEL_R(x) ((x) << 12)
+# define R500_RS_SEL_Q(x) ((x) << 18)
+# define R500_RS_COL_PTR(x) ((x) << 24)
+# define R500_RS_COL_FMT(x) ((x) << 27)
+/* gap */
+#define R500_RS_IP_OFFSET_DIS (0 << 31)
+#define R500_RS_IP_OFFSET_EN (1 << 31)
+
+/* gap */
+
+/* Zero to flush caches. */
+#define R300_TX_INVALTAGS 0x4100
+#define R300_TX_FLUSH 0x0
+
+/* The upper enable bits are guessed, based on fglrx reported limits. */
+#define R300_TX_ENABLE 0x4104
+# define R300_TX_ENABLE_0 (1 << 0)
+# define R300_TX_ENABLE_1 (1 << 1)
+# define R300_TX_ENABLE_2 (1 << 2)
+# define R300_TX_ENABLE_3 (1 << 3)
+# define R300_TX_ENABLE_4 (1 << 4)
+# define R300_TX_ENABLE_5 (1 << 5)
+# define R300_TX_ENABLE_6 (1 << 6)
+# define R300_TX_ENABLE_7 (1 << 7)
+# define R300_TX_ENABLE_8 (1 << 8)
+# define R300_TX_ENABLE_9 (1 << 9)
+# define R300_TX_ENABLE_10 (1 << 10)
+# define R300_TX_ENABLE_11 (1 << 11)
+# define R300_TX_ENABLE_12 (1 << 12)
+# define R300_TX_ENABLE_13 (1 << 13)
+# define R300_TX_ENABLE_14 (1 << 14)
+# define R300_TX_ENABLE_15 (1 << 15)
+
+#define R500_TX_FILTER_4 0x4110
+# define R500_TX_WEIGHT_1_SHIFT (0)
+# define R500_TX_WEIGHT_0_SHIFT (11)
+# define R500_TX_WEIGHT_PAIR (1<<22)
+# define R500_TX_PHASE_SHIFT (23)
+# define R500_TX_DIRECTION_HORIZONTAL (0<<27)
+# define R500_TX_DIRECTION_VERITCAL (1<<27)
+
+/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
+#define R300_GA_POINT_S0 0x4200
+
+/* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */
+#define R300_GA_POINT_T0 0x4204
+
+/* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */
+#define R300_GA_POINT_S1 0x4208
+
+/* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */
+#define R300_GA_POINT_T1 0x420c
+
+/* Specifies amount to shift integer position of vertex (screen space) before
+ * converting to float for triangle stipple.
+ */
+#define R300_GA_TRIANGLE_STIPPLE 0x4214
+# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0
+# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f
+# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16
+# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000
+
+/* The pointsize is given in multiples of 6. The pointsize can be enormous:
+ * Clear() renders a single point that fills the entire framebuffer.
+ * 1/2 Height of point; fixed (16.0), subpixel format (1/12 or 1/16, even if in
+ * 8b precision).
+ */
+#define R300_GA_POINT_SIZE 0x421C
+# define R300_POINTSIZE_Y_SHIFT 0
+# define R300_POINTSIZE_Y_MASK 0x0000ffff
+# define R300_POINTSIZE_X_SHIFT 16
+# define R300_POINTSIZE_X_MASK 0xffff0000
+# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6)
+
+/* Red fill color */
+#define R500_GA_FILL_R 0x4220
+
+/* Green fill color */
+#define R500_GA_FILL_G 0x4224
+
+/* Blue fill color */
+#define R500_GA_FILL_B 0x4228
+
+/* Alpha fill color */
+#define R500_GA_FILL_A 0x422c
+
+
+/* Specifies maximum and minimum point & sprite sizes for per vertex size
+ * specification. The lower part (15:0) is MIN and (31:16) is max.
+ */
+#define R300_GA_POINT_MINMAX 0x4230
+# define R300_GA_POINT_MINMAX_MIN_SHIFT 0
+# define R300_GA_POINT_MINMAX_MIN_MASK (0xFFFF << 0)
+# define R300_GA_POINT_MINMAX_MAX_SHIFT 16
+# define R300_GA_POINT_MINMAX_MAX_MASK (0xFFFF << 16)
+
+/* 1/2 width of line, in subpixels (1/12 or 1/16 only, even in 8b
+ * subprecision); (16.0) fixed format.
+ *
+ * The line width is given in multiples of 6.
+ * In default mode lines are classified as vertical lines.
+ * HO: horizontal
+ * VE: vertical or horizontal
+ * HO & VE: no classification
+ */
+#define R300_GA_LINE_CNTL 0x4234
+# define R300_GA_LINE_CNTL_WIDTH_SHIFT 0
+# define R300_GA_LINE_CNTL_WIDTH_MASK 0x0000ffff
+# define R300_GA_LINE_CNTL_END_TYPE_HOR (0 << 16)
+# define R300_GA_LINE_CNTL_END_TYPE_VER (1 << 16)
+# define R300_GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */
+# define R300_GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */
+# define R500_GA_LINE_CNTL_SORT_NO (0 << 18)
+# define R500_GA_LINE_CNTL_SORT_MINX_MINY (1 << 18)
+/** TODO: looks wrong */
+# define R300_LINESIZE_MAX (R300_GA_LINE_CNTL_WIDTH_MASK / 6)
+/** TODO: looks wrong */
+# define R300_LINE_CNT_HO (1 << 16)
+/** TODO: looks wrong */
+# define R300_LINE_CNT_VE (1 << 17)
+
+/* Line Stipple configuration information. */
+#define R300_GA_LINE_STIPPLE_CONFIG 0x4238
+# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0)
+# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0)
+# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0)
+# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2
+# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc
+
+/* Used to load US instructions and constants */
+#define R500_GA_US_VECTOR_INDEX 0x4250
+# define R500_GA_US_VECTOR_INDEX_SHIFT 0
+# define R500_GA_US_VECTOR_INDEX_MASK 0x000000ff
+# define R500_GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16)
+# define R500_GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16)
+# define R500_GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17)
+# define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17)
+
+/* Data register for loading US instructions and constants */
+#define R500_GA_US_VECTOR_DATA 0x4254
+
+/* Specifies color properties and mappings of textures. */
+#define R500_GA_COLOR_CONTROL_PS3 0x4258
+# define R500_TEX0_SHADING_PS3_SOLID (0 << 0)
+# define R500_TEX0_SHADING_PS3_FLAT (1 << 0)
+# define R500_TEX0_SHADING_PS3_GOURAUD (2 << 0)
+# define R500_TEX1_SHADING_PS3_SOLID (0 << 2)
+# define R500_TEX1_SHADING_PS3_FLAT (1 << 2)
+# define R500_TEX1_SHADING_PS3_GOURAUD (2 << 2)
+# define R500_TEX2_SHADING_PS3_SOLID (0 << 4)
+# define R500_TEX2_SHADING_PS3_FLAT (1 << 4)
+# define R500_TEX2_SHADING_PS3_GOURAUD (2 << 4)
+# define R500_TEX3_SHADING_PS3_SOLID (0 << 6)
+# define R500_TEX3_SHADING_PS3_FLAT (1 << 6)
+# define R500_TEX3_SHADING_PS3_GOURAUD (2 << 6)
+# define R500_TEX4_SHADING_PS3_SOLID (0 << 8)
+# define R500_TEX4_SHADING_PS3_FLAT (1 << 8)
+# define R500_TEX4_SHADING_PS3_GOURAUD (2 << 8)
+# define R500_TEX5_SHADING_PS3_SOLID (0 << 10)
+# define R500_TEX5_SHADING_PS3_FLAT (1 << 10)
+# define R500_TEX5_SHADING_PS3_GOURAUD (2 << 10)
+# define R500_TEX6_SHADING_PS3_SOLID (0 << 12)
+# define R500_TEX6_SHADING_PS3_FLAT (1 << 12)
+# define R500_TEX6_SHADING_PS3_GOURAUD (2 << 12)
+# define R500_TEX7_SHADING_PS3_SOLID (0 << 14)
+# define R500_TEX7_SHADING_PS3_FLAT (1 << 14)
+# define R500_TEX7_SHADING_PS3_GOURAUD (2 << 14)
+# define R500_TEX8_SHADING_PS3_SOLID (0 << 16)
+# define R500_TEX8_SHADING_PS3_FLAT (1 << 16)
+# define R500_TEX8_SHADING_PS3_GOURAUD (2 << 16)
+# define R500_TEX9_SHADING_PS3_SOLID (0 << 18)
+# define R500_TEX9_SHADING_PS3_FLAT (1 << 18)
+# define R500_TEX9_SHADING_PS3_GOURAUD (2 << 18)
+# define R500_TEX10_SHADING_PS3_SOLID (0 << 20)
+# define R500_TEX10_SHADING_PS3_FLAT (1 << 20)
+# define R500_TEX10_SHADING_PS3_GOURAUD (2 << 20)
+# define R500_COLOR0_TEX_OVERRIDE_NO (0 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22)
+# define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22)
+# define R500_COLOR1_TEX_OVERRIDE_NO (0 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26)
+# define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26)
+
+/* Returns idle status of various G3D block, captured when GA_IDLE written or
+ * when hard or soft reset asserted.
+ */
+#define R500_GA_IDLE 0x425c
+# define R500_GA_IDLE_PIPE3_Z_IDLE (0 << 0)
+# define R500_GA_IDLE_PIPE2_Z_IDLE (0 << 1)
+# define R500_GA_IDLE_PIPE3_CD_IDLE (0 << 2)
+# define R500_GA_IDLE_PIPE2_CD_IDLE (0 << 3)
+# define R500_GA_IDLE_PIPE3_FG_IDLE (0 << 4)
+# define R500_GA_IDLE_PIPE2_FG_IDLE (0 << 5)
+# define R500_GA_IDLE_PIPE3_US_IDLE (0 << 6)
+# define R500_GA_IDLE_PIPE2_US_IDLE (0 << 7)
+# define R500_GA_IDLE_PIPE3_SC_IDLE (0 << 8)
+# define R500_GA_IDLE_PIPE2_SC_IDLE (0 << 9)
+# define R500_GA_IDLE_PIPE3_RS_IDLE (0 << 10)
+# define R500_GA_IDLE_PIPE2_RS_IDLE (0 << 11)
+# define R500_GA_IDLE_PIPE1_Z_IDLE (0 << 12)
+# define R500_GA_IDLE_PIPE0_Z_IDLE (0 << 13)
+# define R500_GA_IDLE_PIPE1_CD_IDLE (0 << 14)
+# define R500_GA_IDLE_PIPE0_CD_IDLE (0 << 15)
+# define R500_GA_IDLE_PIPE1_FG_IDLE (0 << 16)
+# define R500_GA_IDLE_PIPE0_FG_IDLE (0 << 17)
+# define R500_GA_IDLE_PIPE1_US_IDLE (0 << 18)
+# define R500_GA_IDLE_PIPE0_US_IDLE (0 << 19)
+# define R500_GA_IDLE_PIPE1_SC_IDLE (0 << 20)
+# define R500_GA_IDLE_PIPE0_SC_IDLE (0 << 21)
+# define R500_GA_IDLE_PIPE1_RS_IDLE (0 << 22)
+# define R500_GA_IDLE_PIPE0_RS_IDLE (0 << 23)
+# define R500_GA_IDLE_SU_IDLE (0 << 24)
+# define R500_GA_IDLE_GA_IDLE (0 << 25)
+# define R500_GA_IDLE_GA_UNIT2_IDLE (0 << 26)
+
+/* Current value of stipple accumulator. */
+#define R300_GA_LINE_STIPPLE_VALUE 0x4260
+
+/* S Texture Coordinate Value for Vertex 0 of Line (stuff textures -- i.e. AA) */
+#define R300_GA_LINE_S0 0x4264
+/* S Texture Coordinate Value for Vertex 1 of Lines (V2 of parallelogram -- stuff textures -- i.e. AA) */
+#define R300_GA_LINE_S1 0x4268
+
+/* GA Input fifo high water marks */
+#define R500_GA_FIFO_CNTL 0x4270
+# define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007
+# define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0
+# define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038
+# define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3
+# define R500_GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0
+# define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT 6
+
+/* GA enhance/tweaks */
+#define R300_GA_ENHANCE 0x4274
+# define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0)
+# define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */
+# define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1)
+# define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */
+# define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */
+# define R500_GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */
+# define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3)
+# define R500_GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. */
+
+#define R300_GA_COLOR_CONTROL 0x4278
+# define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID (0 << 0)
+# define R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT (1 << 0)
+# define R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD (2 << 0)
+# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_SOLID (0 << 2)
+# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT (1 << 2)
+# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD (2 << 2)
+# define R300_GA_COLOR_CONTROL_RGB1_SHADING_SOLID (0 << 4)
+# define R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT (1 << 4)
+# define R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD (2 << 4)
+# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_SOLID (0 << 6)
+# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT (1 << 6)
+# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD (2 << 6)
+# define R300_GA_COLOR_CONTROL_RGB2_SHADING_SOLID (0 << 8)
+# define R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT (1 << 8)
+# define R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD (2 << 8)
+# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_SOLID (0 << 10)
+# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT (1 << 10)
+# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD (2 << 10)
+# define R300_GA_COLOR_CONTROL_RGB3_SHADING_SOLID (0 << 12)
+# define R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT (1 << 12)
+# define R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD (2 << 12)
+# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_SOLID (0 << 14)
+# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT (1 << 14)
+# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD (2 << 14)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST (0 << 16)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND (1 << 16)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_THIRD (2 << 16)
+# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST (3 << 16)
+
+# define R300_SHADE_MODEL_FLAT ( \
+ R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | \
+ R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \
+ R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | \
+ R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT | \
+ R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | \
+ R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \
+ R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | \
+ R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT )
+
+# define R300_SHADE_MODEL_SMOOTH ( \
+ R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | \
+ R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD )
+
+/* Specifies red & green components of fill color -- S312 format -- Backwards comp. */
+#define R300_GA_SOLID_RG 0x427c
+# define GA_SOLID_RG_COLOR_GREEN_SHIFT 0
+# define GA_SOLID_RG_COLOR_GREEN_MASK 0x0000ffff
+# define GA_SOLID_RG_COLOR_RED_SHIFT 16
+# define GA_SOLID_RG_COLOR_RED_MASK 0xffff0000
+/* Specifies blue & alpha components of fill color -- S312 format -- Backwards comp. */
+#define R300_GA_SOLID_BA 0x4280
+# define GA_SOLID_BA_COLOR_ALPHA_SHIFT 0
+# define GA_SOLID_BA_COLOR_ALPHA_MASK 0x0000ffff
+# define GA_SOLID_BA_COLOR_BLUE_SHIFT 16
+# define GA_SOLID_BA_COLOR_BLUE_MASK 0xffff0000
+
+/* Polygon Mode
+ * Dangerous
+ */
+#define R300_GA_POLY_MODE 0x4288
+# define R300_GA_POLY_MODE_DISABLE (0 << 0)
+# define R300_GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */
+/* reserved */
+# define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4)
+# define R300_GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4)
+# define R300_GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4)
+/* reserved */
+# define R300_GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7)
+# define R300_GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7)
+# define R300_GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7)
+/* reserved */
+
+/* Specifies the rouding mode for geometry & color SPFP to FP conversions. */
+#define R300_GA_ROUND_MODE 0x428c
+# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0)
+# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0)
+# define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2)
+# define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2)
+# define R300_GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4)
+# define R300_GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4)
+# define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5)
+# define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5)
+# define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6
+# define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0
+
+/* Specifies x & y offsets for vertex data after conversion to FP.
+ * Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b
+ * subprecision).
+ */
+#define R300_GA_OFFSET 0x4290
+# define R300_GA_OFFSET_X_OFFSET_SHIFT 0
+# define R300_GA_OFFSET_X_OFFSET_MASK 0x0000ffff
+# define R300_GA_OFFSET_Y_OFFSET_SHIFT 16
+# define R300_GA_OFFSET_Y_OFFSET_MASK 0xffff0000
+
+/* Specifies the scale to apply to fog. */
+#define R300_GA_FOG_SCALE 0x4294
+/* Specifies the offset to apply to fog. */
+#define R300_GA_FOG_OFFSET 0x4298
+/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */
+#define R300_GA_SOFT_RESET 0x429c
+
+/* Not sure why there are duplicate of factor and constant values.
+ * My best guess so far is that there are seperate zbiases for test and write.
+ * Ordering might be wrong.
+ * Some of the tests indicate that fgl has a fallback implementation of zbias
+ * via pixel shaders.
+ */
+#define R300_SU_TEX_WRAP 0x42A0
+#define R300_SU_POLY_OFFSET_FRONT_SCALE 0x42A4
+#define R300_SU_POLY_OFFSET_FRONT_OFFSET 0x42A8
+#define R300_SU_POLY_OFFSET_BACK_SCALE 0x42AC
+#define R300_SU_POLY_OFFSET_BACK_OFFSET 0x42B0
+
+/* This register needs to be set to (1<<1) for RV350 to correctly
+ * perform depth test (see --vb-triangles in r300_demo)
+ * Don't know about other chips. - Vladimir
+ * This is set to 3 when GL_POLYGON_OFFSET_FILL is on.
+ * My guess is that there are two bits for each zbias primitive
+ * (FILL, LINE, POINT).
+ * One to enable depth test and one for depth write.
+ * Yet this doesnt explain why depth writes work ...
+ */
+#define R300_SU_POLY_OFFSET_ENABLE 0x42B4
+# define R300_FRONT_ENABLE (1 << 0)
+# define R300_BACK_ENABLE (1 << 1)
+# define R300_PARA_ENABLE (1 << 2)
+
+#define R300_SU_CULL_MODE 0x42B8
+# define R300_CULL_FRONT (1 << 0)
+# define R300_CULL_BACK (1 << 1)
+# define R300_FRONT_FACE_CCW (0 << 2)
+# define R300_FRONT_FACE_CW (1 << 2)
+
+/* SU Depth Scale value */
+#define R300_SU_DEPTH_SCALE 0x42c0
+/* SU Depth Offset value */
+#define R300_SU_DEPTH_OFFSET 0x42c4
+
+#define R300_SU_REG_DEST 0x42c8
+# define R300_RASTER_PIPE_SELECT_0 (1 << 0)
+# define R300_RASTER_PIPE_SELECT_1 (1 << 1)
+# define R300_RASTER_PIPE_SELECT_2 (1 << 2)
+# define R300_RASTER_PIPE_SELECT_3 (1 << 3)
+# define R300_RASTER_PIPE_SELECT_ALL 0xf
+
+
+/* BEGIN: Rasterization / Interpolators - many guesses */
+
+/*
+ * TC_CNT is the number of incoming texture coordinate sets (i.e. it depends
+ * on the vertex program, *not* the fragment program)
+ */
+#define R300_RS_COUNT 0x4300
+# define R300_IT_COUNT_SHIFT 0
+# define R300_IT_COUNT_MASK 0x0000007f
+# define R300_IC_COUNT_SHIFT 7
+# define R300_IC_COUNT_MASK 0x00000780
+# define R300_W_ADDR_SHIFT 12
+# define R300_W_ADDR_MASK 0x0003f000
+# define R300_HIRES_DIS (0 << 18)
+# define R300_HIRES_EN (1 << 18)
+# define R300_IT_COUNT(x) ((x) << 0)
+# define R300_IC_COUNT(x) ((x) << 7)
+# define R300_W_COUNT(x) ((x) << 12)
+
+#define R300_RS_INST_COUNT 0x4304
+# define R300_RS_INST_COUNT_SHIFT 0
+# define R300_RS_INST_COUNT_MASK 0x0000000f
+# define R300_RS_TX_OFFSET_SHIFT 5
+# define R300_RS_TX_OFFSET_MASK 0x000000e0
+# define R300_RS_TX_OFFSET(x) ((x) << 5)
+
+/* gap */
+
+/* Only used for texture coordinates.
+ * Use the source field to route texture coordinate input from the
+ * vertex program to the desired interpolator. Note that the source
+ * field is relative to the outputs the vertex program *actually*
+ * writes. If a vertex program only writes texcoord[1], this will
+ * be source index 0.
+ * Set INTERP_USED on all interpolators that produce data used by
+ * the fragment program. INTERP_USED looks like a swizzling mask,
+ * but I haven't seen it used that way.
+ *
+ * Note: The _UNKNOWN constants are always set in their respective
+ * register. I don't know if this is necessary.
+ */
+#define R300_RS_IP_0 0x4310
+#define R300_RS_IP_1 0x4314
+#define R300_RS_IP_2 0x4318
+#define R300_RS_IP_3 0x431C
+# define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */
+# define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */
+# define R300_RS_TEX_PTR(x) (x << 0)
+# define R300_RS_COL_PTR(x) ((x) << 6)
+# define R300_RS_COL_FMT(x) ((x) << 9)
+# define R300_RS_COL_FMT_RGBA 0
+# define R300_RS_COL_FMT_RGB0 1
+# define R300_RS_COL_FMT_RGB1 2
+# define R300_RS_COL_FMT_000A 4
+# define R300_RS_COL_FMT_0000 5
+# define R300_RS_COL_FMT_0001 6
+# define R300_RS_COL_FMT_111A 8
+# define R300_RS_COL_FMT_1110 9
+# define R300_RS_COL_FMT_1111 10
+# define R300_RS_SEL_S(x) ((x) << 13)
+# define R300_RS_SEL_T(x) ((x) << 16)
+# define R300_RS_SEL_R(x) ((x) << 19)
+# define R300_RS_SEL_Q(x) ((x) << 22)
+# define R300_RS_SEL_C0 0
+# define R300_RS_SEL_C1 1
+# define R300_RS_SEL_C2 2
+# define R300_RS_SEL_C3 3
+# define R300_RS_SEL_K0 4
+# define R300_RS_SEL_K1 5
+
+
+/* */
+#define R500_RS_INST_0 0x4320
+#define R500_RS_INST_1 0x4324
+#define R500_RS_INST_2 0x4328
+#define R500_RS_INST_3 0x432c
+#define R500_RS_INST_4 0x4330
+#define R500_RS_INST_5 0x4334
+#define R500_RS_INST_6 0x4338
+#define R500_RS_INST_7 0x433c
+#define R500_RS_INST_8 0x4340
+#define R500_RS_INST_9 0x4344
+#define R500_RS_INST_10 0x4348
+#define R500_RS_INST_11 0x434c
+#define R500_RS_INST_12 0x4350
+#define R500_RS_INST_13 0x4354
+#define R500_RS_INST_14 0x4358
+#define R500_RS_INST_15 0x435c
+#define R500_RS_INST_TEX_ID_SHIFT 0
+# define R500_RS_INST_TEX_ID(x) ((x) << 0)
+#define R500_RS_INST_TEX_CN_WRITE (1 << 4)
+#define R500_RS_INST_TEX_ADDR_SHIFT 5
+# define R500_RS_INST_TEX_ADDR(x) ((x) << 5)
+#define R500_RS_INST_COL_ID_SHIFT 12
+# define R500_RS_INST_COL_ID(x) ((x) << 12)
+#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16)
+#define R500_RS_INST_COL_CN_WRITE (1 << 16)
+#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16)
+#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16)
+#define R500_RS_INST_COL_ADDR_SHIFT 18
+# define R500_RS_INST_COL_ADDR(x) ((x) << 18)
+#define R500_RS_INST_TEX_ADJ (1 << 25)
+#define R500_RS_INST_W_CN (1 << 26)
+
+/* These DWORDs control how vertex data is routed into fragment program
+ * registers, after interpolators.
+ */
+#define R300_RS_INST_0 0x4330
+#define R300_RS_INST_1 0x4334
+#define R300_RS_INST_2 0x4338
+#define R300_RS_INST_3 0x433C
+#define R300_RS_INST_4 0x4340
+#define R300_RS_INST_5 0x4344
+#define R300_RS_INST_6 0x4348
+#define R300_RS_INST_7 0x434C
+# define R300_RS_INST_TEX_ID(x) ((x) << 0)
+# define R300_RS_INST_TEX_CN_WRITE (1 << 3)
+# define R300_RS_INST_TEX_ADDR(x) ((x) << 6)
+# define R300_RS_INST_TEX_ADDR_SHIFT 6
+# define R300_RS_INST_COL_ID(x) ((x) << 11)
+# define R300_RS_INST_COL_CN_WRITE (1 << 14)
+# define R300_RS_INST_COL_ADDR(x) ((x) << 17)
+# define R300_RS_INST_COL_ADDR_SHIFT 17
+# define R300_RS_INST_TEX_ADJ (1 << 22)
+# define R300_RS_COL_BIAS_UNUSED_SHIFT 23
+
+/* END: Rasterization / Interpolators - many guesses */
+
+/* Hierarchical Z Enable */
+#define R300_SC_HYPERZ 0x43a4
+# define R300_SC_HYPERZ_DISABLE (0 << 0)
+# define R300_SC_HYPERZ_ENABLE (1 << 0)
+# define R300_SC_HYPERZ_MIN (0 << 1)
+# define R300_SC_HYPERZ_MAX (1 << 1)
+# define R300_SC_HYPERZ_ADJ_256 (0 << 2)
+# define R300_SC_HYPERZ_ADJ_128 (1 << 2)
+# define R300_SC_HYPERZ_ADJ_64 (2 << 2)
+# define R300_SC_HYPERZ_ADJ_32 (3 << 2)
+# define R300_SC_HYPERZ_ADJ_16 (4 << 2)
+# define R300_SC_HYPERZ_ADJ_8 (5 << 2)
+# define R300_SC_HYPERZ_ADJ_4 (6 << 2)
+# define R300_SC_HYPERZ_ADJ_2 (7 << 2)
+# define R300_SC_HYPERZ_HZ_Z0MIN_NO (0 << 5)
+# define R300_SC_HYPERZ_HZ_Z0MIN (1 << 5)
+# define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6)
+# define R300_SC_HYPERZ_HZ_Z0MAX (1 << 6)
+
+#define R300_SC_EDGERULE 0x43a8
+
+/* BEGIN: Scissors and cliprects */
+
+/* There are four clipping rectangles. Their corner coordinates are inclusive.
+ * Every pixel is assigned a number from 0 and 15 by setting bits 0-3 depending
+ * on whether the pixel is inside cliprects 0-3, respectively. For example,
+ * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned
+ * the number 3 (binary 0011).
+ * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set,
+ * the pixel is rasterized.
+ *
+ * In addition to this, there is a scissors rectangle. Only pixels inside the
+ * scissors rectangle are drawn. (coordinates are inclusive)
+ *
+ * For some reason, the top-left corner of the framebuffer is at (1440, 1440)
+ * for the purpose of clipping and scissors.
+ */
+#define R300_SC_CLIPRECT_TL_0 0x43B0
+#define R300_SC_CLIPRECT_BR_0 0x43B4
+#define R300_SC_CLIPRECT_TL_1 0x43B8
+#define R300_SC_CLIPRECT_BR_1 0x43BC
+#define R300_SC_CLIPRECT_TL_2 0x43C0
+#define R300_SC_CLIPRECT_BR_2 0x43C4
+#define R300_SC_CLIPRECT_TL_3 0x43C8
+#define R300_SC_CLIPRECT_BR_3 0x43CC
+# define R300_CLIPRECT_OFFSET 1440
+# define R300_CLIPRECT_MASK 0x1FFF
+# define R300_CLIPRECT_X_SHIFT 0
+# define R300_CLIPRECT_X_MASK (0x1FFF << 0)
+# define R300_CLIPRECT_Y_SHIFT 13
+# define R300_CLIPRECT_Y_MASK (0x1FFF << 13)
+#define R300_SC_CLIP_RULE 0x43D0
+# define R300_CLIP_OUT (1 << 0)
+# define R300_CLIP_0 (1 << 1)
+# define R300_CLIP_1 (1 << 2)
+# define R300_CLIP_10 (1 << 3)
+# define R300_CLIP_2 (1 << 4)
+# define R300_CLIP_20 (1 << 5)
+# define R300_CLIP_21 (1 << 6)
+# define R300_CLIP_210 (1 << 7)
+# define R300_CLIP_3 (1 << 8)
+# define R300_CLIP_30 (1 << 9)
+# define R300_CLIP_31 (1 << 10)
+# define R300_CLIP_310 (1 << 11)
+# define R300_CLIP_32 (1 << 12)
+# define R300_CLIP_320 (1 << 13)
+# define R300_CLIP_321 (1 << 14)
+# define R300_CLIP_3210 (1 << 15)
+
+/* gap */
+
+#define R300_SC_SCISSORS_TL 0x43E0
+#define R300_SC_SCISSORS_BR 0x43E4
+# define R300_SCISSORS_OFFSET 1440
+# define R300_SCISSORS_X_SHIFT 0
+# define R300_SCISSORS_X_MASK (0x1FFF << 0)
+# define R300_SCISSORS_Y_SHIFT 13
+# define R300_SCISSORS_Y_MASK (0x1FFF << 13)
+
+/* Screen door sample mask */
+#define R300_SC_SCREENDOOR 0x43e8
+
+/* END: Scissors and cliprects */
+
+/* BEGIN: Texture specification */
+
+/*
+ * The texture specification dwords are grouped by meaning and not by texture
+ * unit. This means that e.g. the offset for texture image unit N is found in
+ * register TX_OFFSET_0 + (4*N)
+ */
+#define R300_TX_FILTER0_0 0x4400
+#define R300_TX_FILTER0_1 0x4404
+#define R300_TX_FILTER0_2 0x4408
+#define R300_TX_FILTER0_3 0x440c
+#define R300_TX_FILTER0_4 0x4410
+#define R300_TX_FILTER0_5 0x4414
+#define R300_TX_FILTER0_6 0x4418
+#define R300_TX_FILTER0_7 0x441c
+#define R300_TX_FILTER0_8 0x4420
+#define R300_TX_FILTER0_9 0x4424
+#define R300_TX_FILTER0_10 0x4428
+#define R300_TX_FILTER0_11 0x442c
+#define R300_TX_FILTER0_12 0x4430
+#define R300_TX_FILTER0_13 0x4434
+#define R300_TX_FILTER0_14 0x4438
+#define R300_TX_FILTER0_15 0x443c
+# define R300_TX_REPEAT 0
+# define R300_TX_MIRRORED 1
+# define R300_TX_CLAMP_TO_EDGE 2
+# define R300_TX_MIRROR_ONCE_TO_EDGE 3
+# define R300_TX_CLAMP 4
+# define R300_TX_MIRROR_ONCE 5
+# define R300_TX_CLAMP_TO_BORDER 6
+# define R300_TX_MIRROR_ONCE_TO_BORDER 7
+# define R300_TX_WRAP_S_SHIFT 0
+# define R300_TX_WRAP_S_MASK (7 << 0)
+# define R300_TX_WRAP_T_SHIFT 3
+# define R300_TX_WRAP_T_MASK (7 << 3)
+# define R300_TX_WRAP_R_SHIFT 6
+# define R300_TX_WRAP_R_MASK (7 << 6)
+# define R300_TX_MAG_FILTER_4 (0 << 9)
+# define R300_TX_MAG_FILTER_NEAREST (1 << 9)
+# define R300_TX_MAG_FILTER_LINEAR (2 << 9)
+# define R300_TX_MAG_FILTER_ANISO (3 << 9)
+# define R300_TX_MAG_FILTER_MASK (3 << 9)
+# define R300_TX_MIN_FILTER_NEAREST (1 << 11)
+# define R300_TX_MIN_FILTER_LINEAR (2 << 11)
+# define R300_TX_MIN_FILTER_ANISO (3 << 11)
+# define R300_TX_MIN_FILTER_MASK (3 << 11)
+# define R300_TX_MIN_FILTER_MIP_NONE (0 << 13)
+# define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13)
+# define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13)
+# define R300_TX_MIN_FILTER_MIP_MASK (3 << 13)
+# define R300_TX_MAX_MIP_LEVEL_SHIFT 17
+# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 17)
+# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21)
+# define R300_TX_MAX_ANISO_2_TO_1 (1 << 21)
+# define R300_TX_MAX_ANISO_4_TO_1 (2 << 21)
+# define R300_TX_MAX_ANISO_8_TO_1 (3 << 21)
+# define R300_TX_MAX_ANISO_16_TO_1 (4 << 21)
+# define R300_TX_MAX_ANISO_MASK (7 << 21)
+# define R300_TX_WRAP_S(x) ((x) << 0)
+# define R300_TX_WRAP_T(x) ((x) << 3)
+# define R300_TX_MAX_MIP_LEVEL(x) ((x) << 17)
+
+#define R300_TX_FILTER1_0 0x4440
+# define R300_CHROMA_KEY_MODE_DISABLE 0
+# define R300_CHROMA_KEY_FORCE 1
+# define R300_CHROMA_KEY_BLEND 2
+# define R300_MC_ROUND_NORMAL (0<<2)
+# define R300_MC_ROUND_MPEG4 (1<<2)
+# define R300_LOD_BIAS_SHIFT 3
+# define R300_LOD_BIAS_MASK 0x1ff8
+# define R300_EDGE_ANISO_EDGE_DIAG (0<<13)
+# define R300_EDGE_ANISO_EDGE_ONLY (1<<13)
+# define R300_MC_COORD_TRUNCATE_DISABLE (0<<14)
+# define R300_MC_COORD_TRUNCATE_MPEG (1<<14)
+# define R300_TX_TRI_PERF_0_8 (0<<15)
+# define R300_TX_TRI_PERF_1_8 (1<<15)
+# define R300_TX_TRI_PERF_1_4 (2<<15)
+# define R300_TX_TRI_PERF_3_8 (3<<15)
+# define R300_ANISO_THRESHOLD_MASK (7<<17)
+
+# define R500_MACRO_SWITCH (1<<22)
+# define R500_TX_MAX_ANISO(x) ((x) << 23)
+# define R500_TX_MAX_ANISO_MASK (63 << 23)
+# define R500_TX_ANISO_HIGH_QUALITY (1 << 30)
+
+# define R500_BORDER_FIX (1<<31)
+
+#define R300_TX_FORMAT0_0 0x4480
+# define R300_TX_WIDTHMASK_SHIFT 0
+# define R300_TX_WIDTHMASK_MASK (2047 << 0)
+# define R300_TX_HEIGHTMASK_SHIFT 11
+# define R300_TX_HEIGHTMASK_MASK (2047 << 11)
+# define R300_TX_DEPTHMASK_SHIFT 22
+# define R300_TX_DEPTHMASK_MASK (0xf << 22)
+# define R300_TX_SIZE_PROJECTED (1 << 30)
+# define R300_TX_PITCH_EN (1 << 31)
+# define R300_TX_WIDTH(x) ((x) << 0)
+# define R300_TX_HEIGHT(x) ((x) << 11)
+# define R300_TX_DEPTH(x) ((x) << 22)
+# define R300_TX_NUM_LEVELS(x) ((x) << 26)
+
+#define R300_TX_FORMAT1_0 0x44C0
+ /* The interpretation of the format word by Wladimir van der Laan */
+ /* The X, Y, Z and W refer to the layout of the components.
+ They are given meanings as R, G, B and Alpha by the swizzle
+ specification */
+# define R300_TX_FORMAT_X8 0x0
+# define R300_TX_FORMAT_X16 0x1
+# define R300_TX_FORMAT_Y4X4 0x2
+# define R300_TX_FORMAT_Y8X8 0x3
+# define R300_TX_FORMAT_Y16X16 0x4
+# define R300_TX_FORMAT_Z3Y3X2 0x5
+# define R300_TX_FORMAT_Z5Y6X5 0x6
+# define R300_TX_FORMAT_Z6Y5X5 0x7
+# define R300_TX_FORMAT_Z11Y11X10 0x8
+# define R300_TX_FORMAT_Z10Y11X11 0x9
+# define R300_TX_FORMAT_W4Z4Y4X4 0xA
+# define R300_TX_FORMAT_W1Z5Y5X5 0xB
+# define R300_TX_FORMAT_W8Z8Y8X8 0xC
+# define R300_TX_FORMAT_W2Z10Y10X10 0xD
+# define R300_TX_FORMAT_W16Z16Y16X16 0xE
+# define R300_TX_FORMAT_DXT1 0xF
+# define R300_TX_FORMAT_DXT3 0x10
+# define R300_TX_FORMAT_DXT5 0x11
+# define R300_TX_FORMAT_CxV8U8 0x12
+# define R300_TX_FORMAT_AVYU444 0x13
+# define R300_TX_FORMAT_VYUY422 0x14
+# define R300_TX_FORMAT_YVYU422 0x15
+# define R300_TX_FORMAT_16_MPEG 0x16
+# define R300_TX_FORMAT_16_16_MPEG 0x17
+# define R300_TX_FORMAT_16F 0x18
+# define R300_TX_FORMAT_16F_16F 0x19
+# define R300_TX_FORMAT_16F_16F_16F_16F 0x1A
+# define R300_TX_FORMAT_32F 0x1B
+# define R300_TX_FORMAT_32F_32F 0x1C
+# define R300_TX_FORMAT_32F_32F_32F_32F 0x1D
+# define R300_TX_FORMAT_W24_FP 0x1E
+# define R400_TX_FORMAT_ATI2N 0x1F
+
+/* These need TX_FORMAT2_[0-15].TXFORMAT_MSB set.
+
+ My guess is the 10-bit formats are the 8-bit ones but with filtering being
+ performed with the precision of 10 bits per channel. This makes sense
+ with sRGB textures since the conversion to linear space reduces the precision
+ significantly so the shader gets approximately the 8-bit precision
+ in the end. It might also improve the quality of HDR rendering where
+ high-precision filtering is desirable.
+
+ Again, this is guessed, the formats might mean something entirely else.
+ The others should be fine. */
+# define R500_TX_FORMAT_X1 0x0
+# define R500_TX_FORMAT_X1_REV 0x1
+# define R500_TX_FORMAT_X10 0x2
+# define R500_TX_FORMAT_Y10X10 0x3
+# define R500_TX_FORMAT_W10Z10Y10X10 0x4
+# define R500_TX_FORMAT_ATI1N 0x5
+# define R500_TX_FORMAT_Y8X24 0x6
+
+
+# define R300_TX_FORMAT_SIGNED_W (1 << 5)
+# define R300_TX_FORMAT_SIGNED_Z (1 << 6)
+# define R300_TX_FORMAT_SIGNED_Y (1 << 7)
+# define R300_TX_FORMAT_SIGNED_X (1 << 8)
+# define R300_TX_FORMAT_SIGNED (0xf << 5)
+
+# define R300_TX_FORMAT_3D (1 << 25)
+# define R300_TX_FORMAT_CUBIC_MAP (2 << 25)
+
+ /* alpha modes, convenience mostly */
+ /* if you have alpha, pick constant appropriate to the
+ number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc */
+# define R300_TX_FORMAT_ALPHA_1CH 0x000
+# define R300_TX_FORMAT_ALPHA_2CH 0x200
+# define R300_TX_FORMAT_ALPHA_4CH 0x600
+# define R300_TX_FORMAT_ALPHA_NONE 0xA00
+ /* Swizzling */
+ /* constants */
+# define R300_TX_FORMAT_X 0
+# define R300_TX_FORMAT_Y 1
+# define R300_TX_FORMAT_Z 2
+# define R300_TX_FORMAT_W 3
+# define R300_TX_FORMAT_ZERO 4
+# define R300_TX_FORMAT_ONE 5
+ /* 2.0*Z, everything above 1.0 is set to 0.0 */
+# define R300_TX_FORMAT_CUT_Z 6
+ /* 2.0*W, everything above 1.0 is set to 0.0 */
+# define R300_TX_FORMAT_CUT_W 7
+
+# define R300_TX_FORMAT_B_SHIFT 18
+# define R300_TX_FORMAT_G_SHIFT 15
+# define R300_TX_FORMAT_R_SHIFT 12
+# define R300_TX_FORMAT_A_SHIFT 9
+ /* Convenience macro to take care of layout and swizzling */
+# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) ( \
+ ((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \
+ | ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \
+ | ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \
+ | ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \
+ | (R300_TX_FORMAT_##FMT) \
+ )
+ /* These can be ORed with result of R300_EASY_TX_FORMAT()
+ We don't really know what they do. Take values from a
+ constant color ? */
+# define R300_TX_FORMAT_CONST_X (1<<5)
+# define R300_TX_FORMAT_CONST_Y (2<<5)
+# define R300_TX_FORMAT_CONST_Z (4<<5)
+# define R300_TX_FORMAT_CONST_W (8<<5)
+
+# define R300_TX_FORMAT_GAMMA (1 << 21)
+# define R300_TX_FORMAT_YUV_TO_RGB (1 << 22)
+
+# define R300_TX_CACHE(x) ((x) << 27)
+# define R300_TX_CACHE_WHOLE 0
+/* reserved */
+# define R300_TX_CACHE_HALF_0 2
+# define R300_TX_CACHE_HALF_1 3
+# define R300_TX_CACHE_FOURTH_0 4
+# define R300_TX_CACHE_FOURTH_1 5
+# define R300_TX_CACHE_FOURTH_2 6
+# define R300_TX_CACHE_FOURTH_3 7
+# define R300_TX_CACHE_EIGHTH_0 8
+# define R300_TX_CACHE_EIGHTH_1 9
+# define R300_TX_CACHE_EIGHTH_2 10
+# define R300_TX_CACHE_EIGHTH_3 11
+# define R300_TX_CACHE_EIGHTH_4 12
+# define R300_TX_CACHE_EIGHTH_5 13
+# define R300_TX_CACHE_EIGHTH_6 14
+# define R300_TX_CACHE_EIGHTH_7 15
+# define R300_TX_CACHE_SIXTEENTH_0 16
+# define R300_TX_CACHE_SIXTEENTH_1 17
+# define R300_TX_CACHE_SIXTEENTH_2 18
+# define R300_TX_CACHE_SIXTEENTH_3 19
+# define R300_TX_CACHE_SIXTEENTH_4 20
+# define R300_TX_CACHE_SIXTEENTH_5 21
+# define R300_TX_CACHE_SIXTEENTH_6 22
+# define R300_TX_CACHE_SIXTEENTH_7 23
+# define R300_TX_CACHE_SIXTEENTH_8 24
+# define R300_TX_CACHE_SIXTEENTH_9 25
+# define R300_TX_CACHE_SIXTEENTH_10 26
+# define R300_TX_CACHE_SIXTEENTH_11 27
+# define R300_TX_CACHE_SIXTEENTH_12 28
+# define R300_TX_CACHE_SIXTEENTH_13 29
+# define R300_TX_CACHE_SIXTEENTH_14 30
+# define R300_TX_CACHE_SIXTEENTH_15 31
+
+#define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */
+# define R300_TX_PITCHMASK_SHIFT 0
+# define R300_TX_PITCHMASK_MASK (2047 << 0)
+# define R500_TXFORMAT_MSB (1 << 14)
+# define R500_TXWIDTH_BIT11 (1 << 15)
+# define R500_TXHEIGHT_BIT11 (1 << 16)
+# define R500_POW2FIX2FLT (1 << 17)
+# define R500_SEL_FILTER4_TC0 (0 << 18)
+# define R500_SEL_FILTER4_TC1 (1 << 18)
+# define R500_SEL_FILTER4_TC2 (2 << 18)
+# define R500_SEL_FILTER4_TC3 (3 << 18)
+
+#define R300_TX_OFFSET_0 0x4540
+#define R300_TX_OFFSET_1 0x4544
+#define R300_TX_OFFSET_2 0x4548
+#define R300_TX_OFFSET_3 0x454C
+#define R300_TX_OFFSET_4 0x4550
+#define R300_TX_OFFSET_5 0x4554
+#define R300_TX_OFFSET_6 0x4558
+#define R300_TX_OFFSET_7 0x455C
+
+# define R300_TXO_ENDIAN_NO_SWAP (0 << 0)
+# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0)
+# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0)
+# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0)
+# define R300_TXO_MACRO_TILE_LINEAR (0 << 2)
+# define R300_TXO_MACRO_TILE_TILED (1 << 2)
+# define R300_TXO_MACRO_TILE(x) ((x) << 2)
+# define R300_TXO_MICRO_TILE_LINEAR (0 << 3)
+# define R300_TXO_MICRO_TILE_TILED (1 << 3)
+# define R300_TXO_MICRO_TILE_TILED_SQUARE (2 << 3)
+# define R300_TXO_MICRO_TILE(x) ((x) << 3)
+# define R300_TXO_OFFSET_MASK 0xffffffe0
+# define R300_TXO_OFFSET_SHIFT 5
+
+/* 32 bit chroma key */
+#define R300_TX_CHROMA_KEY_0 0x4580
+#define R300_TX_CHROMA_KEY_1 0x4584
+#define R300_TX_CHROMA_KEY_2 0x4588
+#define R300_TX_CHROMA_KEY_3 0x458c
+#define R300_TX_CHROMA_KEY_4 0x4590
+#define R300_TX_CHROMA_KEY_5 0x4594
+#define R300_TX_CHROMA_KEY_6 0x4598
+#define R300_TX_CHROMA_KEY_7 0x459c
+#define R300_TX_CHROMA_KEY_8 0x45a0
+#define R300_TX_CHROMA_KEY_9 0x45a4
+#define R300_TX_CHROMA_KEY_10 0x45a8
+#define R300_TX_CHROMA_KEY_11 0x45ac
+#define R300_TX_CHROMA_KEY_12 0x45b0
+#define R300_TX_CHROMA_KEY_13 0x45b4
+#define R300_TX_CHROMA_KEY_14 0x45b8
+#define R300_TX_CHROMA_KEY_15 0x45bc
+/* ff00ff00 == { 0, 1.0, 0, 1.0 } */
+
+/* Border Color */
+#define R300_TX_BORDER_COLOR_0 0x45c0
+#define R300_TX_BORDER_COLOR_1 0x45c4
+#define R300_TX_BORDER_COLOR_2 0x45c8
+#define R300_TX_BORDER_COLOR_3 0x45cc
+#define R300_TX_BORDER_COLOR_4 0x45d0
+#define R300_TX_BORDER_COLOR_5 0x45d4
+#define R300_TX_BORDER_COLOR_6 0x45d8
+#define R300_TX_BORDER_COLOR_7 0x45dc
+#define R300_TX_BORDER_COLOR_8 0x45e0
+#define R300_TX_BORDER_COLOR_9 0x45e4
+#define R300_TX_BORDER_COLOR_10 0x45e8
+#define R300_TX_BORDER_COLOR_11 0x45ec
+#define R300_TX_BORDER_COLOR_12 0x45f0
+#define R300_TX_BORDER_COLOR_13 0x45f4
+#define R300_TX_BORDER_COLOR_14 0x45f8
+#define R300_TX_BORDER_COLOR_15 0x45fc
+
+
+/* END: Texture specification */
+
+/* BEGIN: Fragment program instruction set */
+
+/* Fragment programs are written directly into register space.
+ * There are separate instruction streams for texture instructions and ALU
+ * instructions.
+ * In order to synchronize these streams, the program is divided into up
+ * to 4 nodes. Each node begins with a number of TEX operations, followed
+ * by a number of ALU operations.
+ * The first node can have zero TEX ops, all subsequent nodes must have at
+ * least
+ * one TEX ops.
+ * All nodes must have at least one ALU op.
+ *
+ * The index of the last node is stored in PFS_CNTL_0: A value of 0 means
+ * 1 node, a value of 3 means 4 nodes.
+ * The total amount of instructions is defined in PFS_CNTL_2. The offsets are
+ * offsets into the respective instruction streams, while *_END points to the
+ * last instruction relative to this offset.
+ */
+#define R300_US_CONFIG 0x4600
+# define R300_PFS_CNTL_LAST_NODES_SHIFT 0
+# define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0)
+# define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3)
+#define R300_US_PIXSIZE 0x4604
+/* There is an unshifted value here which has so far always been equal to the
+ * index of the highest used temporary register.
+ */
+#define R300_US_CODE_OFFSET 0x4608
+# define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0
+# define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0)
+# define R300_PFS_CNTL_ALU_END_SHIFT 6
+# define R300_PFS_CNTL_ALU_END_MASK (63 << 6)
+# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 13
+# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13)
+# define R300_PFS_CNTL_TEX_END_SHIFT 18
+# define R300_PFS_CNTL_TEX_END_MASK (31 << 18)
+# define R400_PFS_CNTL_TEX_OFFSET_MSB_SHIFT 24
+# define R400_PFS_CNTL_TEX_OFFSET_MSB_MASK (0xf << 24)
+# define R400_PFS_CNTL_TEX_END_MSB_SHIFT 28
+# define R400_PFS_CNTL_TEX_END_MSB_MASK (0xf << 28)
+
+/* gap */
+
+/* Nodes are stored backwards. The last active node is always stored in
+ * PFS_NODE_3.
+ * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The
+ * first node is stored in NODE_2, the second node is stored in NODE_3.
+ *
+ * Offsets are relative to the master offset from PFS_CNTL_2.
+ */
+#define R300_US_CODE_ADDR_0 0x4610
+#define R300_US_CODE_ADDR_1 0x4614
+#define R300_US_CODE_ADDR_2 0x4618
+#define R300_US_CODE_ADDR_3 0x461C
+# define R300_ALU_START_SHIFT 0
+# define R300_ALU_START_MASK (63 << 0)
+# define R300_ALU_SIZE_SHIFT 6
+# define R300_ALU_SIZE_MASK (63 << 6)
+# define R300_TEX_START_SHIFT 12
+# define R300_TEX_START_MASK (31 << 12)
+# define R300_TEX_SIZE_SHIFT 17
+# define R300_TEX_SIZE_MASK (31 << 17)
+# define R300_RGBA_OUT (1 << 22)
+# define R300_W_OUT (1 << 23)
+# define R400_TEX_START_MSB_SHIFT 24
+# define R400_TEX_START_MSG_MASK (0xf << 24)
+# define R400_TEX_SIZE_MSB_SHIFT 28
+# define R400_TEX_SIZE_MSG_MASK (0xf << 28)
+
+/* TEX
+ * As far as I can tell, texture instructions cannot write into output
+ * registers directly. A subsequent ALU instruction is always necessary,
+ * even if it's just MAD o0, r0, 1, 0
+ */
+#define R300_US_TEX_INST_0 0x4620
+# define R300_SRC_ADDR_SHIFT 0
+# define R300_SRC_ADDR_MASK (31 << 0)
+# define R300_DST_ADDR_SHIFT 6
+# define R300_DST_ADDR_MASK (31 << 6)
+# define R300_TEX_ID_SHIFT 11
+# define R300_TEX_ID_MASK (15 << 11)
+# define R300_TEX_INST_SHIFT 15
+# define R300_TEX_OP_NOP 0
+# define R300_TEX_OP_LD 1
+# define R300_TEX_OP_KIL 2
+# define R300_TEX_OP_TXP 3
+# define R300_TEX_OP_TXB 4
+# define R300_TEX_INST_MASK (7 << 15)
+# define R400_SRC_ADDR_EXT_BIT (1 << 19)
+# define R400_DST_ADDR_EXT_BIT (1 << 20)
+
+/* Output format from the unfied shader */
+#define R300_US_OUT_FMT_0 0x46A4
+# define R300_US_OUT_FMT_C4_8 (0 << 0)
+# define R300_US_OUT_FMT_C4_10 (1 << 0)
+# define R300_US_OUT_FMT_C4_10_GAMMA (2 << 0)
+# define R300_US_OUT_FMT_C_16 (3 << 0)
+# define R300_US_OUT_FMT_C2_16 (4 << 0)
+# define R300_US_OUT_FMT_C4_16 (5 << 0)
+# define R300_US_OUT_FMT_C_16_MPEG (6 << 0)
+# define R300_US_OUT_FMT_C2_16_MPEG (7 << 0)
+# define R300_US_OUT_FMT_C2_4 (8 << 0)
+# define R300_US_OUT_FMT_C_3_3_2 (9 << 0)
+# define R300_US_OUT_FMT_C_6_5_6 (10 << 0)
+# define R300_US_OUT_FMT_C_11_11_10 (11 << 0)
+# define R300_US_OUT_FMT_C_10_11_11 (12 << 0)
+# define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0)
+/* reserved */
+# define R300_US_OUT_FMT_UNUSED (15 << 0)
+# define R300_US_OUT_FMT_C_16_FP (16 << 0)
+# define R300_US_OUT_FMT_C2_16_FP (17 << 0)
+# define R300_US_OUT_FMT_C4_16_FP (18 << 0)
+# define R300_US_OUT_FMT_C_32_FP (19 << 0)
+# define R300_US_OUT_FMT_C2_32_FP (20 << 0)
+# define R300_US_OUT_FMT_C4_32_FP (21 << 0)
+# define R300_C0_SEL_A (0 << 8)
+# define R300_C0_SEL_R (1 << 8)
+# define R300_C0_SEL_G (2 << 8)
+# define R300_C0_SEL_B (3 << 8)
+# define R300_C1_SEL_A (0 << 10)
+# define R300_C1_SEL_R (1 << 10)
+# define R300_C1_SEL_G (2 << 10)
+# define R300_C1_SEL_B (3 << 10)
+# define R300_C2_SEL_A (0 << 12)
+# define R300_C2_SEL_R (1 << 12)
+# define R300_C2_SEL_G (2 << 12)
+# define R300_C2_SEL_B (3 << 12)
+# define R300_C3_SEL_A (0 << 14)
+# define R300_C3_SEL_R (1 << 14)
+# define R300_C3_SEL_G (2 << 14)
+# define R300_C3_SEL_B (3 << 14)
+# define R300_OUT_SIGN(x) ((x) << 16)
+# define R500_ROUND_ADJ (1 << 20)
+
+/* ALU
+ * The ALU instructions register blocks are enumerated according to the order
+ * in which fglrx. I assume there is space for 64 instructions, since
+ * each block has space for a maximum of 64 DWORDs, and this matches reported
+ * native limits.
+ *
+ * The basic functional block seems to be one MAD for each color and alpha,
+ * and an adder that adds all components after the MUL.
+ * - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands
+ * - DP4: Use OUTC_DP4, OUTA_DP4
+ * - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands
+ * - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands
+ * - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1
+ * - CMP: If ARG2 < 0, return ARG1, else return ARG0
+ * - FLR: use FRC+MAD
+ * - XPD: use MAD+MAD
+ * - SGE, SLT: use MAD+CMP
+ * - RSQ: use ABS modifier for argument
+ * - Use OUTC_REPL_ALPHA to write results of an alpha-only operation
+ * (e.g. RCP) into color register
+ * - apparently, there's no quick DST operation
+ * - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2"
+ * - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0"
+ * - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1"
+ *
+ * Operand selection
+ * First stage selects three sources from the available registers and
+ * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha).
+ * fglrx sorts the three source fields: Registers before constants,
+ * lower indices before higher indices; I do not know whether this is
+ * necessary.
+ *
+ * fglrx fills unused sources with "read constant 0"
+ * According to specs, you cannot select more than two different constants.
+ *
+ * Second stage selects the operands from the sources. This is defined in
+ * INSTR0 (color) and INSTR2 (alpha). You can also select the special constants
+ * zero and one.
+ * Swizzling and negation happens in this stage, as well.
+ *
+ * Important: Color and alpha seem to be mostly separate, i.e. their sources
+ * selection appears to be fully independent (the register storage is probably
+ * physically split into a color and an alpha section).
+ * However (because of the apparent physical split), there is some interaction
+ * WRT swizzling. If, for example, you want to load an R component into an
+ * Alpha operand, this R component is taken from a *color* source, not from
+ * an alpha source. The corresponding register doesn't even have to appear in
+ * the alpha sources list. (I hope this all makes sense to you)
+ *
+ * Destination selection
+ * The destination register index is in FPI1 (color) and FPI3 (alpha)
+ * together with enable bits.
+ * There are separate enable bits for writing into temporary registers
+ * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_*
+ * /DSTA_OUTPUT). You can write to both at once, or not write at all (the
+ * same index must be used for both).
+ *
+ * Note: There is a special form for LRP
+ * - Argument order is the same as in ARB_fragment_program.
+ * - Operation is MAD
+ * - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP
+ * - Set FPI0/FPI2_SPECIAL_LRP
+ * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD
+ */
+#define R300_US_ALU_RGB_ADDR_0 0x46C0
+# define R300_ALU_SRC0C_SHIFT 0
+# define R300_ALU_SRC0C_MASK (31 << 0)
+# define R300_ALU_SRC0C_CONST (1 << 5)
+# define R300_ALU_SRC1C_SHIFT 6
+# define R300_ALU_SRC1C_MASK (31 << 6)
+# define R300_ALU_SRC1C_CONST (1 << 11)
+# define R300_ALU_SRC2C_SHIFT 12
+# define R300_ALU_SRC2C_MASK (31 << 12)
+# define R300_ALU_SRC2C_CONST (1 << 17)
+# define R300_ALU_SRC_MASK 0x0003ffff
+# define R300_ALU_DSTC_SHIFT 18
+# define R300_ALU_DSTC_MASK (31 << 18)
+# define R300_ALU_DSTC_REG_MASK_SHIFT 23
+# define R300_ALU_DSTC_REG_X (1 << 23)
+# define R300_ALU_DSTC_REG_Y (1 << 24)
+# define R300_ALU_DSTC_REG_Z (1 << 25)
+# define R300_ALU_DSTC_OUTPUT_MASK_SHIFT 26
+# define R300_ALU_DSTC_OUTPUT_X (1 << 26)
+# define R300_ALU_DSTC_OUTPUT_Y (1 << 27)
+# define R300_ALU_DSTC_OUTPUT_Z (1 << 28)
+# define R300_ALU_DSTC_OUTPUT_XYZ (7 << 26)
+# define R300_RGB_ADDR0(x) ((x) << 0)
+# define R300_RGB_ADDR1(x) ((x) << 6)
+# define R300_RGB_ADDR2(x) ((x) << 12)
+# define R300_RGB_TARGET(x) ((x) << 29)
+
+#define R300_US_ALU_ALPHA_ADDR_0 0x47C0
+# define R300_ALU_SRC0A_SHIFT 0
+# define R300_ALU_SRC0A_MASK (31 << 0)
+# define R300_ALU_SRC0A_CONST (1 << 5)
+# define R300_ALU_SRC1A_SHIFT 6
+# define R300_ALU_SRC1A_MASK (31 << 6)
+# define R300_ALU_SRC1A_CONST (1 << 11)
+# define R300_ALU_SRC2A_SHIFT 12
+# define R300_ALU_SRC2A_MASK (31 << 12)
+# define R300_ALU_SRC2A_CONST (1 << 17)
+# define R300_ALU_SRC_MASK 0x0003ffff
+# define R300_ALU_DSTA_SHIFT 18
+# define R300_ALU_DSTA_MASK (31 << 18)
+# define R300_ALU_DSTA_REG (1 << 23)
+# define R300_ALU_DSTA_OUTPUT (1 << 24)
+# define R300_ALU_DSTA_DEPTH (1 << 27)
+# define R300_ALPHA_ADDR0(x) ((x) << 0)
+# define R300_ALPHA_ADDR1(x) ((x) << 6)
+# define R300_ALPHA_ADDR2(x) ((x) << 12)
+# define R300_ALPHA_TARGET(x) ((x) << 25)
+
+#define R300_US_ALU_RGB_INST_0 0x48C0
+# define R300_ALU_ARGC_SRC0C_XYZ 0
+# define R300_ALU_ARGC_SRC0C_XXX 1
+# define R300_ALU_ARGC_SRC0C_YYY 2
+# define R300_ALU_ARGC_SRC0C_ZZZ 3
+# define R300_ALU_ARGC_SRC1C_XYZ 4
+# define R300_ALU_ARGC_SRC1C_XXX 5
+# define R300_ALU_ARGC_SRC1C_YYY 6
+# define R300_ALU_ARGC_SRC1C_ZZZ 7
+# define R300_ALU_ARGC_SRC2C_XYZ 8
+# define R300_ALU_ARGC_SRC2C_XXX 9
+# define R300_ALU_ARGC_SRC2C_YYY 10
+# define R300_ALU_ARGC_SRC2C_ZZZ 11
+# define R300_ALU_ARGC_SRC0A 12
+# define R300_ALU_ARGC_SRC1A 13
+# define R300_ALU_ARGC_SRC2A 14
+# define R300_ALU_ARGC_SRCP_XYZ 15
+# define R300_ALU_ARGC_SRCP_XXX 16
+# define R300_ALU_ARGC_SRCP_YYY 17
+# define R300_ALU_ARGC_SRCP_ZZZ 18
+# define R300_ALU_ARGC_SRCP_WWW 19
+# define R300_ALU_ARGC_ZERO 20
+# define R300_ALU_ARGC_ONE 21
+# define R300_ALU_ARGC_HALF 22
+# define R300_ALU_ARGC_SRC0C_YZX 23
+# define R300_ALU_ARGC_SRC1C_YZX 24
+# define R300_ALU_ARGC_SRC2C_YZX 25
+# define R300_ALU_ARGC_SRC0C_ZXY 26
+# define R300_ALU_ARGC_SRC1C_ZXY 27
+# define R300_ALU_ARGC_SRC2C_ZXY 28
+# define R300_ALU_ARGC_SRC0CA_WZY 29
+# define R300_ALU_ARGC_SRC1CA_WZY 30
+# define R300_ALU_ARGC_SRC2CA_WZY 31
+# define R300_RGB_SWIZA(x) ((x) << 0)
+# define R300_RGB_SWIZB(x) ((x) << 7)
+# define R300_RGB_SWIZC(x) ((x) << 14)
+
+# define R300_ALU_ARG0C_SHIFT 0
+# define R300_ALU_ARG0C_MASK (31 << 0)
+# define R300_ALU_ARG0C_NOP (0 << 5)
+# define R300_ALU_ARG0C_NEG (1 << 5)
+# define R300_ALU_ARG0C_ABS (2 << 5)
+# define R300_ALU_ARG0C_NAB (3 << 5)
+# define R300_ALU_ARG1C_SHIFT 7
+# define R300_ALU_ARG1C_MASK (31 << 7)
+# define R300_ALU_ARG1C_NOP (0 << 12)
+# define R300_ALU_ARG1C_NEG (1 << 12)
+# define R300_ALU_ARG1C_ABS (2 << 12)
+# define R300_ALU_ARG1C_NAB (3 << 12)
+# define R300_ALU_ARG2C_SHIFT 14
+# define R300_ALU_ARG2C_MASK (31 << 14)
+# define R300_ALU_ARG2C_NOP (0 << 19)
+# define R300_ALU_ARG2C_NEG (1 << 19)
+# define R300_ALU_ARG2C_ABS (2 << 19)
+# define R300_ALU_ARG2C_NAB (3 << 19)
+# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21)
+# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21)
+# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21)
+# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21)
+
+# define R300_ALU_OUTC_MAD (0 << 23)
+# define R300_ALU_OUTC_DP3 (1 << 23)
+# define R300_ALU_OUTC_DP4 (2 << 23)
+# define R300_ALU_OUTC_D2A (3 << 23)
+# define R300_ALU_OUTC_MIN (4 << 23)
+# define R300_ALU_OUTC_MAX (5 << 23)
+# define R300_ALU_OUTC_CMPH (7 << 23)
+# define R300_ALU_OUTC_CMP (8 << 23)
+# define R300_ALU_OUTC_FRC (9 << 23)
+# define R300_ALU_OUTC_REPL_ALPHA (10 << 23)
+
+# define R300_ALU_OUTC_MOD_NOP (0 << 27)
+# define R300_ALU_OUTC_MOD_MUL2 (1 << 27)
+# define R300_ALU_OUTC_MOD_MUL4 (2 << 27)
+# define R300_ALU_OUTC_MOD_MUL8 (3 << 27)
+# define R300_ALU_OUTC_MOD_DIV2 (4 << 27)
+# define R300_ALU_OUTC_MOD_DIV4 (5 << 27)
+# define R300_ALU_OUTC_MOD_DIV8 (6 << 27)
+
+# define R300_ALU_OUTC_CLAMP (1 << 30)
+# define R300_ALU_INSERT_NOP (1 << 31)
+
+#define R300_US_ALU_ALPHA_INST_0 0x49C0
+# define R300_ALU_ARGA_SRC0C_X 0
+# define R300_ALU_ARGA_SRC0C_Y 1
+# define R300_ALU_ARGA_SRC0C_Z 2
+# define R300_ALU_ARGA_SRC1C_X 3
+# define R300_ALU_ARGA_SRC1C_Y 4
+# define R300_ALU_ARGA_SRC1C_Z 5
+# define R300_ALU_ARGA_SRC2C_X 6
+# define R300_ALU_ARGA_SRC2C_Y 7
+# define R300_ALU_ARGA_SRC2C_Z 8
+# define R300_ALU_ARGA_SRC0A 9
+# define R300_ALU_ARGA_SRC1A 10
+# define R300_ALU_ARGA_SRC2A 11
+# define R300_ALU_ARGA_SRCP_X 12
+# define R300_ALU_ARGA_SRCP_Y 13
+# define R300_ALU_ARGA_SRCP_Z 14
+# define R300_ALU_ARGA_SRCP_W 15
+# define R300_ALU_ARGA_ZERO 16
+# define R300_ALU_ARGA_ONE 17
+# define R300_ALU_ARGA_HALF 18
+# define R300_ALPHA_SWIZA(x) ((x) << 0)
+# define R300_ALPHA_SWIZB(x) ((x) << 7)
+# define R300_ALPHA_SWIZC(x) ((x) << 14)
+
+# define R300_ALU_ARG0A_SHIFT 0
+# define R300_ALU_ARG0A_MASK (31 << 0)
+# define R300_ALU_ARG0A_NOP (0 << 5)
+# define R300_ALU_ARG0A_NEG (1 << 5)
+# define R300_ALU_ARG0A_ABS (2 << 5)
+# define R300_ALU_ARG0A_NAB (3 << 5)
+# define R300_ALU_ARG1A_SHIFT 7
+# define R300_ALU_ARG1A_MASK (31 << 7)
+# define R300_ALU_ARG1A_NOP (0 << 12)
+# define R300_ALU_ARG1A_NEG (1 << 12)
+# define R300_ALU_ARG1A_ABS (2 << 12)
+# define R300_ALU_ARG1A_NAB (3 << 12)
+# define R300_ALU_ARG2A_SHIFT 14
+# define R300_ALU_ARG2A_MASK (31 << 14)
+# define R300_ALU_ARG2A_NOP (0 << 19)
+# define R300_ALU_ARG2A_NEG (1 << 19)
+# define R300_ALU_ARG2A_ABS (2 << 19)
+# define R300_ALU_ARG2A_NAB (3 << 19)
+# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21)
+# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21)
+# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21)
+# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21)
+
+# define R300_ALU_OUTA_MAD (0 << 23)
+# define R300_ALU_OUTA_DP4 (1 << 23)
+# define R300_ALU_OUTA_MIN (2 << 23)
+# define R300_ALU_OUTA_MAX (3 << 23)
+# define R300_ALU_OUTA_CND (5 << 23)
+# define R300_ALU_OUTA_CMP (6 << 23)
+# define R300_ALU_OUTA_FRC (7 << 23)
+# define R300_ALU_OUTA_EX2 (8 << 23)
+# define R300_ALU_OUTA_LG2 (9 << 23)
+# define R300_ALU_OUTA_RCP (10 << 23)
+# define R300_ALU_OUTA_RSQ (11 << 23)
+
+# define R300_ALU_OUTA_MOD_NOP (0 << 27)
+# define R300_ALU_OUTA_MOD_MUL2 (1 << 27)
+# define R300_ALU_OUTA_MOD_MUL4 (2 << 27)
+# define R300_ALU_OUTA_MOD_MUL8 (3 << 27)
+# define R300_ALU_OUTA_MOD_DIV2 (4 << 27)
+# define R300_ALU_OUTA_MOD_DIV4 (5 << 27)
+# define R300_ALU_OUTA_MOD_DIV8 (6 << 27)
+
+# define R300_ALU_OUTA_CLAMP (1 << 30)
+/* END: Fragment program instruction set */
+
+/* R4xx extended fragment shader registers. */
+#define R400_US_ALU_EXT_ADDR_0 0x4ac0 /* up to 63 (0x4bbc) */
+# define R400_ADDR0_EXT_RGB_MSB_BIT 0x01
+# define R400_ADDR1_EXT_RGB_MSB_BIT 0x02
+# define R400_ADDR2_EXT_RGB_MSB_BIT 0x04
+# define R400_ADDRD_EXT_RGB_MSB_BIT 0x08
+# define R400_ADDR0_EXT_A_MSB_BIT 0x10
+# define R400_ADDR1_EXT_A_MSB_BIT 0x20
+# define R400_ADDR2_EXT_A_MSB_BIT 0x40
+# define R400_ADDRD_EXT_A_MSB_BIT 0x80
+#define R400_US_CODE_BANK 0x46b8
+# define R400_BANK_SHIFT 0
+# define R400_BANK_MASK 0xf
+# define R400_R390_MODE_ENABLE (1 << 4)
+#define R400_US_CODE_EXT 0x46bc
+# define R400_ALU_OFFSET_MSB_SHIFT 0
+# define R400_ALU_OFFSET_MSB_MASK (0x7 << 0)
+# define R400_ALU_SIZE_MSB_SHIFT 3
+# define R400_ALU_SIZE_MSB_MASK (0x7 << 3)
+# define R400_ALU_START0_MSB_SHIFT 6
+# define R400_ALU_START0_MSB_MASK (0x7 << 6)
+# define R400_ALU_SIZE0_MSB_SHIFT 9
+# define R400_ALU_SIZE0_MSB_MASK (0x7 << 9)
+# define R400_ALU_START1_MSB_SHIFT 12
+# define R400_ALU_START1_MSB_MASK (0x7 << 12)
+# define R400_ALU_SIZE1_MSB_SHIFT 15
+# define R400_ALU_SIZE1_MSB_MASK (0x7 << 15)
+# define R400_ALU_START2_MSB_SHIFT 18
+# define R400_ALU_START2_MSB_MASK (0x7 << 18)
+# define R400_ALU_SIZE2_MSB_SHIFT 21
+# define R400_ALU_SIZE2_MSB_MASK (0x7 << 21)
+# define R400_ALU_START3_MSB_SHIFT 24
+# define R400_ALU_START3_MSB_MASK (0x7 << 24)
+# define R400_ALU_SIZE3_MSB_SHIFT 27
+# define R400_ALU_SIZE3_MSB_MASK (0x7 << 27)
+/* END: R4xx extended fragment shader registers. */
+
+/* Fog: Fog Blending Enable */
+#define R300_FG_FOG_BLEND 0x4bc0
+# define R300_FG_FOG_BLEND_DISABLE (0 << 0)
+# define R300_FG_FOG_BLEND_ENABLE (1 << 0)
+# define R300_FG_FOG_BLEND_FN_LINEAR (0 << 1)
+# define R300_FG_FOG_BLEND_FN_EXP (1 << 1)
+# define R300_FG_FOG_BLEND_FN_EXP2 (2 << 1)
+# define R300_FG_FOG_BLEND_FN_CONSTANT (3 << 1)
+# define R300_FG_FOG_BLEND_FN_MASK (3 << 1)
+
+/* Fog: Red Component of Fog Color */
+#define R300_FG_FOG_COLOR_R 0x4bc8
+/* Fog: Green Component of Fog Color */
+#define R300_FG_FOG_COLOR_G 0x4bcc
+/* Fog: Blue Component of Fog Color */
+#define R300_FG_FOG_COLOR_B 0x4bd0
+# define R300_FG_FOG_COLOR_MASK 0x000003ff
+
+/* Fog: Constant Factor for Fog Blending */
+#define R300_FG_FOG_FACTOR 0x4bc4
+# define FG_FOG_FACTOR_MASK 0x000003ff
+
+/* Fog: Alpha function */
+#define R300_FG_ALPHA_FUNC 0x4bd4
+# define R300_FG_ALPHA_FUNC_VAL_MASK 0x000000ff
+# define R300_FG_ALPHA_FUNC_NEVER (0 << 8)
+# define R300_FG_ALPHA_FUNC_LESS (1 << 8)
+# define R300_FG_ALPHA_FUNC_EQUAL (2 << 8)
+# define R300_FG_ALPHA_FUNC_LE (3 << 8)
+# define R300_FG_ALPHA_FUNC_GREATER (4 << 8)
+# define R300_FG_ALPHA_FUNC_NOTEQUAL (5 << 8)
+# define R300_FG_ALPHA_FUNC_GE (6 << 8)
+# define R300_FG_ALPHA_FUNC_ALWAYS (7 << 8)
+# define R300_ALPHA_TEST_OP_MASK (7 << 8)
+# define R300_FG_ALPHA_FUNC_DISABLE (0 << 11)
+# define R300_FG_ALPHA_FUNC_ENABLE (1 << 11)
+
+# define R500_FG_ALPHA_FUNC_10BIT (0 << 12)
+# define R500_FG_ALPHA_FUNC_8BIT (1 << 12)
+
+# define R300_FG_ALPHA_FUNC_MASK_DISABLE (0 << 16)
+# define R300_FG_ALPHA_FUNC_MASK_ENABLE (1 << 16)
+# define R300_FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17)
+# define R300_FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17)
+
+# define R300_FG_ALPHA_FUNC_DITH_DISABLE (0 << 20)
+# define R300_FG_ALPHA_FUNC_DITH_ENABLE (1 << 20)
+
+# define R500_FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24)
+# define R500_FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */
+# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25)
+# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25)
+
+# define R500_FG_ALPHA_FUNC_FP16_DISABLE (0 << 28)
+# define R500_FG_ALPHA_FUNC_FP16_ENABLE (1 << 28)
+
+
+/* Fog: Where does the depth come from? */
+#define R300_FG_DEPTH_SRC 0x4bd8
+# define R300_FG_DEPTH_SRC_SCAN (0 << 0)
+# define R300_FG_DEPTH_SRC_SHADER (1 << 0)
+
+/* Fog: Alpha Compare Value */
+#define R500_FG_ALPHA_VALUE 0x4be0
+# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff
+
+#define RV530_FG_ZBREG_DEST 0x4be8
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_0 (1 << 0)
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_1 (1 << 1)
+# define RV530_FG_ZBREG_DEST_PIPE_SELECT_ALL (3 << 0)
+/* gap */
+
+/* Fragment program parameters in 7.16 floating point */
+#define R300_PFS_PARAM_0_X 0x4C00
+#define R300_PFS_PARAM_0_Y 0x4C04
+#define R300_PFS_PARAM_0_Z 0x4C08
+#define R300_PFS_PARAM_0_W 0x4C0C
+/* last consts */
+#define R300_PFS_PARAM_31_X 0x4DF0
+#define R300_PFS_PARAM_31_Y 0x4DF4
+#define R300_PFS_PARAM_31_Z 0x4DF8
+#define R300_PFS_PARAM_31_W 0x4DFC
+
+/* Unpipelined. */
+#define R300_RB3D_CCTL 0x4e00
+# define R300_RB3D_CCTL_NUM_MULTIWRITES(x) (MAX2(((x)-1), 0) << 5)
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5)
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5)
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5)
+# define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS (3 << 5)
+# define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE (0 << 7)
+# define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE (1 << 7)
+# define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE (0 << 9)
+# define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE (1 << 9)
+# define R300_RB3D_CCTL_CMASK_DISABLE (0 << 10)
+# define R300_RB3D_CCTL_CMASK_ENABLE (1 << 10)
+/* reserved */
+# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_DISABLE (0 << 12)
+# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE (1 << 12)
+# define R300_RB3D_CCTL_WRITE_COMPRESSION_ENABLE (0 << 13)
+# define R300_RB3D_CCTL_WRITE_COMPRESSION_DISABLE (1 << 13)
+# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_DISABLE (0 << 14)
+# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE (1 << 14)
+
+
+/* Notes:
+ * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in
+ * the application
+ * - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND
+ * are set to the same
+ * function (both registers are always set up completely in any case)
+ * - Most blend flags are simply copied from R200 and not tested yet
+ */
+#define R300_RB3D_CBLEND 0x4E04
+#define R300_RB3D_ABLEND 0x4E08
+/* the following only appear in CBLEND */
+# define R300_ALPHA_BLEND_ENABLE (1 << 0)
+# define R300_SEPARATE_ALPHA_ENABLE (1 << 1)
+# define R300_READ_ENABLE (1 << 2)
+# define R300_DISCARD_SRC_PIXELS_DIS (0 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0 (1 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0 (2 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0 (3 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3)
+# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3)
+# define R500_SRC_ALPHA_0_NO_READ (1 << 30)
+# define R500_SRC_ALPHA_1_NO_READ (1 << 31)
+
+/* the following are shared between CBLEND and ABLEND */
+# define R300_FCN_MASK (3 << 12)
+# define R300_COMB_FCN_ADD_CLAMP (0 << 12)
+# define R300_COMB_FCN_ADD_NOCLAMP (1 << 12)
+# define R300_COMB_FCN_SUB_CLAMP (2 << 12)
+# define R300_COMB_FCN_SUB_NOCLAMP (3 << 12)
+# define R300_COMB_FCN_MIN (4 << 12)
+# define R300_COMB_FCN_MAX (5 << 12)
+# define R300_COMB_FCN_RSUB_CLAMP (6 << 12)
+# define R300_COMB_FCN_RSUB_NOCLAMP (7 << 12)
+# define R300_BLEND_GL_ZERO (32)
+# define R300_BLEND_GL_ONE (33)
+# define R300_BLEND_GL_SRC_COLOR (34)
+# define R300_BLEND_GL_ONE_MINUS_SRC_COLOR (35)
+# define R300_BLEND_GL_DST_COLOR (36)
+# define R300_BLEND_GL_ONE_MINUS_DST_COLOR (37)
+# define R300_BLEND_GL_SRC_ALPHA (38)
+# define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA (39)
+# define R300_BLEND_GL_DST_ALPHA (40)
+# define R300_BLEND_GL_ONE_MINUS_DST_ALPHA (41)
+# define R300_BLEND_GL_SRC_ALPHA_SATURATE (42)
+# define R300_BLEND_GL_CONST_COLOR (43)
+# define R300_BLEND_GL_ONE_MINUS_CONST_COLOR (44)
+# define R300_BLEND_GL_CONST_ALPHA (45)
+# define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA (46)
+# define R300_BLEND_MASK (63)
+# define R300_SRC_BLEND_SHIFT (16)
+# define R300_DST_BLEND_SHIFT (24)
+
+/* Constant color used by the blender. Pipelined through the blender.
+ * Note: For R520, this field is ignored, use RB3D_CONSTANT_COLOR_GB__BLUE,
+ * RB3D_CONSTANT_COLOR_GB__GREEN, etc. instead.
+ */
+#define R300_RB3D_BLEND_COLOR 0x4E10
+
+
+/* 3D Color Channel Mask. If all the channels used in the current color format
+ * are disabled, then the cb will discard all the incoming quads. Pipelined
+ * through the blender.
+ */
+#define RB3D_COLOR_CHANNEL_MASK 0x4E0C
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 (1 << 0)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 (1 << 1)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK0 (1 << 2)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 (1 << 3)
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK1 (1 << 4)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK1 (1 << 5)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK1 (1 << 6)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK1 (1 << 7)
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK2 (1 << 8)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK2 (1 << 9)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK2 (1 << 10)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK2 (1 << 11)
+# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK3 (1 << 12)
+# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK3 (1 << 13)
+# define RB3D_COLOR_CHANNEL_MASK_RED_MASK3 (1 << 14)
+# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK3 (1 << 15)
+
+/* Clear color that is used when the color mask is set to 00. Unpipelined.
+ * Program this register with a 32-bit value in ARGB8888 or ARGB2101010
+ * formats, ignoring the fields.
+ */
+#define RB3D_COLOR_CLEAR_VALUE 0x4e14
+
+/* gap */
+
+/* Color Compare Color. Stalls the 2d/3d datapath until it is idle. */
+#define RB3D_CLRCMP_CLR 0x4e20
+
+/* Color Compare Mask. Stalls the 2d/3d datapath until it is idle. */
+#define RB3D_CLRCMP_MSK 0x4e24
+
+/* Color Buffer Address Offset of multibuffer 0. Unpipelined. */
+#define R300_RB3D_COLOROFFSET0 0x4E28
+# define R300_COLOROFFSET_MASK 0xFFFFFFE0
+/* Color Buffer Address Offset of multibuffer 1. Unpipelined. */
+#define R300_RB3D_COLOROFFSET1 0x4E2C
+/* Color Buffer Address Offset of multibuffer 2. Unpipelined. */
+#define R300_RB3D_COLOROFFSET2 0x4E30
+/* Color Buffer Address Offset of multibuffer 3. Unpipelined. */
+#define R300_RB3D_COLOROFFSET3 0x4E34
+
+/* Color buffer format and tiling control for all the multibuffers and the
+ * pitch of multibuffer 0 to 3. Unpipelined. The cache must be empty before any
+ * of the registers are changed.
+ *
+ * Bit 16: Larger tiles
+ * Bit 17: 4x2 tiles
+ * Bit 18: Extremely weird tile like, but some pixels duplicated?
+ */
+#define R300_RB3D_COLORPITCH0 0x4E38
+# define R300_COLORPITCH_MASK 0x00003FFE
+# define R300_COLOR_TILE_DISABLE (0 << 16)
+# define R300_COLOR_TILE_ENABLE (1 << 16)
+# define R300_COLOR_TILE(x) ((x) << 16)
+# define R300_COLOR_MICROTILE_DISABLE (0 << 17)
+# define R300_COLOR_MICROTILE_ENABLE (1 << 17)
+# define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */
+# define R300_COLOR_MICROTILE(x) ((x) << 17)
+# define R300_COLOR_ENDIAN_NO_SWAP (0 << 19)
+# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19)
+# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19)
+# define R300_COLOR_ENDIAN_HALF_DWORD_SWAP (3 << 19)
+# define R500_COLOR_FORMAT_ARGB10101010 (0 << 21)
+# define R500_COLOR_FORMAT_UV1010 (1 << 21)
+# define R500_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */
+# define R300_COLOR_FORMAT_ARGB1555 (3 << 21)
+# define R300_COLOR_FORMAT_RGB565 (4 << 21)
+# define R500_COLOR_FORMAT_ARGB2101010 (5 << 21)
+# define R300_COLOR_FORMAT_ARGB8888 (6 << 21)
+# define R300_COLOR_FORMAT_ARGB32323232 (7 << 21)
+/* reserved */
+# define R300_COLOR_FORMAT_I8 (9 << 21)
+# define R300_COLOR_FORMAT_ARGB16161616 (10 << 21)
+# define R300_COLOR_FORMAT_VYUY (11 << 21)
+# define R300_COLOR_FORMAT_YVYU (12 << 21)
+# define R300_COLOR_FORMAT_UV88 (13 << 21)
+# define R500_COLOR_FORMAT_I10 (14 << 21)
+# define R300_COLOR_FORMAT_ARGB4444 (15 << 21)
+#define R300_RB3D_COLORPITCH1 0x4E3C
+#define R300_RB3D_COLORPITCH2 0x4E40
+#define R300_RB3D_COLORPITCH3 0x4E44
+
+/* gap */
+
+/* Destination Color Buffer Cache Control/Status. If the cb is in e2 mode, then
+ * a flush or free will not occur upon a write to this register, but a sync
+ * will be immediately sent if one is requested. If both DC_FLUSH and DC_FREE
+ * are zero but DC_FINISH is one, then a sync will be sent immediately -- the
+ * cb will not wait for all the previous operations to complete before sending
+ * the sync. Unpipelined except when DC_FINISH and DC_FREE are both set to
+ * zero.
+ *
+ * Set to 0A before 3D operations, set to 02 afterwards.
+ */
+#define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4)
+# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4)
+
+#define R300_RB3D_DITHER_CTL 0x4E50
+# define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE (0 << 0)
+# define R300_RB3D_DITHER_CTL_DITHER_MODE_ROUND (1 << 0)
+# define R300_RB3D_DITHER_CTL_DITHER_MODE_LUT (2 << 0)
+/* reserved */
+# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_TRUNCATE (0 << 2)
+# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_ROUND (1 << 2)
+# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT (2 << 2)
+/* reserved */
+
+/* Resolve buffer destination address. The cache must be empty before changing
+ * this register if the cb is in resolve mode. Unpipelined
+ */
+#define R300_RB3D_AARESOLVE_OFFSET 0x4e80
+# define R300_RB3D_AARESOLVE_OFFSET_SHIFT 5
+# define R300_RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */
+
+/* Resolve Buffer Pitch and Tiling Control. The cache must be empty before
+ * changing this register if the cb is in resolve mode. Unpipelined
+ */
+#define R300_RB3D_AARESOLVE_PITCH 0x4e84
+# define R300_RB3D_AARESOLVE_PITCH_SHIFT 1
+# define R300_RB3D_AARESOLVE_PITCH_MASK 0x00003ffe /* At least according to the calculations of Christoph Brill */
+
+/* Resolve Buffer Control. Unpipelined */
+#define R300_RB3D_AARESOLVE_CTL 0x4e88
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL (0 << 0)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE (1 << 0)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 (0 << 1)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2)
+# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2)
+
+
+/* Discard src pixels less than or equal to threshold. */
+#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0
+/* Discard src pixels greater than or equal to threshold. */
+#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24
+# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000
+
+/* 3D ROP Control. Stalls the 2d/3d datapath until it is idle. */
+#define R300_RB3D_ROPCNTL 0x4e18
+# define R300_RB3D_ROPCNTL_ROP_ENABLE 0x00000004
+# define R300_RB3D_ROPCNTL_ROP_MASK (15 << 8)
+# define R300_RB3D_ROPCNTL_ROP_SHIFT 8
+
+/* Color Compare Flip. Stalls the 2d/3d datapath until it is idle. */
+#define R300_RB3D_CLRCMP_FLIPE 0x4e1c
+
+/* Sets the fifo sizes */
+#define R500_RB3D_FIFO_SIZE 0x4ef4
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0)
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0)
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0)
+# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0)
+
+/* Constant color used by the blender. Pipelined through the blender. */
+#define R500_RB3D_CONSTANT_COLOR_AR 0x4ef8
+# define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK 0x0000ffff
+# define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0
+# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK 0xffff0000
+# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16
+
+/* Constant color used by the blender. Pipelined through the blender. */
+#define R500_RB3D_CONSTANT_COLOR_GB 0x4efc
+# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK 0x0000ffff
+# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT 0
+# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK 0xffff0000
+# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT 16
+
+/* gap */
+/* There seems to be no "write only" setting, so use Z-test = ALWAYS
+ * for this.
+ * Bit (1<<8) is the "test" bit. so plain write is 6 - vd
+ */
+#define R300_ZB_CNTL 0x4F00
+# define R300_STENCIL_ENABLE (1 << 0)
+# define R300_Z_ENABLE (1 << 1)
+# define R300_Z_WRITE_ENABLE (1 << 2)
+# define R300_Z_SIGNED_COMPARE (1 << 3)
+# define R300_STENCIL_FRONT_BACK (1 << 4)
+# define R500_STENCIL_ZSIGNED_MAGNITUDE (1 << 5)
+# define R500_STENCIL_REFMASK_FRONT_BACK (1 << 6)
+
+#define R300_ZB_ZSTENCILCNTL 0x4f04
+ /* functions */
+# define R300_ZS_NEVER 0
+# define R300_ZS_LESS 1
+# define R300_ZS_LEQUAL 2
+# define R300_ZS_EQUAL 3
+# define R300_ZS_GEQUAL 4
+# define R300_ZS_GREATER 5
+# define R300_ZS_NOTEQUAL 6
+# define R300_ZS_ALWAYS 7
+# define R300_ZS_MASK 7
+ /* operations */
+# define R300_ZS_KEEP 0
+# define R300_ZS_ZERO 1
+# define R300_ZS_REPLACE 2
+# define R300_ZS_INCR 3
+# define R300_ZS_DECR 4
+# define R300_ZS_INVERT 5
+# define R300_ZS_INCR_WRAP 6
+# define R300_ZS_DECR_WRAP 7
+# define R300_Z_FUNC_SHIFT 0
+ /* front and back refer to operations done for front
+ and back faces, i.e. separate stencil function support */
+# define R300_S_FRONT_FUNC_SHIFT 3
+# define R300_S_FRONT_SFAIL_OP_SHIFT 6
+# define R300_S_FRONT_ZPASS_OP_SHIFT 9
+# define R300_S_FRONT_ZFAIL_OP_SHIFT 12
+# define R300_S_BACK_FUNC_SHIFT 15
+# define R300_S_BACK_SFAIL_OP_SHIFT 18
+# define R300_S_BACK_ZPASS_OP_SHIFT 21
+# define R300_S_BACK_ZFAIL_OP_SHIFT 24
+
+#define R300_ZB_STENCILREFMASK 0x4f08
+# define R300_STENCILREF_SHIFT 0
+# define R300_STENCILREF_MASK 0x000000ff
+# define R300_STENCILMASK_SHIFT 8
+# define R300_STENCILMASK_MASK 0x0000ff00
+# define R300_STENCILWRITEMASK_SHIFT 16
+# define R300_STENCILWRITEMASK_MASK 0x00ff0000
+
+/* gap */
+
+#define R300_ZB_FORMAT 0x4f10
+# define R300_DEPTHFORMAT_16BIT_INT_Z (0 << 0)
+# define R300_DEPTHFORMAT_16BIT_13E3 (1 << 0)
+# define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL (2 << 0)
+/* reserved up to (15 << 0) */
+# define R300_INVERT_13E3_LEADING_ONES (0 << 4)
+# define R300_INVERT_13E3_LEADING_ZEROS (1 << 4)
+
+#define R300_ZB_ZTOP 0x4F14
+# define R300_ZTOP_DISABLE (0 << 0)
+# define R300_ZTOP_ENABLE (1 << 0)
+
+/* gap */
+
+#define R300_ZB_ZCACHE_CTLSTAT 0x4f18
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31)
+# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1 << 31)
+
+#define R300_ZB_BW_CNTL 0x4f1c
+# define R300_HIZ_DISABLE (0 << 0)
+# define R300_HIZ_ENABLE (1 << 0)
+# define R300_HIZ_MIN (0 << 1)
+# define R300_HIZ_MAX (1 << 1)
+# define R300_FAST_FILL_DISABLE (0 << 2)
+# define R300_FAST_FILL_ENABLE (1 << 2)
+# define R300_RD_COMP_DISABLE (0 << 3)
+# define R300_RD_COMP_ENABLE (1 << 3)
+# define R300_WR_COMP_DISABLE (0 << 4)
+# define R300_WR_COMP_ENABLE (1 << 4)
+# define R300_ZB_CB_CLEAR_RMW (0 << 5)
+# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5)
+# define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6)
+# define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6)
+
+# define R500_ZEQUAL_OPTIMIZE_ENABLE (0 << 7)
+# define R500_ZEQUAL_OPTIMIZE_DISABLE (1 << 7)
+# define R500_SEQUAL_OPTIMIZE_ENABLE (0 << 8)
+# define R500_SEQUAL_OPTIMIZE_DISABLE (1 << 8)
+
+# define R500_BMASK_ENABLE (0 << 10)
+# define R500_BMASK_DISABLE (1 << 10)
+# define R500_HIZ_EQUAL_REJECT_DISABLE (0 << 11)
+# define R500_HIZ_EQUAL_REJECT_ENABLE (1 << 11)
+# define R500_HIZ_FP_EXP_BITS_DISABLE (0 << 12)
+# define R500_HIZ_FP_EXP_BITS_1 (1 << 12)
+# define R500_HIZ_FP_EXP_BITS_2 (2 << 12)
+# define R500_HIZ_FP_EXP_BITS_3 (3 << 12)
+# define R500_HIZ_FP_EXP_BITS_4 (4 << 12)
+# define R500_HIZ_FP_EXP_BITS_5 (5 << 12)
+# define R500_HIZ_FP_INVERT_LEADING_ONES (0 << 15)
+# define R500_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15)
+# define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16)
+# define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16)
+# define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17)
+# define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17)
+# define R500_PEQ_PACKING_DISABLE (0 << 18)
+# define R500_PEQ_PACKING_ENABLE (1 << 18)
+# define R500_COVERED_PTR_MASKING_DISABLE (0 << 18)
+# define R500_COVERED_PTR_MASKING_ENABLE (1 << 18)
+
+
+/* gap */
+
+/* Z Buffer Address Offset.
+ * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles.
+ */
+#define R300_ZB_DEPTHOFFSET 0x4f20
+
+/* Z Buffer Pitch and Endian Control */
+#define R300_ZB_DEPTHPITCH 0x4f24
+# define R300_DEPTHPITCH_MASK 0x00003FFC
+# define R300_DEPTHMACROTILE_DISABLE (0 << 16)
+# define R300_DEPTHMACROTILE_ENABLE (1 << 16)
+# define R300_DEPTHMACROTILE(x) ((x) << 16)
+# define R300_DEPTHMICROTILE_LINEAR (0 << 17)
+# define R300_DEPTHMICROTILE_TILED (1 << 17)
+# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17)
+# define R300_DEPTHMICROTILE(x) ((x) << 17)
+# define R300_DEPTHENDIAN_NO_SWAP (0 << 18)
+# define R300_DEPTHENDIAN_WORD_SWAP (1 << 18)
+# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18)
+# define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 18)
+
+/* Z Buffer Clear Value */
+#define R300_ZB_DEPTHCLEARVALUE 0x4f28
+
+/* Hierarchical Z Memory Offset */
+#define R300_ZB_HIZ_OFFSET 0x4f44
+
+/* Hierarchical Z Write Index */
+#define R300_ZB_HIZ_WRINDEX 0x4f48
+
+/* Hierarchical Z Data */
+#define R300_ZB_HIZ_DWORD 0x4f4c
+
+/* Hierarchical Z Read Index */
+#define R300_ZB_HIZ_RDINDEX 0x4f50
+
+/* Hierarchical Z Pitch */
+#define R300_ZB_HIZ_PITCH 0x4f54
+
+/* Z Buffer Z Pass Counter Data */
+#define R300_ZB_ZPASS_DATA 0x4f58
+
+/* Z Buffer Z Pass Counter Address */
+#define R300_ZB_ZPASS_ADDR 0x4f5c
+
+/* Depth buffer X and Y coordinate offset */
+#define R300_ZB_DEPTHXY_OFFSET 0x4f60
+# define R300_DEPTHX_OFFSET_SHIFT 1
+# define R300_DEPTHX_OFFSET_MASK 0x000007FE
+# define R300_DEPTHY_OFFSET_SHIFT 17
+# define R300_DEPTHY_OFFSET_MASK 0x07FE0000
+
+/* Sets the fifo sizes */
+#define R500_ZB_FIFO_SIZE 0x4fd0
+# define R500_OP_FIFO_SIZE_FULL (0 << 0)
+# define R500_OP_FIFO_SIZE_HALF (1 << 0)
+# define R500_OP_FIFO_SIZE_QUATER (2 << 0)
+# define R500_OP_FIFO_SIZE_EIGTHS (4 << 0)
+
+/* Stencil Reference Value and Mask for backfacing quads */
+/* R300_ZB_STENCILREFMASK handles front face */
+#define R500_ZB_STENCILREFMASK_BF 0x4fd4
+# define R500_STENCILREF_SHIFT 0
+# define R500_STENCILREF_MASK 0x000000ff
+# define R500_STENCILMASK_SHIFT 8
+# define R500_STENCILMASK_MASK 0x0000ff00
+# define R500_STENCILWRITEMASK_SHIFT 16
+# define R500_STENCILWRITEMASK_MASK 0x00ff0000
+
+/**
+ * \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION
+ *
+ * The PVS_DST_MATH_INST is used to identify whether the instruction is a Vector
+ * Engine instruction or a Math Engine instruction.
+ */
+
+/*\{*/
+
+enum {
+ /* R3XX */
+ VECTOR_NO_OP = 0,
+ VE_DOT_PRODUCT = 1,
+ VE_MULTIPLY = 2,
+ VE_ADD = 3,
+ VE_MULTIPLY_ADD = 4,
+ VE_DISTANCE_VECTOR = 5,
+ VE_FRACTION = 6,
+ VE_MAXIMUM = 7,
+ VE_MINIMUM = 8,
+ VE_SET_GREATER_THAN_EQUAL = 9,
+ VE_SET_LESS_THAN = 10,
+ VE_MULTIPLYX2_ADD = 11,
+ VE_MULTIPLY_CLAMP = 12,
+ VE_FLT2FIX_DX = 13,
+ VE_FLT2FIX_DX_RND = 14,
+ /* R5XX */
+ VE_PRED_SET_EQ_PUSH = 15,
+ VE_PRED_SET_GT_PUSH = 16,
+ VE_PRED_SET_GTE_PUSH = 17,
+ VE_PRED_SET_NEQ_PUSH = 18,
+ VE_COND_WRITE_EQ = 19,
+ VE_COND_WRITE_GT = 20,
+ VE_COND_WRITE_GTE = 21,
+ VE_COND_WRITE_NEQ = 22,
+ VE_COND_MUX_EQ = 23,
+ VE_COND_MUX_GT = 24,
+ VE_COND_MUX_GTE = 25,
+ VE_SET_GREATER_THAN = 26,
+ VE_SET_EQUAL = 27,
+ VE_SET_NOT_EQUAL = 28
+};
+
+enum {
+ /* R3XX */
+ MATH_NO_OP = 0,
+ ME_EXP_BASE2_DX = 1,
+ ME_LOG_BASE2_DX = 2,
+ ME_EXP_BASEE_FF = 3,
+ ME_LIGHT_COEFF_DX = 4,
+ ME_POWER_FUNC_FF = 5,
+ ME_RECIP_DX = 6,
+ ME_RECIP_FF = 7,
+ ME_RECIP_SQRT_DX = 8,
+ ME_RECIP_SQRT_FF = 9,
+ ME_MULTIPLY = 10,
+ ME_EXP_BASE2_FULL_DX = 11,
+ ME_LOG_BASE2_FULL_DX = 12,
+ ME_POWER_FUNC_FF_CLAMP_B = 13,
+ ME_POWER_FUNC_FF_CLAMP_B1 = 14,
+ ME_POWER_FUNC_FF_CLAMP_01 = 15,
+ ME_SIN = 16,
+ ME_COS = 17,
+ /* R5XX */
+ ME_LOG_BASE2_IEEE = 18,
+ ME_RECIP_IEEE = 19,
+ ME_RECIP_SQRT_IEEE = 20,
+ ME_PRED_SET_EQ = 21,
+ ME_PRED_SET_GT = 22,
+ ME_PRED_SET_GTE = 23,
+ ME_PRED_SET_NEQ = 24,
+ ME_PRED_SET_CLR = 25,
+ ME_PRED_SET_INV = 26,
+ ME_PRED_SET_POP = 27,
+ ME_PRED_SET_RESTORE = 28
+};
+
+enum {
+ /* R3XX */
+ PVS_MACRO_OP_2CLK_MADD = 0,
+ PVS_MACRO_OP_2CLK_M2X_ADD = 1
+};
+
+enum {
+ PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */
+ PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */
+ PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */
+ PVS_SRC_REG_ALT_TEMPORARY = 3 /* Alternate Intermediate Storage */
+};
+
+enum {
+ PVS_DST_REG_TEMPORARY = 0, /* Intermediate Storage */
+ PVS_DST_REG_A0 = 1, /* Address Register Storage */
+ PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */
+ PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */
+ PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */
+ PVS_DST_REG_INPUT = 5 /* Output Memory & Replicate X to all channels */
+};
+
+enum {
+ PVS_SRC_SELECT_X = 0, /* Select X Component */
+ PVS_SRC_SELECT_Y = 1, /* Select Y Component */
+ PVS_SRC_SELECT_Z = 2, /* Select Z Component */
+ PVS_SRC_SELECT_W = 3, /* Select W Component */
+ PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */
+ PVS_SRC_SELECT_FORCE_1 = 5 /* Force Component to 1.0 */
+};
+
+/* PVS Opcode & Destination Operand Description */
+
+enum {
+ PVS_DST_OPCODE_MASK = 0x3f,
+ PVS_DST_OPCODE_SHIFT = 0,
+ PVS_DST_MATH_INST_MASK = 0x1,
+ PVS_DST_MATH_INST_SHIFT = 6,
+ PVS_DST_MACRO_INST_MASK = 0x1,
+ PVS_DST_MACRO_INST_SHIFT = 7,
+ PVS_DST_REG_TYPE_MASK = 0xf,
+ PVS_DST_REG_TYPE_SHIFT = 8,
+ PVS_DST_ADDR_MODE_1_MASK = 0x1,
+ PVS_DST_ADDR_MODE_1_SHIFT = 12,
+ PVS_DST_OFFSET_MASK = 0x7f,
+ PVS_DST_OFFSET_SHIFT = 13,
+ PVS_DST_WE_X_MASK = 0x1,
+ PVS_DST_WE_X_SHIFT = 20,
+ PVS_DST_WE_Y_MASK = 0x1,
+ PVS_DST_WE_Y_SHIFT = 21,
+ PVS_DST_WE_Z_MASK = 0x1,
+ PVS_DST_WE_Z_SHIFT = 22,
+ PVS_DST_WE_W_MASK = 0x1,
+ PVS_DST_WE_W_SHIFT = 23,
+ PVS_DST_VE_SAT_MASK = 0x1,
+ PVS_DST_VE_SAT_SHIFT = 24,
+ PVS_DST_ME_SAT_MASK = 0x1,
+ PVS_DST_ME_SAT_SHIFT = 25,
+ PVS_DST_PRED_ENABLE_MASK = 0x1,
+ PVS_DST_PRED_ENABLE_SHIFT = 26,
+ PVS_DST_PRED_SENSE_MASK = 0x1,
+ PVS_DST_PRED_SENSE_SHIFT = 27,
+ PVS_DST_DUAL_MATH_OP_MASK = 0x3,
+ PVS_DST_DUAL_MATH_OP_SHIFT = 27,
+ PVS_DST_ADDR_SEL_MASK = 0x3,
+ PVS_DST_ADDR_SEL_SHIFT = 29,
+ PVS_DST_ADDR_MODE_0_MASK = 0x1,
+ PVS_DST_ADDR_MODE_0_SHIFT = 31
+};
+
+/* PVS Source Operand Description */
+
+enum {
+ PVS_SRC_REG_TYPE_MASK = 0x3,
+ PVS_SRC_REG_TYPE_SHIFT = 0,
+ SPARE_0_MASK = 0x1,
+ SPARE_0_SHIFT = 2,
+ PVS_SRC_ABS_XYZW_MASK = 0x1,
+ PVS_SRC_ABS_XYZW_SHIFT = 3,
+ PVS_SRC_ADDR_MODE_0_MASK = 0x1,
+ PVS_SRC_ADDR_MODE_0_SHIFT = 4,
+ PVS_SRC_OFFSET_MASK = 0xff,
+ PVS_SRC_OFFSET_SHIFT = 5,
+ PVS_SRC_SWIZZLE_X_MASK = 0x7,
+ PVS_SRC_SWIZZLE_X_SHIFT = 13,
+ PVS_SRC_SWIZZLE_Y_MASK = 0x7,
+ PVS_SRC_SWIZZLE_Y_SHIFT = 16,
+ PVS_SRC_SWIZZLE_Z_MASK = 0x7,
+ PVS_SRC_SWIZZLE_Z_SHIFT = 19,
+ PVS_SRC_SWIZZLE_W_MASK = 0x7,
+ PVS_SRC_SWIZZLE_W_SHIFT = 22,
+ PVS_SRC_MODIFIER_X_MASK = 0x1,
+ PVS_SRC_MODIFIER_X_SHIFT = 25,
+ PVS_SRC_MODIFIER_Y_MASK = 0x1,
+ PVS_SRC_MODIFIER_Y_SHIFT = 26,
+ PVS_SRC_MODIFIER_Z_MASK = 0x1,
+ PVS_SRC_MODIFIER_Z_SHIFT = 27,
+ PVS_SRC_MODIFIER_W_MASK = 0x1,
+ PVS_SRC_MODIFIER_W_SHIFT = 28,
+ PVS_SRC_ADDR_SEL_MASK = 0x3,
+ PVS_SRC_ADDR_SEL_SHIFT = 29,
+ PVS_SRC_ADDR_MODE_1_MASK = 0x0,
+ PVS_SRC_ADDR_MODE_1_SHIFT = 32
+};
+
+/*\}*/
+
+/* BEGIN: Packet 3 commands */
+
+/* A primitive emission dword. */
+#define R300_PRIM_TYPE_NONE (0 << 0)
+#define R300_PRIM_TYPE_POINT (1 << 0)
+#define R300_PRIM_TYPE_LINE (2 << 0)
+#define R300_PRIM_TYPE_LINE_STRIP (3 << 0)
+#define R300_PRIM_TYPE_TRI_LIST (4 << 0)
+#define R300_PRIM_TYPE_TRI_FAN (5 << 0)
+#define R300_PRIM_TYPE_TRI_STRIP (6 << 0)
+#define R300_PRIM_TYPE_TRI_TYPE2 (7 << 0)
+#define R300_PRIM_TYPE_RECT_LIST (8 << 0)
+#define R300_PRIM_TYPE_3VRT_POINT_LIST (9 << 0)
+#define R300_PRIM_TYPE_3VRT_LINE_LIST (10 << 0)
+ /* GUESS (based on r200) */
+#define R300_PRIM_TYPE_POINT_SPRITES (11 << 0)
+#define R300_PRIM_TYPE_LINE_LOOP (12 << 0)
+#define R300_PRIM_TYPE_QUADS (13 << 0)
+#define R300_PRIM_TYPE_QUAD_STRIP (14 << 0)
+#define R300_PRIM_TYPE_POLYGON (15 << 0)
+#define R300_PRIM_TYPE_MASK 0xF
+#define R300_PRIM_WALK_IND (1 << 4)
+#define R300_PRIM_WALK_LIST (2 << 4)
+#define R300_PRIM_WALK_RING (3 << 4)
+#define R300_PRIM_WALK_MASK (3 << 4)
+ /* GUESS (based on r200) */
+#define R300_PRIM_COLOR_ORDER_BGRA (0 << 6)
+#define R300_PRIM_COLOR_ORDER_RGBA (1 << 6)
+#define R300_PRIM_NUM_VERTICES_SHIFT 16
+#define R300_PRIM_NUM_VERTICES_MASK 0xffff
+
+
+
+/*
+ * The R500 unified shader (US) registers come in banks of 512 each, one
+ * for each instruction slot in the shader. You can't touch them directly.
+ * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive
+ * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the
+ * instruction is fully specified.
+ */
+#define R500_US_ALU_ALPHA_INST_0 0xa800
+# define R500_ALPHA_OP_MAD 0
+# define R500_ALPHA_OP_DP 1
+# define R500_ALPHA_OP_MIN 2
+# define R500_ALPHA_OP_MAX 3
+/* #define R500_ALPHA_OP_RESERVED 4 */
+# define R500_ALPHA_OP_CND 5
+# define R500_ALPHA_OP_CMP 6
+# define R500_ALPHA_OP_FRC 7
+# define R500_ALPHA_OP_EX2 8
+# define R500_ALPHA_OP_LN2 9
+# define R500_ALPHA_OP_RCP 10
+# define R500_ALPHA_OP_RSQ 11
+# define R500_ALPHA_OP_SIN 12
+# define R500_ALPHA_OP_COS 13
+# define R500_ALPHA_OP_MDH 14
+# define R500_ALPHA_OP_MDV 15
+# define R500_ALPHA_ADDRD(x) ((x) << 4)
+# define R500_ALPHA_ADDRD_REL (1 << 11)
+# define R500_ALPHA_SEL_A_SHIFT 12
+# define R500_ALPHA_SEL_A_SRC0 (0 << 12)
+# define R500_ALPHA_SEL_A_SRC1 (1 << 12)
+# define R500_ALPHA_SEL_A_SRC2 (2 << 12)
+# define R500_ALPHA_SEL_A_SRCP (3 << 12)
+# define R500_ALPHA_SWIZ_A_R (0 << 14)
+# define R500_ALPHA_SWIZ_A_G (1 << 14)
+# define R500_ALPHA_SWIZ_A_B (2 << 14)
+# define R500_ALPHA_SWIZ_A_A (3 << 14)
+# define R500_ALPHA_SWIZ_A_0 (4 << 14)
+# define R500_ALPHA_SWIZ_A_HALF (5 << 14)
+# define R500_ALPHA_SWIZ_A_1 (6 << 14)
+/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */
+# define R500_ALPHA_MOD_A_NOP (0 << 17)
+# define R500_ALPHA_MOD_A_NEG (1 << 17)
+# define R500_ALPHA_MOD_A_ABS (2 << 17)
+# define R500_ALPHA_MOD_A_NAB (3 << 17)
+# define R500_ALPHA_SEL_B_SHIFT 19
+# define R500_ALPHA_SEL_B_SRC0 (0 << 19)
+# define R500_ALPHA_SEL_B_SRC1 (1 << 19)
+# define R500_ALPHA_SEL_B_SRC2 (2 << 19)
+# define R500_ALPHA_SEL_B_SRCP (3 << 19)
+# define R500_ALPHA_SWIZ_B_R (0 << 21)
+# define R500_ALPHA_SWIZ_B_G (1 << 21)
+# define R500_ALPHA_SWIZ_B_B (2 << 21)
+# define R500_ALPHA_SWIZ_B_A (3 << 21)
+# define R500_ALPHA_SWIZ_B_0 (4 << 21)
+# define R500_ALPHA_SWIZ_B_HALF (5 << 21)
+# define R500_ALPHA_SWIZ_B_1 (6 << 21)
+/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */
+# define R500_ALPHA_MOD_B_NOP (0 << 24)
+# define R500_ALPHA_MOD_B_NEG (1 << 24)
+# define R500_ALPHA_MOD_B_ABS (2 << 24)
+# define R500_ALPHA_MOD_B_NAB (3 << 24)
+# define R500_ALPHA_OMOD_IDENTITY (0 << 26)
+# define R500_ALPHA_OMOD_MUL_2 (1 << 26)
+# define R500_ALPHA_OMOD_MUL_4 (2 << 26)
+# define R500_ALPHA_OMOD_MUL_8 (3 << 26)
+# define R500_ALPHA_OMOD_DIV_2 (4 << 26)
+# define R500_ALPHA_OMOD_DIV_4 (5 << 26)
+# define R500_ALPHA_OMOD_DIV_8 (6 << 26)
+# define R500_ALPHA_OMOD_DISABLE (7 << 26)
+# define R500_ALPHA_TARGET(x) ((x) << 29)
+# define R500_ALPHA_W_OMASK (1 << 31)
+#define R500_US_ALU_ALPHA_ADDR_0 0x9800
+# define R500_ALPHA_ADDR0(x) ((x) << 0)
+# define R500_ALPHA_ADDR0_CONST (1 << 8)
+# define R500_ALPHA_ADDR0_REL (1 << 9)
+# define R500_ALPHA_ADDR1(x) ((x) << 10)
+# define R500_ALPHA_ADDR1_CONST (1 << 18)
+# define R500_ALPHA_ADDR1_REL (1 << 19)
+# define R500_ALPHA_ADDR2(x) ((x) << 20)
+# define R500_ALPHA_ADDR2_CONST (1 << 28)
+# define R500_ALPHA_ADDR2_REL (1 << 29)
+# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30)
+# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30)
+# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30)
+# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3 << 30)
+#define R500_US_ALU_RGBA_INST_0 0xb000
+# define R500_ALU_RGBA_OP_MAD (0 << 0)
+# define R500_ALU_RGBA_OP_DP3 (1 << 0)
+# define R500_ALU_RGBA_OP_DP4 (2 << 0)
+# define R500_ALU_RGBA_OP_D2A (3 << 0)
+# define R500_ALU_RGBA_OP_MIN (4 << 0)
+# define R500_ALU_RGBA_OP_MAX (5 << 0)
+/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */
+# define R500_ALU_RGBA_OP_CND (7 << 0)
+# define R500_ALU_RGBA_OP_CMP (8 << 0)
+# define R500_ALU_RGBA_OP_FRC (9 << 0)
+# define R500_ALU_RGBA_OP_SOP (10 << 0)
+# define R500_ALU_RGBA_OP_MDH (11 << 0)
+# define R500_ALU_RGBA_OP_MDV (12 << 0)
+# define R500_ALU_RGBA_ADDRD(x) ((x) << 4)
+# define R500_ALU_RGBA_ADDRD_REL (1 << 11)
+# define R500_ALU_RGBA_SEL_C_SHIFT 12
+# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12)
+# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12)
+# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12)
+# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12)
+# define R500_ALU_RGBA_R_SWIZ_R (0 << 14)
+# define R500_ALU_RGBA_R_SWIZ_G (1 << 14)
+# define R500_ALU_RGBA_R_SWIZ_B (2 << 14)
+# define R500_ALU_RGBA_R_SWIZ_A (3 << 14)
+# define R500_ALU_RGBA_R_SWIZ_0 (4 << 14)
+# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14)
+# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14)
+/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */
+# define R500_ALU_RGBA_G_SWIZ_R (0 << 17)
+# define R500_ALU_RGBA_G_SWIZ_G (1 << 17)
+# define R500_ALU_RGBA_G_SWIZ_B (2 << 17)
+# define R500_ALU_RGBA_G_SWIZ_A (3 << 17)
+# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17)
+# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17)
+# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17)
+/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */
+# define R500_ALU_RGBA_B_SWIZ_R (0 << 20)
+# define R500_ALU_RGBA_B_SWIZ_G (1 << 20)
+# define R500_ALU_RGBA_B_SWIZ_B (2 << 20)
+# define R500_ALU_RGBA_B_SWIZ_A (3 << 20)
+# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20)
+# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20)
+# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20)
+/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */
+# define R500_ALU_RGBA_MOD_C_NOP (0 << 23)
+# define R500_ALU_RGBA_MOD_C_NEG (1 << 23)
+# define R500_ALU_RGBA_MOD_C_ABS (2 << 23)
+# define R500_ALU_RGBA_MOD_C_NAB (3 << 23)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT 25
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25)
+# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25)
+# define R500_ALU_RGBA_A_SWIZ_R (0 << 27)
+# define R500_ALU_RGBA_A_SWIZ_G (1 << 27)
+# define R500_ALU_RGBA_A_SWIZ_B (2 << 27)
+# define R500_ALU_RGBA_A_SWIZ_A (3 << 27)
+# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27)
+# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27)
+# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27)
+/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */
+# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30)
+# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 << 30)
+#define R500_US_ALU_RGB_INST_0 0xa000
+# define R500_ALU_RGB_SEL_A_SHIFT 0
+# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0)
+# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0)
+# define R500_ALU_RGB_SEL_A_SRC2 (2 << 0)
+# define R500_ALU_RGB_SEL_A_SRCP (3 << 0)
+# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2)
+# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2)
+/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */
+# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5)
+# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5)
+/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */
+# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8)
+# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8)
+/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */
+# define R500_ALU_RGB_MOD_A_NOP (0 << 11)
+# define R500_ALU_RGB_MOD_A_NEG (1 << 11)
+# define R500_ALU_RGB_MOD_A_ABS (2 << 11)
+# define R500_ALU_RGB_MOD_A_NAB (3 << 11)
+# define R500_ALU_RGB_SEL_B_SHIFT 13
+# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13)
+# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13)
+# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13)
+# define R500_ALU_RGB_SEL_B_SRCP (3 << 13)
+# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15)
+# define R500_ALU_RGB_R_SWIZ_B_1 (6 << 15)
+/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */
+# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_A (3 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18)
+# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18)
+/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */
+# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21)
+# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21)
+/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */
+# define R500_ALU_RGB_MOD_B_NOP (0 << 24)
+# define R500_ALU_RGB_MOD_B_NEG (1 << 24)
+# define R500_ALU_RGB_MOD_B_ABS (2 << 24)
+# define R500_ALU_RGB_MOD_B_NAB (3 << 24)
+# define R500_ALU_RGB_OMOD_IDENTITY (0 << 26)
+# define R500_ALU_RGB_OMOD_MUL_2 (1 << 26)
+# define R500_ALU_RGB_OMOD_MUL_4 (2 << 26)
+# define R500_ALU_RGB_OMOD_MUL_8 (3 << 26)
+# define R500_ALU_RGB_OMOD_DIV_2 (4 << 26)
+# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26)
+# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26)
+# define R500_ALU_RGB_OMOD_DISABLE (7 << 26)
+# define R500_ALU_RGB_TARGET(x) ((x) << 29)
+# define R500_ALU_RGB_WMASK (1 << 31)
+#define R500_US_ALU_RGB_ADDR_0 0x9000
+# define R500_RGB_ADDR0(x) ((x) << 0)
+# define R500_RGB_ADDR0_CONST (1 << 8)
+# define R500_RGB_ADDR0_REL (1 << 9)
+# define R500_RGB_ADDR1(x) ((x) << 10)
+# define R500_RGB_ADDR1_CONST (1 << 18)
+# define R500_RGB_ADDR1_REL (1 << 19)
+# define R500_RGB_ADDR2(x) ((x) << 20)
+# define R500_RGB_ADDR2_CONST (1 << 28)
+# define R500_RGB_ADDR2_REL (1 << 29)
+# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30)
+# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30)
+# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30)
+# define R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30)
+#define R500_US_CMN_INST_0 0xb800
+# define R500_INST_TYPE_MASK (3 << 0)
+# define R500_INST_TYPE_ALU (0 << 0)
+# define R500_INST_TYPE_OUT (1 << 0)
+# define R500_INST_TYPE_FC (2 << 0)
+# define R500_INST_TYPE_TEX (3 << 0)
+# define R500_INST_TEX_SEM_WAIT (1 << 2)
+# define R500_INST_RGB_PRED_SEL_NONE (0 << 3)
+# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3)
+# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3)
+# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3)
+# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3)
+# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3)
+# define R500_INST_RGB_PRED_INV (1 << 6)
+# define R500_INST_WRITE_INACTIVE (1 << 7)
+# define R500_INST_LAST (1 << 8)
+# define R500_INST_NOP (1 << 9)
+# define R500_INST_ALU_WAIT (1 << 10)
+# define R500_INST_RGB_WMASK_R (1 << 11)
+# define R500_INST_RGB_WMASK_G (1 << 12)
+# define R500_INST_RGB_WMASK_B (1 << 13)
+# define R500_INST_RGB_WMASK_RGB (7 << 11)
+# define R500_INST_ALPHA_WMASK (1 << 14)
+# define R500_INST_RGB_OMASK_R (1 << 15)
+# define R500_INST_RGB_OMASK_G (1 << 16)
+# define R500_INST_RGB_OMASK_B (1 << 17)
+# define R500_INST_RGB_OMASK_RGB (7 << 15)
+# define R500_INST_ALPHA_OMASK (1 << 18)
+# define R500_INST_RGB_CLAMP (1 << 19)
+# define R500_INST_ALPHA_CLAMP (1 << 20)
+# define R500_INST_ALU_RESULT_SEL (1 << 21)
+# define R500_INST_ALPHA_PRED_INV (1 << 22)
+# define R500_INST_ALU_RESULT_OP_EQ (0 << 23)
+# define R500_INST_ALU_RESULT_OP_LT (1 << 23)
+# define R500_INST_ALU_RESULT_OP_GE (2 << 23)
+# define R500_INST_ALU_RESULT_OP_NE (3 << 23)
+# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25)
+# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25)
+# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25)
+# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25)
+# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25)
+# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25)
+/* XXX next four are kind of guessed */
+# define R500_INST_STAT_WE_R (1 << 28)
+# define R500_INST_STAT_WE_G (1 << 29)
+# define R500_INST_STAT_WE_B (1 << 30)
+# define R500_INST_STAT_WE_A (1 << 31)
+
+/* note that these are 8 bit lengths, despite the offsets, at least for R500 */
+#define R500_US_CODE_ADDR 0x4630
+# define R500_US_CODE_START_ADDR(x) ((x) << 0)
+# define R500_US_CODE_END_ADDR(x) ((x) << 16)
+#define R500_US_CODE_OFFSET 0x4638
+# define R500_US_CODE_OFFSET_ADDR(x) ((x) << 0)
+#define R500_US_CODE_RANGE 0x4634
+# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0)
+# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16)
+#define R500_US_CONFIG 0x4600
+# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1)
+#define R500_US_FC_ADDR_0 0xa000
+# define R500_FC_BOOL_ADDR(x) ((x) << 0)
+# define R500_FC_INT_ADDR(x) ((x) << 8)
+# define R500_FC_JUMP_ADDR(x) ((x) << 16)
+# define R500_FC_JUMP_GLOBAL (1 << 31)
+#define R500_US_FC_BOOL_CONST 0x4620
+# define R500_FC_KBOOL(x) (x)
+#define R500_US_FC_CTRL 0x4624
+# define R500_FC_TEST_EN (1 << 30)
+# define R500_FC_FULL_FC_EN (1 << 31)
+#define R500_US_FC_INST_0 0x9800
+# define R500_FC_OP_JUMP (0 << 0)
+# define R500_FC_OP_LOOP (1 << 0)
+# define R500_FC_OP_ENDLOOP (2 << 0)
+# define R500_FC_OP_REP (3 << 0)
+# define R500_FC_OP_ENDREP (4 << 0)
+# define R500_FC_OP_BREAKLOOP (5 << 0)
+# define R500_FC_OP_BREAKREP (6 << 0)
+# define R500_FC_OP_CONTINUE (7 << 0)
+# define R500_FC_B_ELSE (1 << 4)
+# define R500_FC_JUMP_ANY (1 << 5)
+# define R500_FC_A_OP_NONE (0 << 6)
+# define R500_FC_A_OP_POP (1 << 6)
+# define R500_FC_A_OP_PUSH (2 << 6)
+# define R500_FC_JUMP_FUNC(x) ((x) << 8)
+# define R500_FC_B_POP_CNT(x) ((x) << 16)
+# define R500_FC_B_OP0_NONE (0 << 24)
+# define R500_FC_B_OP0_DECR (1 << 24)
+# define R500_FC_B_OP0_INCR (2 << 24)
+# define R500_FC_B_OP1_DECR (0 << 26)
+# define R500_FC_B_OP1_NONE (1 << 26)
+# define R500_FC_B_OP1_INCR (2 << 26)
+# define R500_FC_IGNORE_UNCOVERED (1 << 28)
+#define R500_US_FC_INT_CONST_0 0x4c00
+# define R500_FC_INT_CONST_KR(x) ((x) << 0)
+# define R500_FC_INT_CONST_KG(x) ((x) << 8)
+# define R500_FC_INT_CONST_KB(x) ((x) << 16)
+/* _0 through _15 */
+#define R500_US_FORMAT0_0 0x4640
+# define R500_FORMAT_TXWIDTH(x) ((x) << 0)
+# define R500_FORMAT_TXHEIGHT(x) ((x) << 11)
+# define R500_FORMAT_TXDEPTH(x) ((x) << 22)
+#define R500_US_PIXSIZE 0x4604
+# define R500_PIX_SIZE(x) (x)
+#define R500_US_TEX_ADDR_0 0x9800
+# define R500_TEX_SRC_ADDR(x) ((x) << 0)
+# define R500_TEX_SRC_ADDR_REL (1 << 7)
+# define R500_TEX_SRC_S_SWIZ_R (0 << 8)
+# define R500_TEX_SRC_S_SWIZ_G (1 << 8)
+# define R500_TEX_SRC_S_SWIZ_B (2 << 8)
+# define R500_TEX_SRC_S_SWIZ_A (3 << 8)
+# define R500_TEX_SRC_T_SWIZ_R (0 << 10)
+# define R500_TEX_SRC_T_SWIZ_G (1 << 10)
+# define R500_TEX_SRC_T_SWIZ_B (2 << 10)
+# define R500_TEX_SRC_T_SWIZ_A (3 << 10)
+# define R500_TEX_SRC_R_SWIZ_R (0 << 12)
+# define R500_TEX_SRC_R_SWIZ_G (1 << 12)
+# define R500_TEX_SRC_R_SWIZ_B (2 << 12)
+# define R500_TEX_SRC_R_SWIZ_A (3 << 12)
+# define R500_TEX_SRC_Q_SWIZ_R (0 << 14)
+# define R500_TEX_SRC_Q_SWIZ_G (1 << 14)
+# define R500_TEX_SRC_Q_SWIZ_B (2 << 14)
+# define R500_TEX_SRC_Q_SWIZ_A (3 << 14)
+# define R500_TEX_DST_ADDR(x) ((x) << 16)
+# define R500_TEX_DST_ADDR_REL (1 << 23)
+# define R500_TEX_DST_R_SWIZ_R (0 << 24)
+# define R500_TEX_DST_R_SWIZ_G (1 << 24)
+# define R500_TEX_DST_R_SWIZ_B (2 << 24)
+# define R500_TEX_DST_R_SWIZ_A (3 << 24)
+# define R500_TEX_DST_G_SWIZ_R (0 << 26)
+# define R500_TEX_DST_G_SWIZ_G (1 << 26)
+# define R500_TEX_DST_G_SWIZ_B (2 << 26)
+# define R500_TEX_DST_G_SWIZ_A (3 << 26)
+# define R500_TEX_DST_B_SWIZ_R (0 << 28)
+# define R500_TEX_DST_B_SWIZ_G (1 << 28)
+# define R500_TEX_DST_B_SWIZ_B (2 << 28)
+# define R500_TEX_DST_B_SWIZ_A (3 << 28)
+# define R500_TEX_DST_A_SWIZ_R (0 << 30)
+# define R500_TEX_DST_A_SWIZ_G (1 << 30)
+# define R500_TEX_DST_A_SWIZ_B (2 << 30)
+# define R500_TEX_DST_A_SWIZ_A (3 << 30)
+#define R500_US_TEX_ADDR_DXDY_0 0xa000
+# define R500_DX_ADDR(x) ((x) << 0)
+# define R500_DX_ADDR_REL (1 << 7)
+# define R500_DX_S_SWIZ_R (0 << 8)
+# define R500_DX_S_SWIZ_G (1 << 8)
+# define R500_DX_S_SWIZ_B (2 << 8)
+# define R500_DX_S_SWIZ_A (3 << 8)
+# define R500_DX_T_SWIZ_R (0 << 10)
+# define R500_DX_T_SWIZ_G (1 << 10)
+# define R500_DX_T_SWIZ_B (2 << 10)
+# define R500_DX_T_SWIZ_A (3 << 10)
+# define R500_DX_R_SWIZ_R (0 << 12)
+# define R500_DX_R_SWIZ_G (1 << 12)
+# define R500_DX_R_SWIZ_B (2 << 12)
+# define R500_DX_R_SWIZ_A (3 << 12)
+# define R500_DX_Q_SWIZ_R (0 << 14)
+# define R500_DX_Q_SWIZ_G (1 << 14)
+# define R500_DX_Q_SWIZ_B (2 << 14)
+# define R500_DX_Q_SWIZ_A (3 << 14)
+# define R500_DY_ADDR(x) ((x) << 16)
+# define R500_DY_ADDR_REL (1 << 17)
+# define R500_DY_S_SWIZ_R (0 << 24)
+# define R500_DY_S_SWIZ_G (1 << 24)
+# define R500_DY_S_SWIZ_B (2 << 24)
+# define R500_DY_S_SWIZ_A (3 << 24)
+# define R500_DY_T_SWIZ_R (0 << 26)
+# define R500_DY_T_SWIZ_G (1 << 26)
+# define R500_DY_T_SWIZ_B (2 << 26)
+# define R500_DY_T_SWIZ_A (3 << 26)
+# define R500_DY_R_SWIZ_R (0 << 28)
+# define R500_DY_R_SWIZ_G (1 << 28)
+# define R500_DY_R_SWIZ_B (2 << 28)
+# define R500_DY_R_SWIZ_A (3 << 28)
+# define R500_DY_Q_SWIZ_R (0 << 30)
+# define R500_DY_Q_SWIZ_G (1 << 30)
+# define R500_DY_Q_SWIZ_B (2 << 30)
+# define R500_DY_Q_SWIZ_A (3 << 30)
+#define R500_US_TEX_INST_0 0x9000
+# define R500_TEX_ID(x) ((x) << 16)
+# define R500_TEX_INST_NOP (0 << 22)
+# define R500_TEX_INST_LD (1 << 22)
+# define R500_TEX_INST_TEXKILL (2 << 22)
+# define R500_TEX_INST_PROJ (3 << 22)
+# define R500_TEX_INST_LODBIAS (4 << 22)
+# define R500_TEX_INST_LOD (5 << 22)
+# define R500_TEX_INST_DXDY (6 << 22)
+# define R500_TEX_SEM_ACQUIRE (1 << 25)
+# define R500_TEX_IGNORE_UNCOVERED (1 << 26)
+# define R500_TEX_UNSCALED (1 << 27)
+#define R300_US_W_FMT 0x46b4
+# define R300_W_FMT_W0 (0 << 0)
+# define R300_W_FMT_W24 (1 << 0)
+# define R300_W_FMT_W24FP (2 << 0)
+# define R300_W_SRC_US (0 << 2)
+# define R300_W_SRC_RAS (1 << 2)
+
+/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR.
+ * Two parameter dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ */
+#define R300_PACKET3_3D_DRAW_VBUF 0x00002800
+
+/* Draw a primitive from immediate vertices in this packet
+ * Up to 16382 dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ * 2 to end: Up to 16380 dwords of vertex data.
+ */
+#define R300_PACKET3_3D_DRAW_IMMD 0x00002900
+
+/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR and
+ * immediate vertices in this packet
+ * Up to 16382 dwords:
+ * 0. VAP_VTX_FMT: The first parameter is not written to hardware
+ * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword.
+ * 2 to end: Up to 16380 dwords of vertex data.
+ */
+#define R300_PACKET3_3D_DRAW_INDX 0x00002A00
+
+
+/* Specify the full set of vertex arrays as (address, stride).
+ * The first parameter is the number of vertex arrays specified.
+ * The rest of the command is a variable length list of blocks, where
+ * each block is three dwords long and specifies two arrays.
+ * The first dword of a block is split into two words, the lower significant
+ * word refers to the first array, the more significant word to the second
+ * array in the block.
+ * The low byte of each word contains the size of an array entry in dwords,
+ * the high byte contains the stride of the array.
+ * The second dword of a block contains the pointer to the first array,
+ * the third dword of a block contains the pointer to the second array.
+ * Note that if the total number of arrays is odd, the third dword of
+ * the last block is omitted.
+ */
+#define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00
+# define R300_VC_FORCE_PREFETCH (1 << 5)
+# define R300_VBPNTR_SIZE0(x) ((x) >> 2)
+# define R300_VBPNTR_STRIDE0(x) (((x) >> 2) << 8)
+# define R300_VBPNTR_SIZE1(x) (((x) >> 2) << 16)
+# define R300_VBPNTR_STRIDE1(x) (((x) >> 2) << 24)
+
+#define R300_PACKET3_INDX_BUFFER 0x00003300
+# define R300_INDX_BUFFER_DST_SHIFT 0
+# define R300_INDX_BUFFER_SKIP_SHIFT 16
+# define R300_INDX_BUFFER_ONE_REG_WR (1<<31)
+
+/* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400
+/* Same as R300_PACKET3_3D_DRAW_IMMD but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_IMMD_2 0x00003500
+/* Same as R300_PACKET3_3D_DRAW_INDX but without VAP_VTX_FMT */
+#define R300_PACKET3_3D_DRAW_INDX_2 0x00003600
+
+/* Clears a portion of hierachical Z RAM
+ * 3 dword parameters
+ * 0. START
+ * 1. COUNT: 13:0 (max is 0x3FFF)
+ * 2. CLEAR_VALUE: Value to write into HIZ RAM.
+ */
+#define R300_PACKET3_3D_CLEAR_HIZ 0x00003700
+
+/* Draws a set of primitives using vertex buffers pointed by the state data.
+ * At least 2 Parameters:
+ * 0. VAP_VF_CNTL: The first parameter is a standard primitive emission dword.
+ * 2 to end: Data or indices (see other 3D_DRAW_* packets for details)
+ */
+#define R300_PACKET3_3D_DRAW_128 0x00003900
+
+/* END: Packet 3 commands */
+
+
+/* Color formats for 2d packets
+ */
+#define R300_CP_COLOR_FORMAT_CI8 2
+#define R300_CP_COLOR_FORMAT_ARGB1555 3
+#define R300_CP_COLOR_FORMAT_RGB565 4
+#define R300_CP_COLOR_FORMAT_ARGB8888 6
+#define R300_CP_COLOR_FORMAT_RGB332 7
+#define R300_CP_COLOR_FORMAT_RGB8 9
+#define R300_CP_COLOR_FORMAT_ARGB4444 15
+
+/*
+ * CP type-3 packets
+ */
+#define R300_CP_CMD_BITBLT_MULTI 0xC0009B00
+
+/* XXX Corbin's stuff from radeon and r200 */
+
+#define RADEON_WAIT_UNTIL 0x1720
+# define RADEON_WAIT_CRTC_PFLIP (1 << 0)
+# define RADEON_WAIT_2D_IDLECLEAN (1 << 16)
+# define RADEON_WAIT_3D_IDLECLEAN (1 << 17)
+# define RADEON_WAIT_HOST_IDLECLEAN (1 << 18)
+
+#define R200_3D_DRAW_IMMD_2 0xC0003500
+
+#define RADEON_CP_PACKET0 0x0 /* XXX stolen from radeon_reg.h */
+#define RADEON_CP_PACKET3 0xC0000000
+
+#define RADEON_ONE_REG_WR (1 << 15)
+
+#define CP_PACKET0(register, count) \
+ (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2))
+
+#define CP_PACKET3(op, count) \
+ (RADEON_CP_PACKET3 | (op) | ((count) << 16))
+
+#endif /* _R300_REG_H */
+
+/* *INDENT-ON* */
+
+/* vim: set foldenable foldmarker=\\{,\\} foldmethod=marker : */
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
new file mode 100644
index 0000000000..4afd124c0e
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -0,0 +1,1073 @@
+/*
+ * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* r300_render: Vertex and index buffer primitive emission. Contains both
+ * HW TCL fastpath rendering, and SW TCL Draw-assisted rendering. */
+
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+
+#include "util/u_inlines.h"
+
+#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
+#include "util/u_prim.h"
+
+#include "r300_cs.h"
+#include "r300_cb.h"
+#include "r300_context.h"
+#include "r300_screen_buffer.h"
+#include "r300_emit.h"
+#include "r300_reg.h"
+#include "r300_state_derived.h"
+
+#include <limits.h>
+
+#define IMMD_DWORDS 32
+
+static uint32_t r300_translate_primitive(unsigned prim)
+{
+ switch (prim) {
+ case PIPE_PRIM_POINTS:
+ return R300_VAP_VF_CNTL__PRIM_POINTS;
+ case PIPE_PRIM_LINES:
+ return R300_VAP_VF_CNTL__PRIM_LINES;
+ case PIPE_PRIM_LINE_LOOP:
+ return R300_VAP_VF_CNTL__PRIM_LINE_LOOP;
+ case PIPE_PRIM_LINE_STRIP:
+ return R300_VAP_VF_CNTL__PRIM_LINE_STRIP;
+ case PIPE_PRIM_TRIANGLES:
+ return R300_VAP_VF_CNTL__PRIM_TRIANGLES;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ return R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP;
+ case PIPE_PRIM_TRIANGLE_FAN:
+ return R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN;
+ case PIPE_PRIM_QUADS:
+ return R300_VAP_VF_CNTL__PRIM_QUADS;
+ case PIPE_PRIM_QUAD_STRIP:
+ return R300_VAP_VF_CNTL__PRIM_QUAD_STRIP;
+ case PIPE_PRIM_POLYGON:
+ return R300_VAP_VF_CNTL__PRIM_POLYGON;
+ default:
+ return 0;
+ }
+}
+
+static uint32_t r300_provoking_vertex_fixes(struct r300_context *r300,
+ unsigned mode)
+{
+ struct r300_rs_state* rs = (struct r300_rs_state*)r300->rs_state.state;
+ uint32_t color_control = rs->color_control;
+
+ /* By default (see r300_state.c:r300_create_rs_state) color_control is
+ * initialized to provoking the first vertex.
+ *
+ * Triangle fans must be reduced to the second vertex, not the first, in
+ * Gallium flatshade-first mode, as per the GL spec.
+ * (http://www.opengl.org/registry/specs/ARB/provoking_vertex.txt)
+ *
+ * Quads never provoke correctly in flatshade-first mode. The first
+ * vertex is never considered as provoking, so only the second, third,
+ * and fourth vertices can be selected, and both "third" and "last" modes
+ * select the fourth vertex. This is probably due to D3D lacking quads.
+ *
+ * Similarly, polygons reduce to the first, not the last, vertex, when in
+ * "last" mode, and all other modes start from the second vertex.
+ *
+ * ~ C.
+ */
+
+ if (rs->rs.flatshade_first) {
+ switch (mode) {
+ case PIPE_PRIM_TRIANGLE_FAN:
+ color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND;
+ break;
+ case PIPE_PRIM_QUADS:
+ case PIPE_PRIM_QUAD_STRIP:
+ case PIPE_PRIM_POLYGON:
+ color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+ break;
+ default:
+ color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST;
+ break;
+ }
+ } else {
+ color_control |= R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST;
+ }
+
+ return color_control;
+}
+
+static boolean index_bias_supported(struct r300_context *r300)
+{
+ return r300->screen->caps.is_r500 &&
+ r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
+}
+
+static void r500_emit_index_bias(struct r300_context *r300, int index_bias)
+{
+ CS_LOCALS(r300);
+
+ BEGIN_CS(2);
+ OUT_CS_REG(R500_VAP_INDEX_OFFSET,
+ (index_bias & 0xFFFFFF) | (index_bias < 0 ? 1<<24 : 0));
+ END_CS;
+}
+
+/* This function splits the index bias value into two parts:
+ * - buffer_offset: the value that can be safely added to buffer offsets
+ * in r300_emit_aos (it must yield a positive offset when added to
+ * a vertex buffer offset)
+ * - index_offset: the value that must be manually subtracted from indices
+ * in an index buffer to achieve negative offsets. */
+static void r300_split_index_bias(struct r300_context *r300, int index_bias,
+ int *buffer_offset, int *index_offset)
+{
+ struct pipe_vertex_buffer *vb, *vbufs = r300->vertex_buffer;
+ struct pipe_vertex_element *velem = r300->velems->velem;
+ unsigned i, size;
+ int max_neg_bias;
+
+ if (index_bias < 0) {
+ /* See how large index bias we may subtract. We must be careful
+ * here because negative buffer offsets are not allowed
+ * by the DRM API. */
+ max_neg_bias = INT_MAX;
+ for (i = 0; i < r300->velems->count; i++) {
+ vb = &vbufs[velem[i].vertex_buffer_index];
+ size = (vb->buffer_offset + velem[i].src_offset) / vb->stride;
+ max_neg_bias = MIN2(max_neg_bias, size);
+ }
+
+ /* Now set the minimum allowed value. */
+ *buffer_offset = MAX2(-max_neg_bias, index_bias);
+ } else {
+ /* A positive index bias is OK. */
+ *buffer_offset = index_bias;
+ }
+
+ *index_offset = index_bias - *buffer_offset;
+}
+
+enum r300_prepare_flags {
+ PREP_FIRST_DRAW = (1 << 0), /* call emit_dirty_state and friends? */
+ PREP_VALIDATE_VBOS = (1 << 1), /* validate VBOs? */
+ PREP_EMIT_AOS = (1 << 2), /* call emit_aos? */
+ PREP_EMIT_AOS_SWTCL = (1 << 3), /* call emit_aos_swtcl? */
+ PREP_INDEXED = (1 << 4) /* is this draw_elements? */
+};
+
+/**
+ * Check if the requested number of dwords is available in the CS and
+ * if not, flush. Then validate buffers and emit dirty state.
+ * \param r300 The context.
+ * \param flags See r300_prepare_flags.
+ * \param index_buffer The index buffer to validate. The parameter may be NULL.
+ * \param cs_dwords The number of dwords to reserve in CS.
+ * \param aos_offset The offset passed to emit_aos.
+ * \param index_bias The index bias to emit.
+ * \param end_cs_dwords The number of free dwords which must be available
+ * at the end of CS after drawing in case the CS space
+ * management is performed by a draw_* function manually.
+ * The parameter may be NULL.
+ */
+static void r300_prepare_for_rendering(struct r300_context *r300,
+ enum r300_prepare_flags flags,
+ struct pipe_resource *index_buffer,
+ unsigned cs_dwords,
+ int aos_offset,
+ int index_bias,
+ unsigned *end_cs_dwords)
+{
+ unsigned end_dwords = 0;
+ boolean flushed = FALSE;
+ boolean first_draw = flags & PREP_FIRST_DRAW;
+ boolean emit_aos = flags & PREP_EMIT_AOS;
+ boolean emit_aos_swtcl = flags & PREP_EMIT_AOS_SWTCL;
+ boolean indexed = flags & PREP_INDEXED;
+ boolean hw_index_bias = index_bias_supported(r300);
+
+ /* Add dirty state, index offset, and AOS. */
+ if (first_draw) {
+ cs_dwords += r300_get_num_dirty_dwords(r300);
+
+ if (hw_index_bias)
+ cs_dwords += 2; /* emit_index_offset */
+
+ if (emit_aos)
+ cs_dwords += 55; /* emit_aos */
+
+ if (emit_aos_swtcl)
+ cs_dwords += 7; /* emit_aos_swtcl */
+ }
+
+ /* Emitted in flush. */
+ end_dwords += 26; /* emit_query_end */
+
+ cs_dwords += end_dwords;
+
+ /* Reserve requested CS space. */
+ if (!r300_check_cs(r300, cs_dwords)) {
+ r300->context.flush(&r300->context, 0, NULL);
+ flushed = TRUE;
+ }
+
+ /* Validate buffers and emit dirty state if needed. */
+ if (first_draw || flushed) {
+ r300_emit_buffer_validate(r300, flags & PREP_VALIDATE_VBOS, index_buffer);
+ r300_emit_dirty_state(r300);
+ if (hw_index_bias) {
+ if (r300->screen->caps.has_tcl)
+ r500_emit_index_bias(r300, index_bias);
+ else
+ r500_emit_index_bias(r300, 0);
+ }
+
+ if (emit_aos)
+ r300_emit_aos(r300, aos_offset, indexed);
+
+ if (emit_aos_swtcl)
+ r300_emit_aos_swtcl(r300, indexed);
+ }
+
+ if (end_cs_dwords)
+ *end_cs_dwords = end_dwords;
+}
+
+static boolean immd_is_good_idea(struct r300_context *r300,
+ unsigned count)
+{
+ struct pipe_vertex_element* velem;
+ struct pipe_vertex_buffer* vbuf;
+ boolean checked[PIPE_MAX_ATTRIBS] = {0};
+ unsigned vertex_element_count = r300->velems->count;
+ unsigned i, vbi;
+
+ if (DBG_ON(r300, DBG_NO_IMMD)) {
+ return FALSE;
+ }
+
+ if (r300->draw) {
+ return FALSE;
+ }
+
+ if (count * r300->velems->vertex_size_dwords > IMMD_DWORDS) {
+ return FALSE;
+ }
+
+ /* We shouldn't map buffers referenced by CS, busy buffers,
+ * and ones placed in VRAM. */
+ /* XXX Check for VRAM buffers. */
+ for (i = 0; i < vertex_element_count; i++) {
+ velem = &r300->velems->velem[i];
+ vbi = velem->vertex_buffer_index;
+
+ if (!checked[vbi]) {
+ vbuf = &r300->vertex_buffer[vbi];
+
+ if (r300_buffer_is_referenced(&r300->context,
+ vbuf->buffer,
+ R300_REF_CS | R300_REF_HW)) {
+ /* It's a very bad idea to map it... */
+ return FALSE;
+ }
+ checked[vbi] = TRUE;
+ }
+ }
+ return TRUE;
+}
+
+/*****************************************************************************
+ * The emission of draw packets for r500. Older GPUs may use these functions *
+ * after resolving fallback issues (e.g. stencil ref two-sided). *
+ ****************************************************************************/
+
+static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ struct pipe_vertex_element* velem;
+ struct pipe_vertex_buffer* vbuf;
+ unsigned vertex_element_count = r300->velems->count;
+ unsigned i, v, vbi, dwords;
+
+ /* Size of the vertex, in dwords. */
+ unsigned vertex_size = r300->velems->vertex_size_dwords;
+
+ /* Offsets of the attribute, in dwords, from the start of the vertex. */
+ unsigned offset[PIPE_MAX_ATTRIBS];
+
+ /* Size of the vertex element, in dwords. */
+ unsigned size[PIPE_MAX_ATTRIBS];
+
+ /* Stride to the same attrib in the next vertex in the vertex buffer,
+ * in dwords. */
+ unsigned stride[PIPE_MAX_ATTRIBS] = {0};
+
+ /* Mapped vertex buffers. */
+ uint32_t* map[PIPE_MAX_ATTRIBS] = {0};
+ struct pipe_transfer* transfer[PIPE_MAX_ATTRIBS] = {NULL};
+
+ CB_LOCALS;
+
+ /* Calculate the vertex size, offsets, strides etc. and map the buffers. */
+ for (i = 0; i < vertex_element_count; i++) {
+ velem = &r300->velems->velem[i];
+ offset[i] = velem->src_offset / 4;
+ size[i] = r300->velems->hw_format_size[i] / 4;
+ vbi = velem->vertex_buffer_index;
+
+ /* Map the buffer. */
+ if (!map[vbi]) {
+ vbuf = &r300->vertex_buffer[vbi];
+ map[vbi] = (uint32_t*)pipe_buffer_map(&r300->context,
+ vbuf->buffer,
+ PIPE_TRANSFER_READ,
+ &transfer[vbi]);
+ stride[vbi] = vbuf->stride / 4;
+ map[vbi] += vbuf->buffer_offset / 4 + stride[vbi] * start;
+ }
+ }
+
+ dwords = 9 + count * vertex_size;
+
+ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW, NULL, dwords, 0, 0, NULL);
+
+ BEGIN_CS_AS_CB(r300, dwords);
+ OUT_CB_REG(R300_GA_COLOR_CONTROL,
+ r300_provoking_vertex_fixes(r300, mode));
+ OUT_CB_REG(R300_VAP_VTX_SIZE, vertex_size);
+ OUT_CB_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+ OUT_CB(count - 1);
+ OUT_CB(0);
+ OUT_CB_PKT3(R300_PACKET3_3D_DRAW_IMMD_2, count * vertex_size);
+ OUT_CB(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED | (count << 16) |
+ r300_translate_primitive(mode));
+
+ /* Emit vertices. */
+ for (v = 0; v < count; v++) {
+ for (i = 0; i < vertex_element_count; i++) {
+ vbi = r300->velems->velem[i].vertex_buffer_index;
+
+ OUT_CB_TABLE(&map[vbi][offset[i] + stride[vbi] * v], size[i]);
+ }
+ }
+ END_CB;
+
+ /* Unmap buffers. */
+ for (i = 0; i < vertex_element_count; i++) {
+ vbi = r300->velems->velem[i].vertex_buffer_index;
+
+ if (map[vbi]) {
+ vbuf = &r300->vertex_buffer[vbi];
+ pipe_buffer_unmap(&r300->context, vbuf->buffer, transfer[vbi]);
+ map[vbi] = NULL;
+ }
+ }
+}
+
+static void r300_emit_draw_arrays(struct r300_context *r300,
+ unsigned mode,
+ unsigned count)
+{
+ boolean alt_num_verts = count > 65535;
+ CS_LOCALS(r300);
+
+ if (count >= (1 << 24)) {
+ fprintf(stderr, "r300: Got a huge number of vertices: %i, "
+ "refusing to render.\n", count);
+ return;
+ }
+
+ BEGIN_CS(7 + (alt_num_verts ? 2 : 0));
+ if (alt_num_verts) {
+ OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
+ }
+ OUT_CS_REG(R300_GA_COLOR_CONTROL,
+ r300_provoking_vertex_fixes(r300, mode));
+ OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+ OUT_CS(count - 1);
+ OUT_CS(0);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
+ r300_translate_primitive(mode) |
+ (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
+ END_CS;
+}
+
+static void r300_emit_draw_elements(struct r300_context *r300,
+ struct pipe_resource* indexBuffer,
+ unsigned indexSize,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ uint32_t count_dwords;
+ uint32_t offset_dwords = indexSize * start / sizeof(uint32_t);
+ boolean alt_num_verts = count > 65535;
+ CS_LOCALS(r300);
+
+ if (count >= (1 << 24)) {
+ fprintf(stderr, "r300: Got a huge number of vertices: %i, "
+ "refusing to render.\n", count);
+ return;
+ }
+
+ maxIndex = MIN2(maxIndex, r300->vertex_buffer_max_index);
+
+ DBG(r300, DBG_DRAW, "r300: Indexbuf of %u indices, min %u max %u\n",
+ count, minIndex, maxIndex);
+
+ BEGIN_CS(13 + (alt_num_verts ? 2 : 0));
+ if (alt_num_verts) {
+ OUT_CS_REG(R500_VAP_ALT_NUM_VERTICES, count);
+ }
+ OUT_CS_REG(R300_GA_COLOR_CONTROL,
+ r300_provoking_vertex_fixes(r300, mode));
+ OUT_CS_REG_SEQ(R300_VAP_VF_MAX_VTX_INDX, 2);
+ OUT_CS(maxIndex);
+ OUT_CS(minIndex);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0);
+ if (indexSize == 4) {
+ count_dwords = count;
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
+ R300_VAP_VF_CNTL__INDEX_SIZE_32bit |
+ r300_translate_primitive(mode) |
+ (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
+ } else {
+ count_dwords = (count + 1) / 2;
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
+ r300_translate_primitive(mode) |
+ (alt_num_verts ? R500_VAP_VF_CNTL__USE_ALT_NUM_VERTS : 0));
+ }
+
+ /* INDX_BUFFER is a truly special packet3.
+ * Unlike most other packet3, where the offset is after the count,
+ * the order is reversed, so the relocation ends up carrying the
+ * size of the indexbuf instead of the offset.
+ */
+ OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2);
+ OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2) |
+ (0 << R300_INDX_BUFFER_SKIP_SHIFT));
+ OUT_CS(offset_dwords << 2);
+ OUT_CS_BUF_RELOC(indexBuffer, count_dwords,
+ r300_buffer(indexBuffer)->domain, 0, 0);
+
+ END_CS;
+}
+
+/* This is the fast-path drawing & emission for HW TCL. */
+static void r300_draw_range_elements(struct pipe_context* pipe,
+ struct pipe_resource* indexBuffer,
+ unsigned indexSize,
+ int indexBias,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct pipe_resource* orgIndexBuffer = indexBuffer;
+ boolean alt_num_verts = r300->screen->caps.is_r500 &&
+ count > 65536 &&
+ r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
+ unsigned short_count;
+ int buffer_offset = 0, index_offset = 0; /* for index bias emulation */
+ boolean translate = FALSE;
+
+ if (r300->skip_rendering) {
+ return;
+ }
+
+ if (!u_trim_pipe_prim(mode, &count)) {
+ return;
+ }
+
+ /* Set up fallback for incompatible vertex layout if needed. */
+ if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
+ r300_begin_vertex_translate(r300);
+ translate = TRUE;
+ }
+
+ if (indexBias && !index_bias_supported(r300)) {
+ r300_split_index_bias(r300, indexBias, &buffer_offset, &index_offset);
+ }
+
+ r300_translate_index_buffer(r300, &indexBuffer, &indexSize, index_offset,
+ &start, count);
+
+ r300_update_derived_state(r300);
+ r300_upload_index_buffer(r300, &indexBuffer, indexSize, start, count);
+
+ /* 15 dwords for emit_draw_elements */
+ r300_prepare_for_rendering(r300,
+ PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
+ indexBuffer, 15, buffer_offset, indexBias, NULL);
+
+ u_upload_flush(r300->upload_vb);
+ u_upload_flush(r300->upload_ib);
+ if (alt_num_verts || count <= 65535) {
+ r300_emit_draw_elements(r300, indexBuffer, indexSize,
+ minIndex, maxIndex, mode, start, count);
+ } else {
+ do {
+ short_count = MIN2(count, 65534);
+ r300_emit_draw_elements(r300, indexBuffer, indexSize,
+ minIndex, maxIndex,
+ mode, start, short_count);
+
+ start += short_count;
+ count -= short_count;
+
+ /* 15 dwords for emit_draw_elements */
+ if (count) {
+ r300_prepare_for_rendering(r300,
+ PREP_VALIDATE_VBOS | PREP_EMIT_AOS | PREP_INDEXED,
+ indexBuffer, 15, buffer_offset, indexBias, NULL);
+ }
+ } while (count);
+ }
+
+ if (indexBuffer != orgIndexBuffer) {
+ pipe_resource_reference( &indexBuffer, NULL );
+ }
+
+ if (translate) {
+ r300_end_vertex_translate(r300);
+ }
+}
+
+/* Simple helpers for context setup. Should probably be moved to util. */
+static void r300_draw_elements(struct pipe_context* pipe,
+ struct pipe_resource* indexBuffer,
+ unsigned indexSize, int indexBias, unsigned mode,
+ unsigned start, unsigned count)
+{
+ struct r300_context *r300 = r300_context(pipe);
+
+ pipe->draw_range_elements(pipe, indexBuffer, indexSize, indexBias,
+ 0, r300->vertex_buffer_max_index,
+ mode, start, count);
+}
+
+static void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
+ unsigned start, unsigned count)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ boolean alt_num_verts = r300->screen->caps.is_r500 &&
+ count > 65536 &&
+ r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0);
+ unsigned short_count;
+ boolean translate = FALSE;
+
+ if (r300->skip_rendering) {
+ return;
+ }
+
+ if (!u_trim_pipe_prim(mode, &count)) {
+ return;
+ }
+
+ /* Set up fallback for incompatible vertex layout if needed. */
+ if (r300->incompatible_vb_layout || r300->velems->incompatible_layout) {
+ r300_begin_vertex_translate(r300);
+ translate = TRUE;
+ }
+
+ r300_update_derived_state(r300);
+
+ if (immd_is_good_idea(r300, count)) {
+ r300_emit_draw_arrays_immediate(r300, mode, start, count);
+ } else {
+ /* 9 spare dwords for emit_draw_arrays. */
+ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_VALIDATE_VBOS | PREP_EMIT_AOS,
+ NULL, 9, start, 0, NULL);
+
+ if (alt_num_verts || count <= 65535) {
+ r300_emit_draw_arrays(r300, mode, count);
+ } else {
+ do {
+ short_count = MIN2(count, 65535);
+ r300_emit_draw_arrays(r300, mode, short_count);
+
+ start += short_count;
+ count -= short_count;
+
+ /* 9 spare dwords for emit_draw_arrays. */
+ if (count) {
+ r300_prepare_for_rendering(r300,
+ PREP_VALIDATE_VBOS | PREP_EMIT_AOS, NULL, 9,
+ start, 0, NULL);
+ }
+ } while (count);
+ }
+ u_upload_flush(r300->upload_vb);
+ }
+
+ if (translate) {
+ r300_end_vertex_translate(r300);
+ }
+}
+
+/****************************************************************************
+ * The rest of this file is for SW TCL rendering only. Please be polite and *
+ * keep these functions separated so that they are easier to locate. ~C. *
+ ***************************************************************************/
+
+/* SW TCL arrays, using Draw. */
+static void r300_swtcl_draw_arrays(struct pipe_context* pipe,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
+ int i;
+
+ if (r300->skip_rendering) {
+ return;
+ }
+
+ if (!u_trim_pipe_prim(mode, &count)) {
+ return;
+ }
+
+ r300_update_derived_state(r300);
+
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ void* buf = pipe_buffer_map(pipe,
+ r300->vertex_buffer[i].buffer,
+ PIPE_TRANSFER_READ,
+ &vb_transfer[i]);
+ draw_set_mapped_vertex_buffer(r300->draw, i, buf);
+ }
+
+ draw_set_mapped_element_buffer(r300->draw, 0, 0, NULL);
+
+ draw_arrays(r300->draw, mode, start, count);
+
+ /* XXX Not sure whether this is the best fix.
+ * It prevents CS from being rejected and weird assertion failures. */
+ draw_flush(r300->draw);
+
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer,
+ vb_transfer[i]);
+ draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
+ }
+}
+
+/* SW TCL elements, using Draw. */
+static void r300_swtcl_draw_range_elements(struct pipe_context* pipe,
+ struct pipe_resource* indexBuffer,
+ unsigned indexSize,
+ int indexBias,
+ unsigned minIndex,
+ unsigned maxIndex,
+ unsigned mode,
+ unsigned start,
+ unsigned count)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS];
+ struct pipe_transfer *ib_transfer;
+ int i;
+ void* indices;
+
+ if (r300->skip_rendering) {
+ return;
+ }
+
+ if (!u_trim_pipe_prim(mode, &count)) {
+ return;
+ }
+
+ r300_update_derived_state(r300);
+
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ void* buf = pipe_buffer_map(pipe,
+ r300->vertex_buffer[i].buffer,
+ PIPE_TRANSFER_READ,
+ &vb_transfer[i]);
+ draw_set_mapped_vertex_buffer(r300->draw, i, buf);
+ }
+
+ indices = pipe_buffer_map(pipe, indexBuffer,
+ PIPE_TRANSFER_READ, &ib_transfer);
+ draw_set_mapped_element_buffer_range(r300->draw, indexSize, indexBias,
+ minIndex, maxIndex, indices);
+
+ draw_arrays(r300->draw, mode, start, count);
+
+ /* XXX Not sure whether this is the best fix.
+ * It prevents CS from being rejected and weird assertion failures. */
+ draw_flush(r300->draw);
+
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer,
+ vb_transfer[i]);
+ draw_set_mapped_vertex_buffer(r300->draw, i, NULL);
+ }
+
+ pipe_buffer_unmap(pipe, indexBuffer,
+ ib_transfer);
+ draw_set_mapped_element_buffer_range(r300->draw, 0, 0,
+ start, start + count - 1,
+ NULL);
+}
+
+/* Object for rendering using Draw. */
+struct r300_render {
+ /* Parent class */
+ struct vbuf_render base;
+
+ /* Pipe context */
+ struct r300_context* r300;
+
+ /* Vertex information */
+ size_t vertex_size;
+ unsigned prim;
+ unsigned hwprim;
+
+ /* VBO */
+ struct pipe_resource* vbo;
+ size_t vbo_size;
+ size_t vbo_offset;
+ size_t vbo_max_used;
+ void * vbo_ptr;
+
+ struct pipe_transfer *vbo_transfer;
+};
+
+static INLINE struct r300_render*
+r300_render(struct vbuf_render* render)
+{
+ return (struct r300_render*)render;
+}
+
+static const struct vertex_info*
+r300_render_get_vertex_info(struct vbuf_render* render)
+{
+ struct r300_render* r300render = r300_render(render);
+ struct r300_context* r300 = r300render->r300;
+
+ return &r300->vertex_info;
+}
+
+static boolean r300_render_allocate_vertices(struct vbuf_render* render,
+ ushort vertex_size,
+ ushort count)
+{
+ struct r300_render* r300render = r300_render(render);
+ struct r300_context* r300 = r300render->r300;
+ struct pipe_screen* screen = r300->context.screen;
+ size_t size = (size_t)vertex_size * (size_t)count;
+
+ if (size + r300render->vbo_offset > r300render->vbo_size)
+ {
+ pipe_resource_reference(&r300->vbo, NULL);
+ r300render->vbo = pipe_buffer_create(screen,
+ PIPE_BIND_VERTEX_BUFFER,
+ R300_MAX_DRAW_VBO_SIZE);
+ r300render->vbo_offset = 0;
+ r300render->vbo_size = R300_MAX_DRAW_VBO_SIZE;
+ }
+
+ r300render->vertex_size = vertex_size;
+ r300->vbo = r300render->vbo;
+ r300->vbo_offset = r300render->vbo_offset;
+
+ return (r300render->vbo) ? TRUE : FALSE;
+}
+
+static void* r300_render_map_vertices(struct vbuf_render* render)
+{
+ struct r300_render* r300render = r300_render(render);
+
+ assert(!r300render->vbo_transfer);
+
+ r300render->vbo_ptr = pipe_buffer_map(&r300render->r300->context,
+ r300render->vbo,
+ PIPE_TRANSFER_WRITE,
+ &r300render->vbo_transfer);
+
+ return ((uint8_t*)r300render->vbo_ptr + r300render->vbo_offset);
+}
+
+static void r300_render_unmap_vertices(struct vbuf_render* render,
+ ushort min,
+ ushort max)
+{
+ struct r300_render* r300render = r300_render(render);
+ struct pipe_context* context = &r300render->r300->context;
+
+ assert(r300render->vbo_transfer);
+
+ r300render->vbo_max_used = MAX2(r300render->vbo_max_used,
+ r300render->vertex_size * (max + 1));
+ pipe_buffer_unmap(context, r300render->vbo, r300render->vbo_transfer);
+
+ r300render->vbo_transfer = NULL;
+}
+
+static void r300_render_release_vertices(struct vbuf_render* render)
+{
+ struct r300_render* r300render = r300_render(render);
+
+ r300render->vbo_offset += r300render->vbo_max_used;
+ r300render->vbo_max_used = 0;
+}
+
+static boolean r300_render_set_primitive(struct vbuf_render* render,
+ unsigned prim)
+{
+ struct r300_render* r300render = r300_render(render);
+
+ r300render->prim = prim;
+ r300render->hwprim = r300_translate_primitive(prim);
+
+ return TRUE;
+}
+
+static void r300_render_draw_arrays(struct vbuf_render* render,
+ unsigned start,
+ unsigned count)
+{
+ struct r300_render* r300render = r300_render(render);
+ struct r300_context* r300 = r300render->r300;
+ uint8_t* ptr;
+ unsigned i;
+ unsigned dwords = 6;
+
+ CS_LOCALS(r300);
+
+ (void) i; (void) ptr;
+
+ r300_prepare_for_rendering(r300, PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL,
+ NULL, dwords, 0, 0, NULL);
+
+ DBG(r300, DBG_DRAW, "r300: Doing vbuf render, count %d\n", count);
+
+ /* Uncomment to dump all VBOs rendered through this interface.
+ * Slow and noisy!
+ ptr = pipe_buffer_map(&r300render->r300->context,
+ r300render->vbo, PIPE_TRANSFER_READ,
+ &r300render->vbo_transfer);
+
+ for (i = 0; i < count; i++) {
+ printf("r300: Vertex %d\n", i);
+ draw_dump_emitted_vertex(&r300->vertex_info, ptr);
+ ptr += r300->vertex_info.size * 4;
+ printf("\n");
+ }
+
+ pipe_buffer_unmap(&r300render->r300->context, r300render->vbo,
+ r300render->vbo_transfer);
+ */
+
+ BEGIN_CS(dwords);
+ OUT_CS_REG(R300_GA_COLOR_CONTROL,
+ r300_provoking_vertex_fixes(r300, r300render->prim));
+ OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, count - 1);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) |
+ r300render->hwprim);
+ END_CS;
+}
+
+static void r300_render_draw_elements(struct vbuf_render* render,
+ const ushort* indices,
+ uint count)
+{
+ struct r300_render* r300render = r300_render(render);
+ struct r300_context* r300 = r300render->r300;
+ int i;
+ unsigned end_cs_dwords;
+ unsigned max_index = (r300render->vbo_size - r300render->vbo_offset) /
+ (r300render->r300->vertex_info.size * 4) - 1;
+ unsigned short_count;
+ unsigned free_dwords;
+
+ CS_LOCALS(r300);
+
+ /* Reserve at least 256 dwords.
+ *
+ * Below we manage the CS space manually because there may be more
+ * indices than it can fit in CS. */
+ r300_prepare_for_rendering(r300,
+ PREP_FIRST_DRAW | PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
+ NULL, 256, 0, 0, &end_cs_dwords);
+
+ while (count) {
+ free_dwords = r300->rws->get_cs_free_dwords(r300->rws);
+
+ short_count = MIN2(count, (free_dwords - end_cs_dwords - 6) * 2);
+
+ BEGIN_CS(6 + (short_count+1)/2);
+ OUT_CS_REG(R300_GA_COLOR_CONTROL,
+ r300_provoking_vertex_fixes(r300, r300render->prim));
+ OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, max_index);
+ OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, (short_count+1)/2);
+ OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (short_count << 16) |
+ r300render->hwprim);
+ for (i = 0; i < short_count-1; i += 2) {
+ OUT_CS(indices[i+1] << 16 | indices[i]);
+ }
+ if (short_count % 2) {
+ OUT_CS(indices[short_count-1]);
+ }
+ END_CS;
+
+ /* OK now subtract the emitted indices and see if we need to emit
+ * another draw packet. */
+ indices += short_count;
+ count -= short_count;
+
+ if (count) {
+ r300_prepare_for_rendering(r300,
+ PREP_EMIT_AOS_SWTCL | PREP_INDEXED,
+ NULL, 256, 0, 0, &end_cs_dwords);
+ }
+ }
+}
+
+static void r300_render_destroy(struct vbuf_render* render)
+{
+ FREE(render);
+}
+
+static struct vbuf_render* r300_render_create(struct r300_context* r300)
+{
+ struct r300_render* r300render = CALLOC_STRUCT(r300_render);
+
+ r300render->r300 = r300;
+
+ /* XXX find real numbers plz */
+ r300render->base.max_vertex_buffer_bytes = 128 * 1024;
+ r300render->base.max_indices = 16 * 1024;
+
+ r300render->base.get_vertex_info = r300_render_get_vertex_info;
+ r300render->base.allocate_vertices = r300_render_allocate_vertices;
+ r300render->base.map_vertices = r300_render_map_vertices;
+ r300render->base.unmap_vertices = r300_render_unmap_vertices;
+ r300render->base.set_primitive = r300_render_set_primitive;
+ r300render->base.draw_elements = r300_render_draw_elements;
+ r300render->base.draw_arrays = r300_render_draw_arrays;
+ r300render->base.release_vertices = r300_render_release_vertices;
+ r300render->base.destroy = r300_render_destroy;
+
+ r300render->vbo = NULL;
+ r300render->vbo_size = 0;
+ r300render->vbo_offset = 0;
+
+ return &r300render->base;
+}
+
+struct draw_stage* r300_draw_stage(struct r300_context* r300)
+{
+ struct vbuf_render* render;
+ struct draw_stage* stage;
+
+ render = r300_render_create(r300);
+
+ if (!render) {
+ return NULL;
+ }
+
+ stage = draw_vbuf_stage(r300->draw, render);
+
+ if (!stage) {
+ render->destroy(render);
+ return NULL;
+ }
+
+ draw_set_render(r300->draw, render);
+
+ return stage;
+}
+
+/****************************************************************************
+ * End of SW TCL functions *
+ ***************************************************************************/
+
+static void r300_resource_resolve(struct pipe_context* pipe,
+ struct pipe_resource* dest,
+ struct pipe_subresource subdest,
+ struct pipe_resource* src,
+ struct pipe_subresource subsrc)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_surface* destsurf = r300_surface(
+ dest->screen->get_tex_surface(dest->screen,
+ dest, subdest.face, subdest.level, 0, 0));
+ struct pipe_surface* srcsurf = src->screen->get_tex_surface(src->screen,
+ src, subsrc.face, subsrc.level, 0, 0);
+ float color[] = {0, 0, 0, 0};
+ CS_LOCALS(r300);
+
+ DBG(r300, DBG_DRAW, "r300: Resolving resource...\n");
+
+ OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_OFFSET, 1);
+ OUT_CS_RELOC(destsurf->buffer, destsurf->offset, 0, destsurf->domain, 0);
+
+ OUT_CS_REG_SEQ(R300_RB3D_AARESOLVE_PITCH, 1);
+ OUT_CS_RELOC(destsurf->buffer, destsurf->pitch, 0, destsurf->domain, 0);
+
+ OUT_CS_REG(R300_RB3D_AARESOLVE_CTL,
+ R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE |
+ R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE);
+
+ r300->context.clear_render_target(pipe,
+ srcsurf, color, 0, 0, src->width0, src->height0);
+
+ OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x0);
+
+ pipe_surface_reference((struct pipe_surface**)&srcsurf, NULL);
+ pipe_surface_reference((struct pipe_surface**)&destsurf, NULL);
+}
+
+void r300_init_render_functions(struct r300_context *r300)
+{
+ /* Set generic functions. */
+ r300->context.draw_elements = r300_draw_elements;
+
+ /* Set draw functions based on presence of HW TCL. */
+ if (r300->screen->caps.has_tcl) {
+ r300->context.draw_arrays = r300_draw_arrays;
+ r300->context.draw_range_elements = r300_draw_range_elements;
+ } else {
+ r300->context.draw_arrays = r300_swtcl_draw_arrays;
+ r300->context.draw_range_elements = r300_swtcl_draw_range_elements;
+ }
+
+ r300->context.resource_resolve = r300_resource_resolve;
+
+ /* Plug in the two-sided stencil reference value fallback if needed. */
+ if (!r300->screen->caps.is_r500)
+ r300_plug_in_stencil_ref_fallback(r300);
+}
diff --git a/src/gallium/drivers/r300/r300_render_stencilref.c b/src/gallium/drivers/r300/r300_render_stencilref.c
new file mode 100644
index 0000000000..d509ded3ec
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_render_stencilref.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/**
+ * The two-sided stencil reference value fallback for r3xx-r4xx chips.
+ * These chips support two-sided stencil functions but they do not support
+ * a two-sided reference value.
+ *
+ * The functions below split every draw call which uses the two-sided
+ * reference value into two draw calls -- the first one renders front faces
+ * and the second renders back faces with the other reference value.
+ */
+
+#include "r300_context.h"
+#include "r300_reg.h"
+
+struct r300_stencilref_context {
+ void (*draw_arrays)(struct pipe_context *pipe,
+ unsigned mode, unsigned start, unsigned count);
+
+ void (*draw_range_elements)(
+ struct pipe_context *pipe, struct pipe_resource *indexBuffer,
+ unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex,
+ unsigned mode, unsigned start, unsigned count);
+
+ uint32_t rs_cull_mode;
+ uint32_t zb_stencilrefmask;
+ ubyte ref_value_front;
+};
+
+static boolean r300_stencilref_needed(struct r300_context *r300)
+{
+ struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+
+ return dsa->two_sided_stencil_ref ||
+ (dsa->two_sided &&
+ r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]);
+}
+
+/* Set drawing for front faces. */
+static void r300_stencilref_begin(struct r300_context *r300)
+{
+ struct r300_stencilref_context *sr = r300->stencilref_fallback;
+ struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
+ struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+
+ /* Save state. */
+ sr->rs_cull_mode = rs->cull_mode;
+ sr->zb_stencilrefmask = dsa->stencil_ref_mask;
+ sr->ref_value_front = r300->stencil_ref.ref_value[0];
+
+ /* We *cull* pixels, therefore no need to mask out the bits. */
+ rs->cull_mode |= R300_CULL_BACK;
+
+ r300->rs_state.dirty = TRUE;
+}
+
+/* Set drawing for back faces. */
+static void r300_stencilref_switch_side(struct r300_context *r300)
+{
+ struct r300_stencilref_context *sr = r300->stencilref_fallback;
+ struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
+ struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+
+ rs->cull_mode = sr->rs_cull_mode | R300_CULL_FRONT;
+ dsa->stencil_ref_mask = dsa->stencil_ref_bf;
+ r300->stencil_ref.ref_value[0] = r300->stencil_ref.ref_value[1];
+
+ r300->rs_state.dirty = TRUE;
+ r300->dsa_state.dirty = TRUE;
+}
+
+/* Restore the original state. */
+static void r300_stencilref_end(struct r300_context *r300)
+{
+ struct r300_stencilref_context *sr = r300->stencilref_fallback;
+ struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
+ struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+
+ /* Restore state. */
+ rs->cull_mode = sr->rs_cull_mode;
+ dsa->stencil_ref_mask = sr->zb_stencilrefmask;
+ r300->stencil_ref.ref_value[0] = sr->ref_value_front;
+
+ r300->rs_state.dirty = TRUE;
+ r300->dsa_state.dirty = TRUE;
+}
+
+static void r300_stencilref_draw_arrays(struct pipe_context *pipe, unsigned mode,
+ unsigned start, unsigned count)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_stencilref_context *sr = r300->stencilref_fallback;
+
+ if (!r300_stencilref_needed(r300)) {
+ sr->draw_arrays(pipe, mode, start, count);
+ } else {
+ r300_stencilref_begin(r300);
+ sr->draw_arrays(pipe, mode, start, count);
+ r300_stencilref_switch_side(r300);
+ sr->draw_arrays(pipe, mode, start, count);
+ r300_stencilref_end(r300);
+ }
+}
+
+static void r300_stencilref_draw_range_elements(
+ struct pipe_context *pipe, struct pipe_resource *indexBuffer,
+ unsigned indexSize, int indexBias, unsigned minIndex, unsigned maxIndex,
+ unsigned mode, unsigned start, unsigned count)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_stencilref_context *sr = r300->stencilref_fallback;
+
+ if (!r300_stencilref_needed(r300)) {
+ sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias,
+ minIndex, maxIndex, mode, start, count);
+ } else {
+ r300_stencilref_begin(r300);
+ sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias,
+ minIndex, maxIndex, mode, start, count);
+ r300_stencilref_switch_side(r300);
+ sr->draw_range_elements(pipe, indexBuffer, indexSize, indexBias,
+ minIndex, maxIndex, mode, start, count);
+ r300_stencilref_end(r300);
+ }
+}
+
+void r300_plug_in_stencil_ref_fallback(struct r300_context *r300)
+{
+ r300->stencilref_fallback = CALLOC_STRUCT(r300_stencilref_context);
+
+ /* Save original draw functions. */
+ r300->stencilref_fallback->draw_arrays = r300->context.draw_arrays;
+ r300->stencilref_fallback->draw_range_elements = r300->context.draw_range_elements;
+
+ /* Override the draw functions. */
+ r300->context.draw_arrays = r300_stencilref_draw_arrays;
+ r300->context.draw_range_elements = r300_stencilref_draw_range_elements;
+}
diff --git a/src/gallium/drivers/r300/r300_render_translate.c b/src/gallium/drivers/r300/r300_render_translate.c
new file mode 100644
index 0000000000..0ea11e5bfc
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_render_translate.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/**
+ * The functions below translate vertex and index buffers to the layout
+ * compatible with the hardware, so that all vertex and index fetches are
+ * DWORD-aligned and all used vertex and index formats are supported.
+ * For indices, an optional index offset is added to each index.
+ */
+
+#include "r300_context.h"
+#include "translate/translate.h"
+
+void r300_begin_vertex_translate(struct r300_context *r300)
+{
+ struct pipe_context *pipe = &r300->context;
+ struct translate_key key = {0};
+ struct translate_element *te;
+ unsigned tr_elem_index[PIPE_MAX_ATTRIBS] = {0};
+ struct translate *tr;
+ struct r300_vertex_element_state *ve = r300->velems;
+ boolean vb_translated[PIPE_MAX_ATTRIBS] = {0};
+ void *vb_map[PIPE_MAX_ATTRIBS] = {0}, *out_map;
+ struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0}, *out_transfer;
+ struct pipe_resource *out_buffer;
+ unsigned i, num_verts;
+
+ /* Initialize the translate key, i.e. the recipe how vertices should be
+ * translated. */
+ for (i = 0; i < ve->count; i++) {
+ struct pipe_vertex_buffer *vb =
+ &r300->vertex_buffer[ve->velem[i].vertex_buffer_index];
+ enum pipe_format output_format = ve->hw_format[i];
+ unsigned output_format_size = ve->hw_format_size[i];
+
+ /* Check for support. */
+ if (ve->velem[i].src_format == ve->hw_format[i] &&
+ (vb->buffer_offset + ve->velem[i].src_offset) % 4 == 0 &&
+ vb->stride % 4 == 0) {
+ continue;
+ }
+
+ /* Workaround for translate: output floats instead of halfs. */
+ switch (output_format) {
+ case PIPE_FORMAT_R16_FLOAT:
+ output_format = PIPE_FORMAT_R32_FLOAT;
+ output_format_size = 4;
+ break;
+ case PIPE_FORMAT_R16G16_FLOAT:
+ output_format = PIPE_FORMAT_R32G32_FLOAT;
+ output_format_size = 8;
+ break;
+ case PIPE_FORMAT_R16G16B16_FLOAT:
+ output_format = PIPE_FORMAT_R32G32B32_FLOAT;
+ output_format_size = 12;
+ break;
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+ output_format_size = 16;
+ break;
+ default:;
+ }
+
+ /* Add this vertex element. */
+ te = &key.element[key.nr_elements];
+ /*te->type;
+ te->instance_divisor;*/
+ te->input_buffer = ve->velem[i].vertex_buffer_index;
+ te->input_format = ve->velem[i].src_format;
+ te->input_offset = vb->buffer_offset + ve->velem[i].src_offset;
+ te->output_format = output_format;
+ te->output_offset = key.output_stride;
+
+ key.output_stride += output_format_size;
+ vb_translated[ve->velem[i].vertex_buffer_index] = TRUE;
+ tr_elem_index[i] = key.nr_elements;
+ key.nr_elements++;
+ }
+
+ /* Get a translate object. */
+ tr = translate_cache_find(r300->tran.translate_cache, &key);
+
+ /* Map buffers we want to translate. */
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ if (vb_translated[i]) {
+ struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i];
+
+ vb_map[i] = pipe_buffer_map(pipe, vb->buffer,
+ PIPE_TRANSFER_READ, &vb_transfer[i]);
+
+ tr->set_buffer(tr, i, vb_map[i], vb->stride, vb->max_index);
+ }
+ }
+
+ /* Create and map the output buffer. */
+ num_verts = r300->vertex_buffer_max_index + 1;
+
+ out_buffer = pipe_buffer_create(&r300->screen->screen,
+ PIPE_BIND_VERTEX_BUFFER,
+ key.output_stride * num_verts);
+
+ out_map = pipe_buffer_map(pipe, out_buffer, PIPE_TRANSFER_WRITE,
+ &out_transfer);
+
+ /* Translate. */
+ tr->run(tr, 0, num_verts, 0, out_map);
+
+ /* Unmap all buffers. */
+ for (i = 0; i < r300->vertex_buffer_count; i++) {
+ if (vb_translated[i]) {
+ pipe_buffer_unmap(pipe, r300->vertex_buffer[i].buffer,
+ vb_transfer[i]);
+ }
+ }
+
+ pipe_buffer_unmap(pipe, out_buffer, out_transfer);
+
+ /* Setup the new vertex buffer in the first free slot. */
+ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+ struct pipe_vertex_buffer *vb = &r300->vertex_buffer[i];
+
+ if (!vb->buffer) {
+ pipe_resource_reference(&vb->buffer, out_buffer);
+ vb->buffer_offset = 0;
+ vb->max_index = num_verts - 1;
+ vb->stride = key.output_stride;
+ r300->tran.vb_slot = i;
+ break;
+ }
+ }
+
+ /* Save and replace vertex elements. */
+ {
+ struct pipe_vertex_element new_velems[PIPE_MAX_ATTRIBS];
+
+ r300->tran.saved_velems = r300->velems;
+
+ for (i = 0; i < ve->count; i++) {
+ if (vb_translated[ve->velem[i].vertex_buffer_index]) {
+ te = &key.element[tr_elem_index[i]];
+ new_velems[i].instance_divisor = ve->velem[i].instance_divisor;
+ new_velems[i].src_format = te->output_format;
+ new_velems[i].src_offset = te->output_offset;
+ new_velems[i].vertex_buffer_index = r300->tran.vb_slot;
+ } else {
+ memcpy(&new_velems[i], &ve->velem[i],
+ sizeof(struct pipe_vertex_element));
+ }
+ }
+
+ r300->tran.new_velems =
+ pipe->create_vertex_elements_state(pipe, ve->count, new_velems);
+ pipe->bind_vertex_elements_state(pipe, r300->tran.new_velems);
+ }
+
+ pipe_resource_reference(&out_buffer, NULL);
+}
+
+void r300_end_vertex_translate(struct r300_context *r300)
+{
+ struct pipe_context *pipe = &r300->context;
+
+ /* Restore vertex elements. */
+ pipe->bind_vertex_elements_state(pipe, r300->tran.saved_velems);
+ pipe->delete_vertex_elements_state(pipe, r300->tran.new_velems);
+
+ /* Delete the now-unused VBO. */
+ pipe_resource_reference(&r300->vertex_buffer[r300->tran.vb_slot].buffer,
+ NULL);
+}
+
+static void r300_shorten_ubyte_elts(struct r300_context* r300,
+ struct pipe_resource** elts,
+ int index_bias,
+ unsigned start,
+ unsigned count)
+{
+ struct pipe_context* context = &r300->context;
+ struct pipe_screen* screen = r300->context.screen;
+ struct pipe_resource* new_elts;
+ unsigned char *in_map;
+ unsigned short *out_map;
+ struct pipe_transfer *src_transfer, *dst_transfer;
+ unsigned i;
+
+ new_elts = pipe_buffer_create(screen,
+ PIPE_BIND_INDEX_BUFFER,
+ 2 * count);
+
+ in_map = pipe_buffer_map(context, *elts, PIPE_TRANSFER_READ, &src_transfer);
+ out_map = pipe_buffer_map(context, new_elts, PIPE_TRANSFER_WRITE, &dst_transfer);
+
+ in_map += start;
+
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned short)(*in_map + index_bias);
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(context, *elts, src_transfer);
+ pipe_buffer_unmap(context, new_elts, dst_transfer);
+
+ *elts = new_elts;
+}
+
+static void r300_rebuild_ushort_elts(struct r300_context *r300,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count)
+{
+ struct pipe_context *context = &r300->context;
+ struct pipe_transfer *in_transfer = NULL;
+ struct pipe_transfer *out_transfer = NULL;
+ struct pipe_resource *new_elts;
+ unsigned short *in_map;
+ unsigned short *out_map;
+ unsigned i;
+
+ new_elts = pipe_buffer_create(context->screen,
+ PIPE_BIND_INDEX_BUFFER,
+ 2 * count);
+
+ in_map = pipe_buffer_map(context, *elts,
+ PIPE_TRANSFER_READ, &in_transfer);
+ out_map = pipe_buffer_map(context, new_elts,
+ PIPE_TRANSFER_WRITE, &out_transfer);
+
+ in_map += start;
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned short)(*in_map + index_bias);
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(context, *elts, in_transfer);
+ pipe_buffer_unmap(context, new_elts, out_transfer);
+
+ *elts = new_elts;
+}
+
+static void r300_rebuild_uint_elts(struct r300_context *r300,
+ struct pipe_resource **elts,
+ int index_bias,
+ unsigned start, unsigned count)
+{
+ struct pipe_context *context = &r300->context;
+ struct pipe_transfer *in_transfer = NULL;
+ struct pipe_transfer *out_transfer = NULL;
+ struct pipe_resource *new_elts;
+ unsigned int *in_map;
+ unsigned int *out_map;
+ unsigned i;
+
+ new_elts = pipe_buffer_create(context->screen,
+ PIPE_BIND_INDEX_BUFFER,
+ 2 * count);
+
+ in_map = pipe_buffer_map(context, *elts,
+ PIPE_TRANSFER_READ, &in_transfer);
+ out_map = pipe_buffer_map(context, new_elts,
+ PIPE_TRANSFER_WRITE, &out_transfer);
+
+ in_map += start;
+ for (i = 0; i < count; i++) {
+ *out_map = (unsigned int)(*in_map + index_bias);
+ in_map++;
+ out_map++;
+ }
+
+ pipe_buffer_unmap(context, *elts, in_transfer);
+ pipe_buffer_unmap(context, new_elts, out_transfer);
+
+ *elts = new_elts;
+}
+
+void r300_translate_index_buffer(struct r300_context *r300,
+ struct pipe_resource **index_buffer,
+ unsigned *index_size, unsigned index_offset,
+ unsigned *start, unsigned count)
+{
+ switch (*index_size) {
+ case 1:
+ r300_shorten_ubyte_elts(r300, index_buffer, index_offset, *start, count);
+ *index_size = 2;
+ *start = 0;
+ break;
+
+ case 2:
+ if (*start % 2 != 0 || index_offset) {
+ r300_rebuild_ushort_elts(r300, index_buffer, index_offset, *start, count);
+ *start = 0;
+ }
+ break;
+
+ case 4:
+ if (index_offset) {
+ r300_rebuild_uint_elts(r300, index_buffer, index_offset, *start, count);
+ *start = 0;
+ }
+ break;
+ }
+}
diff --git a/src/gallium/drivers/r300/r300_resource.c b/src/gallium/drivers/r300/r300_resource.c
new file mode 100644
index 0000000000..f6f33028dc
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_resource.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2010 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ */
+
+#include "r300_context.h"
+#include "r300_texture.h"
+#include "r300_screen_buffer.h"
+
+static struct pipe_resource *
+r300_resource_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
+{
+ if (templ->target == PIPE_BUFFER)
+ return r300_buffer_create(screen, templ);
+ else
+ return r300_texture_create(screen, templ);
+
+}
+
+static struct pipe_resource *
+r300_resource_from_handle(struct pipe_screen * screen,
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle)
+{
+ if (templ->target == PIPE_BUFFER)
+ return NULL;
+ else
+ return r300_texture_from_handle(screen, templ, whandle);
+}
+
+void r300_init_resource_functions(struct r300_context *r300)
+{
+ r300->context.get_transfer = u_get_transfer_vtbl;
+ r300->context.transfer_map = u_transfer_map_vtbl;
+ r300->context.transfer_flush_region = u_transfer_flush_region_vtbl;
+ r300->context.transfer_unmap = u_transfer_unmap_vtbl;
+ r300->context.transfer_destroy = u_transfer_destroy_vtbl;
+ r300->context.transfer_inline_write = u_transfer_inline_write_vtbl;
+ r300->context.is_resource_referenced = u_is_resource_referenced_vtbl;
+}
+
+void r300_init_screen_resource_functions(struct r300_screen *r300screen)
+{
+ r300screen->screen.resource_create = r300_resource_create;
+ r300screen->screen.resource_from_handle = r300_resource_from_handle;
+ r300screen->screen.resource_get_handle = u_resource_get_handle_vtbl;
+ r300screen->screen.resource_destroy = u_resource_destroy_vtbl;
+ r300screen->screen.user_buffer_create = r300_user_buffer_create;
+
+ r300screen->screen.get_tex_surface = r300_get_tex_surface;
+ r300screen->screen.tex_surface_destroy = r300_tex_surface_destroy;
+}
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
new file mode 100644
index 0000000000..8f7c96b829
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -0,0 +1,431 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "util/u_memory.h"
+
+#include "r300_context.h"
+#include "r300_texture.h"
+#include "r300_screen_buffer.h"
+#include "r300_state_inlines.h"
+#include "r300_winsys.h"
+
+/* Return the identifier behind whom the brave coders responsible for this
+ * amalgamation of code, sweat, and duct tape, routinely obscure their names.
+ *
+ * ...I should have just put "Corbin Simpson", but I'm not that cool.
+ *
+ * (Or egotistical. Yet.) */
+static const char* r300_get_vendor(struct pipe_screen* pscreen)
+{
+ return "X.Org R300 Project";
+}
+
+static const char* chip_families[] = {
+ "R300",
+ "R350",
+ "R360",
+ "RV350",
+ "RV370",
+ "RV380",
+ "R420",
+ "R423",
+ "R430",
+ "R480",
+ "R481",
+ "RV410",
+ "RS400",
+ "RC410",
+ "RS480",
+ "RS482",
+ "RS600",
+ "RS690",
+ "RS740",
+ "RV515",
+ "R520",
+ "RV530",
+ "R580",
+ "RV560",
+ "RV570"
+};
+
+static const char* r300_get_name(struct pipe_screen* pscreen)
+{
+ struct r300_screen* r300screen = r300_screen(pscreen);
+
+ return chip_families[r300screen->caps.family];
+}
+
+static int r300_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
+{
+ struct r300_screen* r300screen = r300_screen(pscreen);
+ boolean is_r400 = r300screen->caps.is_r400;
+ boolean is_r500 = r300screen->caps.is_r500;
+
+ /* XXX extended shader capabilities of r400 unimplemented */
+ is_r400 = FALSE;
+
+ switch (param) {
+ /* Supported features (boolean caps). */
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ case PIPE_CAP_GLSL:
+ /* I'll be frank. This is a lie.
+ *
+ * We don't truly support GLSL on any of this driver's chipsets.
+ * To be fair, no chipset supports the full GLSL specification
+ * to the best of our knowledge, but some of the less esoteric
+ * features are still missing here.
+ *
+ * Rather than cripple ourselves intentionally, I'm going to set
+ * this flag, and as Gallium's interface continues to change, I
+ * hope that this single monolithic GLSL enable can slowly get
+ * split down into many different pieces and the state tracker
+ * will handle fallbacks transparently, like it should.
+ *
+ * ~ C.
+ */
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ case PIPE_CAP_POINT_SPRITE:
+ case PIPE_CAP_OCCLUSION_QUERY:
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_MIRROR_REPEAT:
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ return 1;
+
+ /* Unsupported features (boolean caps). */
+ case PIPE_CAP_TIMER_QUERY:
+ case PIPE_CAP_DUAL_SOURCE_BLEND:
+ case PIPE_CAP_TGSI_CONT_SUPPORTED:
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return 0;
+
+ /* Texturing. */
+ case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return r300screen->caps.num_tex_units;
+ case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS:
+ return 0;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ /* 13 == 4096, 12 == 2048 */
+ return is_r500 ? 13 : 12;
+
+ /* Render targets. */
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 4;
+
+ /* General shader limits and features. */
+ case PIPE_CAP_SM3:
+ return is_r500 ? 1 : 0;
+ case PIPE_CAP_MAX_CONST_BUFFERS:
+ return 1;
+ case PIPE_CAP_MAX_CONST_BUFFER_SIZE:
+ return 256;
+
+ case PIPE_CAP_DEPTHSTENCIL_CLEAR_SEPARATE:
+ return 1;
+
+ /* Fragment coordinate conventions. */
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return 0;
+
+ /* Fragment shader limits. */
+ case PIPE_CAP_MAX_FS_INSTRUCTIONS:
+ return is_r500 || is_r400 ? 512 : 96;
+ case PIPE_CAP_MAX_FS_ALU_INSTRUCTIONS:
+ return is_r500 || is_r400 ? 512 : 64;
+ case PIPE_CAP_MAX_FS_TEX_INSTRUCTIONS:
+ return is_r500 || is_r400 ? 512 : 32;
+ case PIPE_CAP_MAX_FS_TEX_INDIRECTIONS:
+ return is_r500 ? 511 : 4;
+ case PIPE_CAP_MAX_FS_CONTROL_FLOW_DEPTH:
+ return is_r500 ? 64 : 0; /* Actually unlimited on r500. */
+ case PIPE_CAP_MAX_FS_INPUTS:
+ /* 2 colors + 8 texcoords are always supported
+ * (minus fog and wpos).
+ *
+ * R500 has the ability to turn 3rd and 4th color into
+ * additional texcoords but there is no two-sided color
+ * selection then. However the facing bit can be used instead. */
+ return 10;
+ case PIPE_CAP_MAX_FS_CONSTS:
+ return is_r500 ? 256 : 32;
+ case PIPE_CAP_MAX_FS_TEMPS:
+ return is_r500 ? 128 : is_r400 ? 64 : 32;
+ case PIPE_CAP_MAX_FS_ADDRS:
+ return 0;
+ case PIPE_CAP_MAX_FS_PREDS:
+ return is_r500 ? 1 : 0;
+
+ /* Vertex shader limits. */
+ case PIPE_CAP_MAX_VS_INSTRUCTIONS:
+ case PIPE_CAP_MAX_VS_ALU_INSTRUCTIONS:
+ return is_r500 ? 1024 : 256;
+ case PIPE_CAP_MAX_VS_TEX_INSTRUCTIONS:
+ case PIPE_CAP_MAX_VS_TEX_INDIRECTIONS:
+ return 0;
+ case PIPE_CAP_MAX_VS_CONTROL_FLOW_DEPTH:
+ return is_r500 ? 4 : 0; /* For loops; not sure about conditionals. */
+ case PIPE_CAP_MAX_VS_INPUTS:
+ return 16;
+ case PIPE_CAP_MAX_VS_CONSTS:
+ return 256;
+ case PIPE_CAP_MAX_VS_TEMPS:
+ return 32;
+ case PIPE_CAP_MAX_VS_ADDRS:
+ return 1; /* XXX guessed */
+ case PIPE_CAP_MAX_VS_PREDS:
+ return is_r500 ? 4 : 0; /* XXX guessed. */
+
+ default:
+ fprintf(stderr, "r300: Implementation error: Bad param %d\n",
+ param);
+ return 0;
+ }
+}
+
+static float r300_get_paramf(struct pipe_screen* pscreen, enum pipe_cap param)
+{
+ struct r300_screen* r300screen = r300_screen(pscreen);
+
+ switch (param) {
+ case PIPE_CAP_MAX_LINE_WIDTH:
+ case PIPE_CAP_MAX_LINE_WIDTH_AA:
+ case PIPE_CAP_MAX_POINT_WIDTH:
+ case PIPE_CAP_MAX_POINT_WIDTH_AA:
+ /* The maximum dimensions of the colorbuffer are our practical
+ * rendering limits. 2048 pixels should be enough for anybody. */
+ if (r300screen->caps.is_r500) {
+ return 4096.0f;
+ } else if (r300screen->caps.is_r400) {
+ return 4021.0f;
+ } else {
+ return 2560.0f;
+ }
+ case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+ return 16.0f;
+ case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+ return 16.0f;
+ default:
+ fprintf(stderr, "r300: Implementation error: Bad paramf %d\n",
+ param);
+ return 0.0f;
+ }
+}
+
+static boolean r300_is_format_supported(struct pipe_screen* screen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned usage,
+ unsigned geom_flags)
+{
+ uint32_t retval = 0;
+ boolean is_r500 = r300_screen(screen)->caps.is_r500;
+ boolean is_r400 = r300_screen(screen)->caps.is_r400;
+ boolean is_rv350 = r300_screen(screen)->caps.is_rv350;
+ boolean is_z24 = format == PIPE_FORMAT_X8Z24_UNORM ||
+ format == PIPE_FORMAT_S8_USCALED_Z24_UNORM;
+ boolean is_color2101010 = format == PIPE_FORMAT_R10G10B10A2_UNORM ||
+ format == PIPE_FORMAT_R10G10B10X2_SNORM ||
+ format == PIPE_FORMAT_B10G10R10A2_UNORM ||
+ format == PIPE_FORMAT_R10SG10SB10SA2U_NORM;
+ boolean is_ati1n = format == PIPE_FORMAT_RGTC1_UNORM ||
+ format == PIPE_FORMAT_RGTC1_SNORM;
+ boolean is_ati2n = format == PIPE_FORMAT_RGTC2_UNORM ||
+ format == PIPE_FORMAT_RGTC2_SNORM;
+ boolean is_half_float = format == PIPE_FORMAT_R16_FLOAT ||
+ format == PIPE_FORMAT_R16G16_FLOAT ||
+ format == PIPE_FORMAT_R16G16B16_FLOAT ||
+ format == PIPE_FORMAT_R16G16B16A16_FLOAT;
+
+ if (target >= PIPE_MAX_TEXTURE_TYPES) {
+ fprintf(stderr, "r300: Implementation error: Received bogus texture "
+ "target %d in %s\n", target, __FUNCTION__);
+ return FALSE;
+ }
+
+ switch (sample_count) {
+ case 0:
+ case 1:
+ break;
+ case 2:
+ case 3:
+ case 4:
+ case 6:
+ if (usage != PIPE_BIND_RENDER_TARGET ||
+ !util_format_is_rgba8_variant(
+ util_format_description(format))) {
+ return FALSE;
+ }
+ break;
+ default:
+ return FALSE;
+ }
+
+ /* Check sampler format support. */
+ if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+ /* Z24 cannot be sampled from on non-r5xx. */
+ (is_r500 || !is_z24) &&
+ /* ATI1N is r5xx-only. */
+ (is_r500 || !is_ati1n) &&
+ /* ATI2N is supported on r4xx-r5xx. */
+ (is_r400 || is_r500 || !is_ati2n) &&
+ r300_is_sampler_format_supported(format)) {
+ retval |= PIPE_BIND_SAMPLER_VIEW;
+ }
+
+ /* Check colorbuffer format support. */
+ if ((usage & (PIPE_BIND_RENDER_TARGET |
+ PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT |
+ PIPE_BIND_SHARED)) &&
+ /* 2101010 cannot be rendered to on non-r5xx. */
+ (is_r500 || !is_color2101010) &&
+ r300_is_colorbuffer_format_supported(format)) {
+ retval |= usage &
+ (PIPE_BIND_RENDER_TARGET |
+ PIPE_BIND_DISPLAY_TARGET |
+ PIPE_BIND_SCANOUT |
+ PIPE_BIND_SHARED);
+ }
+
+ /* Check depth-stencil format support. */
+ if (usage & PIPE_BIND_DEPTH_STENCIL &&
+ r300_is_zs_format_supported(format)) {
+ retval |= PIPE_BIND_DEPTH_STENCIL;
+ }
+
+ /* Check vertex buffer format support. */
+ if (usage & PIPE_BIND_VERTEX_BUFFER &&
+ /* Half float is supported on >= RV350. */
+ (is_rv350 || !is_half_float) &&
+ r300_translate_vertex_data_type(format) != R300_INVALID_FORMAT) {
+ retval |= PIPE_BIND_VERTEX_BUFFER;
+ }
+
+ /* Transfers are always supported. */
+ if (usage & PIPE_BIND_TRANSFER_READ)
+ retval |= PIPE_BIND_TRANSFER_READ;
+ if (usage & PIPE_BIND_TRANSFER_WRITE)
+ retval |= PIPE_BIND_TRANSFER_WRITE;
+
+ return retval == usage;
+}
+
+static void r300_destroy_screen(struct pipe_screen* pscreen)
+{
+ struct r300_screen* r300screen = r300_screen(pscreen);
+ struct r300_winsys_screen *rws = r300_winsys_screen(pscreen);
+
+ if (rws)
+ rws->destroy(rws);
+
+ FREE(r300screen);
+}
+
+static void r300_fence_reference(struct pipe_screen *screen,
+ struct pipe_fence_handle **ptr,
+ struct pipe_fence_handle *fence)
+{
+ struct r300_fence **oldf = (struct r300_fence**)ptr;
+ struct r300_fence *newf = (struct r300_fence*)fence;
+
+ if (pipe_reference(&(*oldf)->reference, &newf->reference))
+ FREE(*oldf);
+
+ *ptr = fence;
+}
+
+static int r300_fence_signalled(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flags)
+{
+ struct r300_fence *rfence = (struct r300_fence*)fence;
+
+ return rfence->signalled ? 0 : 1; /* 0 == success */
+}
+
+static int r300_fence_finish(struct pipe_screen *screen,
+ struct pipe_fence_handle *fence,
+ unsigned flags)
+{
+ struct r300_fence *rfence = (struct r300_fence*)fence;
+
+ r300_finish(rfence->ctx);
+ rfence->signalled = TRUE;
+ return 0; /* 0 == success */
+}
+
+struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws)
+{
+ struct r300_screen *r300screen = CALLOC_STRUCT(r300_screen);
+
+ if (!r300screen) {
+ FREE(r300screen);
+ return NULL;
+ }
+
+ r300screen->caps.pci_id = rws->get_value(rws, R300_VID_PCI_ID);
+ r300screen->caps.num_frag_pipes = rws->get_value(rws, R300_VID_GB_PIPES);
+ r300screen->caps.num_z_pipes = rws->get_value(rws, R300_VID_Z_PIPES);
+
+ r300_init_debug(r300screen);
+ r300_parse_chipset(&r300screen->caps);
+
+ r300screen->rws = rws;
+ r300screen->screen.winsys = (struct pipe_winsys*)rws;
+ r300screen->screen.destroy = r300_destroy_screen;
+ r300screen->screen.get_name = r300_get_name;
+ r300screen->screen.get_vendor = r300_get_vendor;
+ r300screen->screen.get_param = r300_get_param;
+ r300screen->screen.get_paramf = r300_get_paramf;
+ r300screen->screen.is_format_supported = r300_is_format_supported;
+ r300screen->screen.context_create = r300_create_context;
+
+ r300screen->screen.fence_reference = r300_fence_reference;
+ r300screen->screen.fence_signalled = r300_fence_signalled;
+ r300screen->screen.fence_finish = r300_fence_finish;
+
+ r300_init_screen_resource_functions(r300screen);
+
+ util_format_s3tc_init();
+
+ return &r300screen->screen;
+}
+
+struct r300_winsys_screen *
+r300_winsys_screen(struct pipe_screen *screen)
+{
+ return r300_screen(screen)->rws;
+}
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
new file mode 100644
index 0000000000..29cd5dbe26
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_SCREEN_H
+#define R300_SCREEN_H
+
+#include "pipe/p_screen.h"
+
+#include "r300_chipset.h"
+
+#include <stdio.h>
+
+struct r300_screen {
+ /* Parent class */
+ struct pipe_screen screen;
+
+ struct r300_winsys_screen *rws;
+
+ /* Chipset capabilities */
+ struct r300_capabilities caps;
+
+ /** Combination of DBG_xxx flags */
+ unsigned debug;
+};
+
+
+/* Convenience cast wrapper. */
+static INLINE struct r300_screen* r300_screen(struct pipe_screen* screen) {
+ return (struct r300_screen*)screen;
+}
+
+/* Debug functionality. */
+
+/**
+ * Debug flags to disable/enable certain groups of debugging outputs.
+ *
+ * \note These may be rather coarse, and the grouping may be impractical.
+ * If you find, while debugging the driver, that a different grouping
+ * of these flags would be beneficial, just feel free to change them
+ * but make sure to update the documentation in r300_debug.c to reflect
+ * those changes.
+ */
+/*@{*/
+#define DBG_HELP (1 << 0)
+/* Logging. */
+#define DBG_FP (1 << 1)
+#define DBG_VP (1 << 2)
+/* The bit (1 << 3) is unused. */
+#define DBG_DRAW (1 << 4)
+#define DBG_TEX (1 << 5)
+#define DBG_TEXALLOC (1 << 6)
+#define DBG_RS (1 << 7)
+#define DBG_FALL (1 << 8)
+#define DBG_FB (1 << 9)
+/* Features. */
+#define DBG_ANISOHQ (1 << 16)
+#define DBG_NO_TILING (1 << 17)
+#define DBG_NO_IMMD (1 << 18)
+#define DBG_FAKE_OCC (1 << 19)
+/* Statistics. */
+#define DBG_STATS (1 << 24)
+/*@}*/
+
+static INLINE boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
+{
+ return (screen->debug & flags) ? TRUE : FALSE;
+}
+
+static INLINE void SCREEN_DBG(struct r300_screen * screen, unsigned flags,
+ const char * fmt, ...)
+{
+ if (SCREEN_DBG_ON(screen, flags)) {
+ va_list va;
+ va_start(va, fmt);
+ vfprintf(stderr, fmt, va);
+ va_end(va);
+ }
+}
+
+void r300_init_debug(struct r300_screen* ctx);
+
+void r300_init_screen_resource_functions(struct r300_screen *r300screen);
+
+#endif /* R300_SCREEN_H */
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c
new file mode 100644
index 0000000000..7959e6a2f9
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright 2010 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ */
+
+#include <stdio.h>
+
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
+#include "util/u_math.h"
+
+#include "r300_screen_buffer.h"
+#include "r300_winsys.h"
+
+unsigned r300_buffer_is_referenced(struct pipe_context *context,
+ struct pipe_resource *buf,
+ enum r300_reference_domain domain)
+{
+ struct r300_context *r300 = r300_context(context);
+ struct r300_buffer *rbuf = r300_buffer(buf);
+
+ if (r300_buffer_is_user_buffer(buf))
+ return PIPE_UNREFERENCED;
+
+ if (r300->rws->is_buffer_referenced(r300->rws, rbuf->buf, domain))
+ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+
+ return PIPE_UNREFERENCED;
+}
+
+static unsigned r300_buffer_is_referenced_by_cs(struct pipe_context *context,
+ struct pipe_resource *buf,
+ unsigned face, unsigned level)
+{
+ return r300_buffer_is_referenced(context, buf, R300_REF_CS);
+}
+
+/* External helper, not required to implent u_resource_vtbl:
+ */
+int r300_upload_index_buffer(struct r300_context *r300,
+ struct pipe_resource **index_buffer,
+ unsigned index_size,
+ unsigned start,
+ unsigned count)
+{
+ struct pipe_resource *upload_buffer = NULL;
+ unsigned index_offset = start * index_size;
+ int ret = 0;
+
+ if (r300_buffer_is_user_buffer(*index_buffer)) {
+ ret = u_upload_buffer(r300->upload_ib,
+ index_offset,
+ count * index_size,
+ *index_buffer,
+ &index_offset,
+ &upload_buffer);
+ if (ret) {
+ goto done;
+ }
+ *index_buffer = upload_buffer;
+ }
+ done:
+ // if (upload_buffer)
+ // pipe_resource_reference(&upload_buffer, NULL);
+ return ret;
+}
+
+/* External helper, not required to implement u_resource_vtbl:
+ */
+int r300_upload_user_buffers(struct r300_context *r300)
+{
+ enum pipe_error ret = PIPE_OK;
+ int i, nr;
+
+ nr = r300->velems->count;
+
+ for (i = 0; i < nr; i++) {
+ struct pipe_vertex_buffer *vb =
+ &r300->vertex_buffer[r300->velems->velem[i].vertex_buffer_index];
+
+ if (r300_buffer_is_user_buffer(vb->buffer)) {
+ struct pipe_resource *upload_buffer = NULL;
+ unsigned offset = 0; /*vb->buffer_offset * 4;*/
+ unsigned size = vb->buffer->width0;
+ unsigned upload_offset;
+ ret = u_upload_buffer(r300->upload_vb,
+ offset, size,
+ vb->buffer,
+ &upload_offset, &upload_buffer);
+ if (ret)
+ return ret;
+
+ pipe_resource_reference(&vb->buffer, NULL);
+ vb->buffer = upload_buffer;
+ vb->buffer_offset = upload_offset;
+ }
+ }
+ return ret;
+}
+
+static void r300_winsys_buffer_destroy(struct r300_screen *r300screen,
+ struct r300_buffer *rbuf)
+{
+ struct r300_winsys_screen *rws = r300screen->rws;
+
+ if (rbuf->buf) {
+ rws->buffer_reference(rws, &rbuf->buf, NULL);
+ rbuf->buf = NULL;
+ }
+}
+
+static void r300_buffer_destroy(struct pipe_screen *screen,
+ struct pipe_resource *buf)
+{
+ struct r300_screen *r300screen = r300_screen(screen);
+ struct r300_buffer *rbuf = r300_buffer(buf);
+
+ r300_winsys_buffer_destroy(r300screen, rbuf);
+ FREE(rbuf);
+}
+
+static void *
+r300_buffer_transfer_map( struct pipe_context *pipe,
+ struct pipe_transfer *transfer )
+{
+ struct r300_screen *r300screen = r300_screen(pipe->screen);
+ struct r300_winsys_screen *rws = r300screen->rws;
+ struct r300_buffer *rbuf = r300_buffer(transfer->resource);
+ uint8_t *map;
+ boolean flush = FALSE;
+ unsigned i;
+
+ if (rbuf->user_buffer)
+ return (uint8_t *) rbuf->user_buffer + transfer->box.x;
+
+ if (rbuf->b.b.bind & PIPE_BIND_CONSTANT_BUFFER) {
+ goto just_map;
+ }
+
+ /* check if the mapping is to a range we already flushed */
+ if (transfer->usage & PIPE_TRANSFER_DISCARD) {
+ for (i = 0; i < rbuf->num_ranges; i++) {
+ if ((transfer->box.x >= rbuf->ranges[i].start) &&
+ (transfer->box.x < rbuf->ranges[i].end))
+ flush = TRUE;
+
+ if (flush) {
+ /* unreference this hw buffer and allocate a new one */
+ rws->buffer_reference(rws, &rbuf->buf, NULL);
+
+ rbuf->num_ranges = 0;
+ rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, 16,
+ rbuf->b.b.bind,
+ rbuf->domain,
+ rbuf->b.b.width0);
+ break;
+ }
+ }
+ }
+just_map:
+ map = rws->buffer_map(rws, rbuf->buf, transfer->usage);
+
+ if (map == NULL)
+ return NULL;
+
+ /* map_buffer() returned a pointer to the beginning of the buffer,
+ * but transfers are expected to return a pointer to just the
+ * region specified in the box.
+ */
+ return map + transfer->box.x;
+}
+
+static void r300_buffer_transfer_flush_region( struct pipe_context *pipe,
+ struct pipe_transfer *transfer,
+ const struct pipe_box *box)
+{
+ struct r300_buffer *rbuf = r300_buffer(transfer->resource);
+ unsigned i;
+ unsigned offset = transfer->box.x + box->x;
+ unsigned length = box->width;
+
+ assert(box->x + box->width <= transfer->box.width);
+
+ if (rbuf->user_buffer)
+ return;
+
+ if (rbuf->b.b.bind & PIPE_BIND_CONSTANT_BUFFER)
+ return;
+
+ /* mark the range as used */
+ for(i = 0; i < rbuf->num_ranges; ++i) {
+ if(offset <= rbuf->ranges[i].end && rbuf->ranges[i].start <= (offset+box->width)) {
+ rbuf->ranges[i].start = MIN2(rbuf->ranges[i].start, offset);
+ rbuf->ranges[i].end = MAX2(rbuf->ranges[i].end, (offset+length));
+ return;
+ }
+ }
+
+ rbuf->ranges[rbuf->num_ranges].start = offset;
+ rbuf->ranges[rbuf->num_ranges].end = offset+length;
+ rbuf->num_ranges++;
+}
+
+static void r300_buffer_transfer_unmap( struct pipe_context *pipe,
+ struct pipe_transfer *transfer )
+{
+ struct r300_screen *r300screen = r300_screen(pipe->screen);
+ struct r300_winsys_screen *rws = r300screen->rws;
+ struct r300_buffer *rbuf = r300_buffer(transfer->resource);
+
+ if (rbuf->buf) {
+ rws->buffer_unmap(rws, rbuf->buf);
+ }
+}
+
+struct u_resource_vtbl r300_buffer_vtbl =
+{
+ u_default_resource_get_handle, /* get_handle */
+ r300_buffer_destroy, /* resource_destroy */
+ r300_buffer_is_referenced_by_cs, /* is_buffer_referenced */
+ u_default_get_transfer, /* get_transfer */
+ u_default_transfer_destroy, /* transfer_destroy */
+ r300_buffer_transfer_map, /* transfer_map */
+ r300_buffer_transfer_flush_region, /* transfer_flush_region */
+ r300_buffer_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
+struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
+{
+ struct r300_screen *r300screen = r300_screen(screen);
+ struct r300_buffer *rbuf;
+ unsigned alignment = 16;
+
+ rbuf = CALLOC_STRUCT(r300_buffer);
+ if (!rbuf)
+ goto error1;
+
+ rbuf->magic = R300_BUFFER_MAGIC;
+
+ rbuf->b.b = *templ;
+ rbuf->b.vtbl = &r300_buffer_vtbl;
+ pipe_reference_init(&rbuf->b.b.reference, 1);
+ rbuf->b.b.screen = screen;
+ rbuf->domain = R300_DOMAIN_GTT;
+
+ rbuf->buf = r300screen->rws->buffer_create(r300screen->rws,
+ alignment,
+ rbuf->b.b.bind,
+ rbuf->domain,
+ rbuf->b.b.width0);
+
+ if (!rbuf->buf)
+ goto error2;
+
+ return &rbuf->b.b;
+error2:
+ FREE(rbuf);
+error1:
+ return NULL;
+}
+
+struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
+ void *ptr,
+ unsigned bytes,
+ unsigned bind)
+{
+ struct r300_buffer *rbuf;
+
+ rbuf = CALLOC_STRUCT(r300_buffer);
+ if (!rbuf)
+ goto no_rbuf;
+
+ rbuf->magic = R300_BUFFER_MAGIC;
+
+ pipe_reference_init(&rbuf->b.b.reference, 1);
+ rbuf->b.vtbl = &r300_buffer_vtbl;
+ rbuf->b.b.screen = screen;
+ rbuf->b.b.format = PIPE_FORMAT_R8_UNORM;
+ rbuf->b.b.usage = PIPE_USAGE_IMMUTABLE;
+ rbuf->b.b.bind = bind;
+ rbuf->b.b.width0 = bytes;
+ rbuf->b.b.height0 = 1;
+ rbuf->b.b.depth0 = 1;
+ rbuf->domain = R300_DOMAIN_GTT;
+
+ rbuf->user_buffer = ptr;
+ return &rbuf->b.b;
+
+no_rbuf:
+ return NULL;
+}
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.h b/src/gallium/drivers/r300/r300_screen_buffer.h
new file mode 100644
index 0000000000..ff35585870
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_screen_buffer.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2010 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ */
+
+#ifndef R300_SCREEN_BUFFER_H
+#define R300_SCREEN_BUFFER_H
+
+#include <stdio.h>
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+#include "util/u_transfer.h"
+
+#include "r300_screen.h"
+#include "r300_winsys.h"
+#include "r300_context.h"
+
+#define R300_BUFFER_MAGIC 0xabcd1234
+#define R300_BUFFER_MAX_RANGES 32
+
+struct r300_buffer_range {
+ uint32_t start;
+ uint32_t end;
+};
+
+/* Vertex buffer. */
+struct r300_buffer
+{
+ struct u_resource b;
+
+ uint32_t magic;
+
+ struct r300_winsys_buffer *buf;
+
+ enum r300_buffer_domain domain;
+
+ void *user_buffer;
+ struct r300_buffer_range ranges[R300_BUFFER_MAX_RANGES];
+ unsigned num_ranges;
+};
+
+/* Functions. */
+
+int r300_upload_user_buffers(struct r300_context *r300);
+
+int r300_upload_index_buffer(struct r300_context *r300,
+ struct pipe_resource **index_buffer,
+ unsigned index_size,
+ unsigned start,
+ unsigned count);
+
+struct pipe_resource *r300_buffer_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ);
+
+struct pipe_resource *r300_user_buffer_create(struct pipe_screen *screen,
+ void *ptr,
+ unsigned bytes,
+ unsigned usage);
+
+unsigned r300_buffer_is_referenced(struct pipe_context *context,
+ struct pipe_resource *buf,
+ enum r300_reference_domain domain);
+
+/* Inline functions. */
+
+static INLINE struct r300_buffer *r300_buffer(struct pipe_resource *buffer)
+{
+ if (buffer) {
+ assert(((struct r300_buffer *)buffer)->magic == R300_BUFFER_MAGIC);
+ return (struct r300_buffer *)buffer;
+ }
+ return NULL;
+}
+
+static INLINE boolean r300_buffer_is_user_buffer(struct pipe_resource *buffer)
+{
+ return r300_buffer(buffer)->user_buffer ? true : false;
+}
+
+static INLINE boolean r300_add_buffer(struct r300_winsys_screen *rws,
+ struct pipe_resource *buffer,
+ int rd, int wr)
+{
+ struct r300_buffer *buf = r300_buffer(buffer);
+
+ if (!buf->buf)
+ return true;
+
+ return rws->add_buffer(rws, buf->buf, rd, wr);
+}
+
+static INLINE boolean r300_add_texture(struct r300_winsys_screen *rws,
+ struct r300_texture *tex,
+ int rd, int wr)
+{
+ return rws->add_buffer(rws, tex->buffer, rd, wr);
+}
+
+#endif
diff --git a/src/gallium/drivers/r300/r300_shader_semantics.h b/src/gallium/drivers/r300/r300_shader_semantics.h
new file mode 100644
index 0000000000..cb7a37033f
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_shader_semantics.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_SHADER_SEMANTICS_H
+#define R300_SHADER_SEMANTICS_H
+
+#define ATTR_UNUSED (-1)
+#define ATTR_COLOR_COUNT 2
+#define ATTR_GENERIC_COUNT 32
+
+/* This structure contains information about what attributes are written by VS
+ * or read by FS. (but not both) It's much easier to work with than
+ * tgsi_shader_info.
+ *
+ * The variables contain indices to tgsi_shader_info semantics and those
+ * indices are nothing else than input/output register numbers. */
+struct r300_shader_semantics {
+ int pos;
+ int psize;
+ int color[ATTR_COLOR_COUNT];
+ int bcolor[ATTR_COLOR_COUNT];
+ int generic[ATTR_GENERIC_COUNT];
+ int fog;
+ int wpos;
+};
+
+static INLINE void r300_shader_semantics_reset(
+ struct r300_shader_semantics* info)
+{
+ int i;
+
+ info->pos = ATTR_UNUSED;
+ info->psize = ATTR_UNUSED;
+ info->fog = ATTR_UNUSED;
+ info->wpos = ATTR_UNUSED;
+
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ info->color[i] = ATTR_UNUSED;
+ info->bcolor[i] = ATTR_UNUSED;
+ }
+
+ for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ info->generic[i] = ATTR_UNUSED;
+ }
+}
+
+#endif
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
new file mode 100644
index 0000000000..bc2b62ba54
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -0,0 +1,1750 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "draw/draw_context.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "pipe/p_config.h"
+
+#include "r300_cb.h"
+#include "r300_context.h"
+#include "r300_emit.h"
+#include "r300_reg.h"
+#include "r300_screen.h"
+#include "r300_screen_buffer.h"
+#include "r300_state_inlines.h"
+#include "r300_fs.h"
+#include "r300_texture.h"
+#include "r300_vs.h"
+#include "r300_winsys.h"
+
+/* r300_state: Functions used to intialize state context by translating
+ * Gallium state objects into semi-native r300 state objects. */
+
+#define UPDATE_STATE(cso, atom) \
+ if (cso != atom.state) { \
+ atom.state = cso; \
+ atom.dirty = TRUE; \
+ }
+
+static boolean blend_discard_if_src_alpha_0(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_ALPHA == 0, and the following state is set, the colorbuffer
+ * will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_alpha_1(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_ALPHA == 1, and the following state is set, the colorbuffer
+ * will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_color_0(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_COLOR == (0,0,0), and the following state is set, the colorbuffer
+ * will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_color_1(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_COLOR == (1,1,1), and the following state is set, the colorbuffer
+ * will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_alpha_color_0(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_ALPHA_COLOR == (0,0,0,0), and the following state is set,
+ * the colorbuffer will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ srcA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
+ srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static boolean blend_discard_if_src_alpha_color_1(unsigned srcRGB, unsigned srcA,
+ unsigned dstRGB, unsigned dstA)
+{
+ /* If the blend equation is ADD or REVERSE_SUBTRACT,
+ * SRC_ALPHA_COLOR == (1,1,1,1), and the following state is set,
+ * the colorbuffer will not be changed.
+ * Notice that the dst factors are the src factors inverted. */
+ return (srcRGB == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (srcA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ srcA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_ZERO) &&
+ (dstRGB == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ONE) &&
+ (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ONE);
+}
+
+static unsigned bgra_cmask(unsigned mask)
+{
+ /* Gallium uses RGBA color ordering while R300 expects BGRA. */
+
+ return ((mask & PIPE_MASK_R) << 2) |
+ ((mask & PIPE_MASK_B) >> 2) |
+ (mask & (PIPE_MASK_G | PIPE_MASK_A));
+}
+
+/* Create a new blend state based on the CSO blend state.
+ *
+ * This encompasses alpha blending, logic/raster ops, and blend dithering. */
+static void* r300_create_blend_state(struct pipe_context* pipe,
+ const struct pipe_blend_state* state)
+{
+ struct r300_screen* r300screen = r300_screen(pipe->screen);
+ struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state);
+ uint32_t blend_control = 0; /* R300_RB3D_CBLEND: 0x4e04 */
+ uint32_t alpha_blend_control = 0; /* R300_RB3D_ABLEND: 0x4e08 */
+ uint32_t color_channel_mask = 0; /* R300_RB3D_COLOR_CHANNEL_MASK: 0x4e0c */
+ uint32_t rop = 0; /* R300_RB3D_ROPCNTL: 0x4e18 */
+ uint32_t dither = 0; /* R300_RB3D_DITHER_CTL: 0x4e50 */
+ CB_LOCALS;
+
+ if (state->rt[0].blend_enable)
+ {
+ unsigned eqRGB = state->rt[0].rgb_func;
+ unsigned srcRGB = state->rt[0].rgb_src_factor;
+ unsigned dstRGB = state->rt[0].rgb_dst_factor;
+
+ unsigned eqA = state->rt[0].alpha_func;
+ unsigned srcA = state->rt[0].alpha_src_factor;
+ unsigned dstA = state->rt[0].alpha_dst_factor;
+
+ /* despite the name, ALPHA_BLEND_ENABLE has nothing to do with alpha,
+ * this is just the crappy D3D naming */
+ blend_control = R300_ALPHA_BLEND_ENABLE |
+ r300_translate_blend_function(eqRGB) |
+ ( r300_translate_blend_factor(srcRGB) << R300_SRC_BLEND_SHIFT) |
+ ( r300_translate_blend_factor(dstRGB) << R300_DST_BLEND_SHIFT);
+
+ /* Optimization: some operations do not require the destination color.
+ *
+ * When SRC_ALPHA_SATURATE is used, colorbuffer reads must be enabled,
+ * otherwise blending gives incorrect results. It seems to be
+ * a hardware bug. */
+ if (eqRGB == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MIN ||
+ eqRGB == PIPE_BLEND_MAX || eqA == PIPE_BLEND_MAX ||
+ dstRGB != PIPE_BLENDFACTOR_ZERO ||
+ dstA != PIPE_BLENDFACTOR_ZERO ||
+ srcRGB == PIPE_BLENDFACTOR_DST_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_DST_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_INV_DST_COLOR ||
+ srcRGB == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_DST_COLOR ||
+ srcA == PIPE_BLENDFACTOR_DST_ALPHA ||
+ srcA == PIPE_BLENDFACTOR_INV_DST_COLOR ||
+ srcA == PIPE_BLENDFACTOR_INV_DST_ALPHA ||
+ srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) {
+ /* Enable reading from the colorbuffer. */
+ blend_control |= R300_READ_ENABLE;
+
+ if (r300screen->caps.is_r500) {
+ /* Optimization: Depending on incoming pixels, we can
+ * conditionally disable the reading in hardware... */
+ if (eqRGB != PIPE_BLEND_MIN && eqA != PIPE_BLEND_MIN &&
+ eqRGB != PIPE_BLEND_MAX && eqA != PIPE_BLEND_MAX) {
+ /* Disable reading if SRC_ALPHA == 0. */
+ if ((dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (dstA == PIPE_BLENDFACTOR_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ZERO)) {
+ blend_control |= R500_SRC_ALPHA_0_NO_READ;
+ }
+
+ /* Disable reading if SRC_ALPHA == 1. */
+ if ((dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstRGB == PIPE_BLENDFACTOR_ZERO) &&
+ (dstA == PIPE_BLENDFACTOR_INV_SRC_COLOR ||
+ dstA == PIPE_BLENDFACTOR_INV_SRC_ALPHA ||
+ dstA == PIPE_BLENDFACTOR_ZERO)) {
+ blend_control |= R500_SRC_ALPHA_1_NO_READ;
+ }
+ }
+ }
+ }
+
+ /* Optimization: discard pixels which don't change the colorbuffer.
+ *
+ * The code below is non-trivial and some math is involved.
+ *
+ * Discarding pixels must be disabled when FP16 AA is enabled.
+ * This is a hardware bug. Also, this implementation wouldn't work
+ * with FP blending enabled and equation clamping disabled.
+ *
+ * Equations other than ADD are rarely used and therefore won't be
+ * optimized. */
+ if ((eqRGB == PIPE_BLEND_ADD || eqRGB == PIPE_BLEND_REVERSE_SUBTRACT) &&
+ (eqA == PIPE_BLEND_ADD || eqA == PIPE_BLEND_REVERSE_SUBTRACT)) {
+ /* ADD: X+Y
+ * REVERSE_SUBTRACT: Y-X
+ *
+ * The idea is:
+ * If X = src*srcFactor = 0 and Y = dst*dstFactor = 1,
+ * then CB will not be changed.
+ *
+ * Given the srcFactor and dstFactor variables, we can derive
+ * what src and dst should be equal to and discard appropriate
+ * pixels.
+ */
+ if (blend_discard_if_src_alpha_0(srcRGB, srcA, dstRGB, dstA)) {
+ blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0;
+ } else if (blend_discard_if_src_alpha_1(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend_control |= R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1;
+ } else if (blend_discard_if_src_color_0(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_0;
+ } else if (blend_discard_if_src_color_1(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend_control |= R300_DISCARD_SRC_PIXELS_SRC_COLOR_1;
+ } else if (blend_discard_if_src_alpha_color_0(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend_control |=
+ R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0;
+ } else if (blend_discard_if_src_alpha_color_1(srcRGB, srcA,
+ dstRGB, dstA)) {
+ blend_control |=
+ R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1;
+ }
+ }
+
+ /* separate alpha */
+ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) {
+ blend_control |= R300_SEPARATE_ALPHA_ENABLE;
+ alpha_blend_control =
+ r300_translate_blend_function(eqA) |
+ (r300_translate_blend_factor(srcA) << R300_SRC_BLEND_SHIFT) |
+ (r300_translate_blend_factor(dstA) << R300_DST_BLEND_SHIFT);
+ }
+ }
+
+ /* PIPE_LOGICOP_* don't need to be translated, fortunately. */
+ if (state->logicop_enable) {
+ rop = R300_RB3D_ROPCNTL_ROP_ENABLE |
+ (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT;
+ }
+
+ /* Color channel masks for all MRTs. */
+ color_channel_mask = bgra_cmask(state->rt[0].colormask);
+ if (r300screen->caps.is_r500 && state->independent_blend_enable) {
+ if (state->rt[1].blend_enable) {
+ color_channel_mask |= bgra_cmask(state->rt[1].colormask) << 4;
+ }
+ if (state->rt[2].blend_enable) {
+ color_channel_mask |= bgra_cmask(state->rt[2].colormask) << 8;
+ }
+ if (state->rt[3].blend_enable) {
+ color_channel_mask |= bgra_cmask(state->rt[3].colormask) << 12;
+ }
+ }
+
+ /* Neither fglrx nor classic r300 ever set this, regardless of dithering
+ * state. Since it's an optional implementation detail, we can leave it
+ * out and never dither.
+ *
+ * This could be revisited if we ever get quality or conformance hints.
+ *
+ if (state->dither) {
+ dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT |
+ R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT;
+ }
+ */
+
+ /* Build a command buffer. */
+ BEGIN_CB(blend->cb, 8);
+ OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
+ OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
+ OUT_CB(blend_control);
+ OUT_CB(alpha_blend_control);
+ OUT_CB(color_channel_mask);
+ OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
+ END_CB;
+
+ /* The same as above, but with no colorbuffer reads and writes. */
+ BEGIN_CB(blend->cb_no_readwrite, 8);
+ OUT_CB_REG(R300_RB3D_ROPCNTL, rop);
+ OUT_CB_REG_SEQ(R300_RB3D_CBLEND, 3);
+ OUT_CB(0);
+ OUT_CB(0);
+ OUT_CB(0);
+ OUT_CB_REG(R300_RB3D_DITHER_CTL, dither);
+ END_CB;
+
+ return (void*)blend;
+}
+
+/* Bind blend state. */
+static void r300_bind_blend_state(struct pipe_context* pipe,
+ void* state)
+{
+ struct r300_context* r300 = r300_context(pipe);
+
+ UPDATE_STATE(state, r300->blend_state);
+}
+
+/* Free blend state. */
+static void r300_delete_blend_state(struct pipe_context* pipe,
+ void* state)
+{
+ FREE(state);
+}
+
+/* Convert float to 10bit integer */
+static unsigned float_to_fixed10(float f)
+{
+ return CLAMP((unsigned)(f * 1023.9f), 0, 1023);
+}
+
+/* Set blend color.
+ * Setup both R300 and R500 registers, figure out later which one to write. */
+static void r300_set_blend_color(struct pipe_context* pipe,
+ const struct pipe_blend_color* color)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_blend_color_state* state =
+ (struct r300_blend_color_state*)r300->blend_color_state.state;
+ CB_LOCALS;
+
+ if (r300->screen->caps.is_r500) {
+ /* XXX if FP16 blending is enabled, we should use the FP16 format */
+ BEGIN_CB(state->cb, 3);
+ OUT_CB_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);
+ OUT_CB(float_to_fixed10(color->color[0]) |
+ (float_to_fixed10(color->color[3]) << 16));
+ OUT_CB(float_to_fixed10(color->color[2]) |
+ (float_to_fixed10(color->color[1]) << 16));
+ END_CB;
+ } else {
+ union util_color uc;
+ util_pack_color(color->color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+
+ BEGIN_CB(state->cb, 2);
+ OUT_CB_REG(R300_RB3D_BLEND_COLOR, uc.ui);
+ END_CB;
+ }
+
+ r300->blend_color_state.dirty = TRUE;
+}
+
+static void r300_set_clip_state(struct pipe_context* pipe,
+ const struct pipe_clip_state* state)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_clip_state *clip =
+ (struct r300_clip_state*)r300->clip_state.state;
+ CB_LOCALS;
+
+ clip->clip = *state;
+
+ if (r300->screen->caps.has_tcl) {
+ BEGIN_CB(clip->cb, 29);
+ OUT_CB_REG(R300_VAP_PVS_VECTOR_INDX_REG,
+ (r300->screen->caps.is_r500 ?
+ R500_PVS_UCP_START : R300_PVS_UCP_START));
+ OUT_CB_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 6 * 4);
+ OUT_CB_TABLE(state->ucp, 6 * 4);
+ OUT_CB_REG(R300_VAP_CLIP_CNTL, ((1 << state->nr) - 1) |
+ R300_PS_UCP_MODE_CLIP_AS_TRIFAN);
+ END_CB;
+
+ r300->clip_state.dirty = TRUE;
+ } else {
+ draw_flush(r300->draw);
+ draw_set_clip_state(r300->draw, state);
+ }
+}
+
+static void
+r300_set_sample_mask(struct pipe_context *pipe,
+ unsigned sample_mask)
+{
+}
+
+
+/* Create a new depth, stencil, and alpha state based on the CSO dsa state.
+ *
+ * This contains the depth buffer, stencil buffer, alpha test, and such.
+ * On the Radeon, depth and stencil buffer setup are intertwined, which is
+ * the reason for some of the strange-looking assignments across registers. */
+static void*
+ r300_create_dsa_state(struct pipe_context* pipe,
+ const struct pipe_depth_stencil_alpha_state* state)
+{
+ struct r300_capabilities *caps = &r300_screen(pipe->screen)->caps;
+ struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state);
+ CB_LOCALS;
+
+ dsa->dsa = *state;
+
+ /* Depth test setup. */
+ if (state->depth.enabled) {
+ dsa->z_buffer_control |= R300_Z_ENABLE;
+
+ if (state->depth.writemask) {
+ dsa->z_buffer_control |= R300_Z_WRITE_ENABLE;
+ }
+
+ dsa->z_stencil_control |=
+ (r300_translate_depth_stencil_function(state->depth.func) <<
+ R300_Z_FUNC_SHIFT);
+ }
+
+ /* Stencil buffer setup. */
+ if (state->stencil[0].enabled) {
+ dsa->z_buffer_control |= R300_STENCIL_ENABLE;
+ dsa->z_stencil_control |=
+ (r300_translate_depth_stencil_function(state->stencil[0].func) <<
+ R300_S_FRONT_FUNC_SHIFT) |
+ (r300_translate_stencil_op(state->stencil[0].fail_op) <<
+ R300_S_FRONT_SFAIL_OP_SHIFT) |
+ (r300_translate_stencil_op(state->stencil[0].zpass_op) <<
+ R300_S_FRONT_ZPASS_OP_SHIFT) |
+ (r300_translate_stencil_op(state->stencil[0].zfail_op) <<
+ R300_S_FRONT_ZFAIL_OP_SHIFT);
+
+ dsa->stencil_ref_mask =
+ (state->stencil[0].valuemask << R300_STENCILMASK_SHIFT) |
+ (state->stencil[0].writemask << R300_STENCILWRITEMASK_SHIFT);
+
+ if (state->stencil[1].enabled) {
+ dsa->two_sided = TRUE;
+
+ dsa->z_buffer_control |= R300_STENCIL_FRONT_BACK;
+ dsa->z_stencil_control |=
+ (r300_translate_depth_stencil_function(state->stencil[1].func) <<
+ R300_S_BACK_FUNC_SHIFT) |
+ (r300_translate_stencil_op(state->stencil[1].fail_op) <<
+ R300_S_BACK_SFAIL_OP_SHIFT) |
+ (r300_translate_stencil_op(state->stencil[1].zpass_op) <<
+ R300_S_BACK_ZPASS_OP_SHIFT) |
+ (r300_translate_stencil_op(state->stencil[1].zfail_op) <<
+ R300_S_BACK_ZFAIL_OP_SHIFT);
+
+ dsa->stencil_ref_bf =
+ (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) |
+ (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT);
+
+ if (caps->is_r500) {
+ dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK;
+ } else {
+ dsa->two_sided_stencil_ref =
+ (state->stencil[0].valuemask != state->stencil[1].valuemask ||
+ state->stencil[0].writemask != state->stencil[1].writemask);
+ }
+ }
+ }
+
+ /* Alpha test setup. */
+ if (state->alpha.enabled) {
+ dsa->alpha_function =
+ r300_translate_alpha_function(state->alpha.func) |
+ R300_FG_ALPHA_FUNC_ENABLE;
+
+ /* We could use 10bit alpha ref but who needs that? */
+ dsa->alpha_function |= float_to_ubyte(state->alpha.ref_value);
+
+ if (caps->is_r500)
+ dsa->alpha_function |= R500_FG_ALPHA_FUNC_8BIT;
+ }
+
+ BEGIN_CB(&dsa->cb_begin, 8);
+ OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
+ OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
+ OUT_CB(dsa->z_buffer_control);
+ OUT_CB(dsa->z_stencil_control);
+ OUT_CB(dsa->stencil_ref_mask);
+ OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf);
+ END_CB;
+
+ BEGIN_CB(dsa->cb_no_readwrite, 8);
+ OUT_CB_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
+ OUT_CB_REG_SEQ(R300_ZB_CNTL, 3);
+ OUT_CB(0);
+ OUT_CB(0);
+ OUT_CB(0);
+ OUT_CB_REG(R500_ZB_STENCILREFMASK_BF, 0);
+ END_CB;
+
+ return (void*)dsa;
+}
+
+static void r300_dsa_inject_stencilref(struct r300_context *r300)
+{
+ struct r300_dsa_state *dsa =
+ (struct r300_dsa_state*)r300->dsa_state.state;
+
+ if (!dsa)
+ return;
+
+ dsa->stencil_ref_mask =
+ (dsa->stencil_ref_mask & ~R300_STENCILREF_MASK) |
+ r300->stencil_ref.ref_value[0];
+ dsa->stencil_ref_bf =
+ (dsa->stencil_ref_bf & ~R300_STENCILREF_MASK) |
+ r300->stencil_ref.ref_value[1];
+}
+
+/* Bind DSA state. */
+static void r300_bind_dsa_state(struct pipe_context* pipe,
+ void* state)
+{
+ struct r300_context* r300 = r300_context(pipe);
+
+ if (!state) {
+ return;
+ }
+
+ UPDATE_STATE(state, r300->dsa_state);
+
+ r300_dsa_inject_stencilref(r300);
+}
+
+/* Free DSA state. */
+static void r300_delete_dsa_state(struct pipe_context* pipe,
+ void* state)
+{
+ FREE(state);
+}
+
+static void r300_set_stencil_ref(struct pipe_context* pipe,
+ const struct pipe_stencil_ref* sr)
+{
+ struct r300_context* r300 = r300_context(pipe);
+
+ r300->stencil_ref = *sr;
+
+ r300_dsa_inject_stencilref(r300);
+ r300->dsa_state.dirty = TRUE;
+}
+
+/* This switcheroo is needed just because of goddamned MACRO_SWITCH. */
+static void r300_fb_set_tiling_flags(struct r300_context *r300,
+ const struct pipe_framebuffer_state *old_state,
+ const struct pipe_framebuffer_state *new_state)
+{
+ struct r300_texture *tex;
+ unsigned i, level;
+
+ /* Set tiling flags for new surfaces. */
+ for (i = 0; i < new_state->nr_cbufs; i++) {
+ tex = r300_texture(new_state->cbufs[i]->texture);
+ level = new_state->cbufs[i]->level;
+
+ r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
+ tex->pitch[0] * util_format_get_blocksize(tex->b.b.format),
+ tex->microtile,
+ tex->mip_macrotile[level]);
+ }
+ if (new_state->zsbuf) {
+ tex = r300_texture(new_state->zsbuf->texture);
+ level = new_state->zsbuf->level;
+
+ r300->rws->buffer_set_tiling(r300->rws, tex->buffer,
+ tex->pitch[0] * util_format_get_blocksize(tex->b.b.format),
+ tex->microtile,
+ tex->mip_macrotile[level]);
+ }
+}
+
+static void r300_print_fb_surf_info(struct pipe_surface *surf, unsigned index,
+ const char *binding)
+{
+ struct pipe_resource *tex = surf->texture;
+ struct r300_texture *rtex = r300_texture(tex);
+
+ fprintf(stderr,
+ "r300: %s[%i] Dim: %ix%i, Offset: %i, ZSlice: %i, "
+ "Face: %i, Level: %i, Format: %s\n"
+
+ "r300: TEX: Macro: %s, Micro: %s, Pitch: %i, "
+ "Dim: %ix%ix%i, LastLevel: %i, Format: %s\n",
+
+ binding, index, surf->width, surf->height, surf->offset,
+ surf->zslice, surf->face, surf->level,
+ util_format_short_name(surf->format),
+
+ rtex->macrotile ? "YES" : " NO", rtex->microtile ? "YES" : " NO",
+ rtex->hwpitch[0], tex->width0, tex->height0, tex->depth0,
+ tex->last_level, util_format_short_name(tex->format));
+}
+
+static void
+ r300_set_framebuffer_state(struct pipe_context* pipe,
+ const struct pipe_framebuffer_state* state)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct pipe_framebuffer_state *old_state = r300->fb_state.state;
+ unsigned max_width, max_height, i;
+ uint32_t zbuffer_bpp = 0;
+
+ if (state->nr_cbufs > 4) {
+ fprintf(stderr, "r300: Implementation error: Too many MRTs in %s, "
+ "refusing to bind framebuffer state!\n", __FUNCTION__);
+ return;
+ }
+
+ if (r300->screen->caps.is_r500) {
+ max_width = max_height = 4096;
+ } else if (r300->screen->caps.is_r400) {
+ max_width = max_height = 4021;
+ } else {
+ max_width = max_height = 2560;
+ }
+
+ if (state->width > max_width || state->height > max_height) {
+ fprintf(stderr, "r300: Implementation error: Render targets are too "
+ "big in %s, refusing to bind framebuffer state!\n", __FUNCTION__);
+ return;
+ }
+
+ if (r300->draw) {
+ draw_flush(r300->draw);
+ }
+
+ r300->fb_state.dirty = TRUE;
+
+ /* If nr_cbufs is changed from zero to non-zero or vice versa... */
+ if (!!old_state->nr_cbufs != !!state->nr_cbufs) {
+ r300->blend_state.dirty = TRUE;
+ }
+ /* If zsbuf is set from NULL to non-NULL or vice versa.. */
+ if (!!old_state->zsbuf != !!state->zsbuf) {
+ r300->dsa_state.dirty = TRUE;
+ }
+
+ /* The tiling flags are dependent on the surface miplevel, unfortunately. */
+ r300_fb_set_tiling_flags(r300, r300->fb_state.state, state);
+
+ memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state));
+
+ r300->fb_state.size = (10 * state->nr_cbufs) + (2 * (4 - state->nr_cbufs)) +
+ (state->zsbuf ? 10 : 0) + 11;
+
+ /* Polygon offset depends on the zbuffer bit depth. */
+ if (state->zsbuf && r300->polygon_offset_enabled) {
+ switch (util_format_get_blocksize(state->zsbuf->texture->format)) {
+ case 2:
+ zbuffer_bpp = 16;
+ break;
+ case 4:
+ zbuffer_bpp = 24;
+ break;
+ }
+
+ if (r300->zbuffer_bpp != zbuffer_bpp) {
+ r300->zbuffer_bpp = zbuffer_bpp;
+ r300->rs_state.dirty = TRUE;
+ }
+ }
+
+ if (DBG_ON(r300, DBG_FB)) {
+ fprintf(stderr, "r300: set_framebuffer_state:\n");
+ for (i = 0; i < state->nr_cbufs; i++) {
+ r300_print_fb_surf_info(state->cbufs[i], i, "CB");
+ }
+ if (state->zsbuf) {
+ r300_print_fb_surf_info(state->zsbuf, 0, "ZB");
+ }
+ }
+}
+
+/* Create fragment shader state. */
+static void* r300_create_fs_state(struct pipe_context* pipe,
+ const struct pipe_shader_state* shader)
+{
+ struct r300_fragment_shader* fs = NULL;
+
+ fs = (struct r300_fragment_shader*)CALLOC_STRUCT(r300_fragment_shader);
+
+ /* Copy state directly into shader. */
+ fs->state = *shader;
+ fs->state.tokens = tgsi_dup_tokens(shader->tokens);
+
+ return (void*)fs;
+}
+
+void r300_mark_fs_code_dirty(struct r300_context *r300)
+{
+ struct r300_fragment_shader* fs = r300_fs(r300);
+
+ r300->fs.dirty = TRUE;
+ r300->fs_rc_constant_state.dirty = TRUE;
+ r300->fs_constants.dirty = TRUE;
+ r300->fs.size = fs->shader->cb_code_size;
+
+ if (r300->screen->caps.is_r500) {
+ r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 7;
+ r300->fs_constants.size = fs->shader->externals_count * 4 + 3;
+ } else {
+ r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 5;
+ r300->fs_constants.size = fs->shader->externals_count * 4 + 1;
+ }
+}
+
+/* Bind fragment shader state. */
+static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
+
+ if (fs == NULL) {
+ r300->fs.state = NULL;
+ return;
+ }
+
+ r300->fs.state = fs;
+ r300_pick_fragment_shader(r300);
+ r300_mark_fs_code_dirty(r300);
+
+ r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */
+}
+
+/* Delete fragment shader state. */
+static void r300_delete_fs_state(struct pipe_context* pipe, void* shader)
+{
+ struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
+ struct r300_fragment_shader_code *tmp, *ptr = fs->first;
+
+ while (ptr) {
+ tmp = ptr;
+ ptr = ptr->next;
+ rc_constants_destroy(&tmp->code.constants);
+ FREE(tmp->cb_code);
+ FREE(tmp);
+ }
+ FREE((void*)fs->state.tokens);
+ FREE(shader);
+}
+
+static void r300_set_polygon_stipple(struct pipe_context* pipe,
+ const struct pipe_poly_stipple* state)
+{
+ /* XXX no idea how to set this up, but not terribly important */
+}
+
+/* Create a new rasterizer state based on the CSO rasterizer state.
+ *
+ * This is a very large chunk of state, and covers most of the graphics
+ * backend (GB), geometry assembly (GA), and setup unit (SU) blocks.
+ *
+ * In a not entirely unironic sidenote, this state has nearly nothing to do
+ * with the actual block on the Radeon called the rasterizer (RS). */
+static void* r300_create_rs_state(struct pipe_context* pipe,
+ const struct pipe_rasterizer_state* state)
+{
+ struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state);
+ int i;
+ float psiz;
+
+ /* Copy rasterizer state. */
+ rs->rs = *state;
+ rs->rs_draw = *state;
+
+ /* Override some states for Draw. */
+ rs->rs_draw.sprite_coord_enable = 0; /* We can do this in HW. */
+
+#ifdef PIPE_ARCH_LITTLE_ENDIAN
+ rs->vap_control_status = R300_VC_NO_SWAP;
+#else
+ rs->vap_control_status = R300_VC_32BIT_SWAP;
+#endif
+
+ /* If no TCL engine is present, turn off the HW TCL. */
+ if (!r300_screen(pipe->screen)->caps.has_tcl) {
+ rs->vap_control_status |= R300_VAP_TCL_BYPASS;
+ }
+
+ /* Point size width and height. */
+ rs->point_size =
+ pack_float_16_6x(state->point_size) |
+ (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT);
+
+ /* Point size clamping. */
+ if (state->point_size_per_vertex) {
+ /* Per-vertex point size.
+ * Clamp to [0, max FB size] */
+ psiz = pipe->screen->get_paramf(pipe->screen,
+ PIPE_CAP_MAX_POINT_WIDTH);
+ rs->point_minmax =
+ pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT;
+ } else {
+ /* We cannot disable the point-size vertex output,
+ * so clamp it. */
+ psiz = state->point_size;
+ rs->point_minmax =
+ (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MIN_SHIFT) |
+ (pack_float_16_6x(psiz) << R300_GA_POINT_MINMAX_MAX_SHIFT);
+ }
+
+ /* Line control. */
+ rs->line_control = pack_float_16_6x(state->line_width) |
+ R300_GA_LINE_CNTL_END_TYPE_COMP;
+
+ /* Enable polygon mode */
+ if (state->fill_front != PIPE_POLYGON_MODE_FILL ||
+ state->fill_back != PIPE_POLYGON_MODE_FILL) {
+ rs->polygon_mode = R300_GA_POLY_MODE_DUAL;
+ }
+
+ /* Front face */
+ if (state->front_ccw)
+ rs->cull_mode = R300_FRONT_FACE_CCW;
+ else
+ rs->cull_mode = R300_FRONT_FACE_CW;
+
+ /* Polygon offset */
+ if (util_get_offset(state, state->fill_front)) {
+ rs->polygon_offset_enable |= R300_FRONT_ENABLE;
+ }
+ if (util_get_offset(state, state->fill_back)) {
+ rs->polygon_offset_enable |= R300_BACK_ENABLE;
+ }
+
+ /* Polygon mode */
+ if (rs->polygon_mode) {
+ rs->polygon_mode |=
+ r300_translate_polygon_mode_front(state->fill_front);
+ rs->polygon_mode |=
+ r300_translate_polygon_mode_back(state->fill_back);
+ }
+
+ if (state->cull_face & PIPE_FACE_FRONT) {
+ rs->cull_mode |= R300_CULL_FRONT;
+ }
+ if (state->cull_face & PIPE_FACE_BACK) {
+ rs->cull_mode |= R300_CULL_BACK;
+ }
+
+ if (rs->polygon_offset_enable) {
+ rs->depth_offset = state->offset_units;
+ rs->depth_scale = state->offset_scale;
+ }
+
+ if (state->line_stipple_enable) {
+ rs->line_stipple_config =
+ R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE |
+ (fui((float)state->line_stipple_factor) &
+ R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK);
+ /* XXX this might need to be scaled up */
+ rs->line_stipple_value = state->line_stipple_pattern;
+ }
+
+ if (state->flatshade) {
+ rs->color_control = R300_SHADE_MODEL_FLAT;
+ } else {
+ rs->color_control = R300_SHADE_MODEL_SMOOTH;
+ }
+
+ rs->clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
+
+ /* Point sprites */
+ if (state->sprite_coord_enable) {
+ rs->stuffing_enable = R300_GB_POINT_STUFF_ENABLE;
+ for (i = 0; i < 8; i++) {
+ if (state->sprite_coord_enable & (1 << i))
+ rs->stuffing_enable |=
+ R300_GB_TEX_STR << (R300_GB_TEX0_SOURCE_SHIFT + (i*2));
+ }
+
+ rs->point_texcoord_left = 0.0f;
+ rs->point_texcoord_right = 1.0f;
+
+ switch (state->sprite_coord_mode) {
+ case PIPE_SPRITE_COORD_UPPER_LEFT:
+ rs->point_texcoord_top = 0.0f;
+ rs->point_texcoord_bottom = 1.0f;
+ break;
+ case PIPE_SPRITE_COORD_LOWER_LEFT:
+ rs->point_texcoord_top = 1.0f;
+ rs->point_texcoord_bottom = 0.0f;
+ break;
+ }
+ }
+
+ if (state->gl_rasterization_rules) {
+ rs->multisample_position_0 = 0x66666666;
+ rs->multisample_position_1 = 0x6666666;
+ }
+
+ return (void*)rs;
+}
+
+/* Bind rasterizer state. */
+static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_rs_state* rs = (struct r300_rs_state*)state;
+ int last_sprite_coord_enable = r300->sprite_coord_enable;
+ boolean last_two_sided_color = r300->two_sided_color;
+
+ if (r300->draw && rs) {
+ draw_flush(r300->draw);
+ draw_set_rasterizer_state(r300->draw, &rs->rs_draw, state);
+ }
+
+ if (rs) {
+ r300->polygon_offset_enabled = (rs->rs.offset_point ||
+ rs->rs.offset_line ||
+ rs->rs.offset_tri);
+ r300->sprite_coord_enable = rs->rs.sprite_coord_enable;
+ r300->two_sided_color = rs->rs.light_twoside;
+ } else {
+ r300->polygon_offset_enabled = FALSE;
+ r300->sprite_coord_enable = 0;
+ r300->two_sided_color = FALSE;
+ }
+
+ UPDATE_STATE(state, r300->rs_state);
+ r300->rs_state.size = 25 + (r300->polygon_offset_enabled ? 5 : 0) +
+ (r300->rws->get_value(r300->rws, R300_VID_DRM_2_3_0) ? 5 : 0);
+
+ if (last_sprite_coord_enable != r300->sprite_coord_enable ||
+ last_two_sided_color != r300->two_sided_color) {
+ r300->rs_block_state.dirty = TRUE;
+ }
+}
+
+/* Free rasterizer state. */
+static void r300_delete_rs_state(struct pipe_context* pipe, void* state)
+{
+ FREE(state);
+}
+
+static void*
+ r300_create_sampler_state(struct pipe_context* pipe,
+ const struct pipe_sampler_state* state)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state);
+ boolean is_r500 = r300->screen->caps.is_r500;
+ int lod_bias;
+ union util_color uc;
+
+ sampler->state = *state;
+
+ /* r300 doesn't handle CLAMP and MIRROR_CLAMP correctly when either MAG
+ * or MIN filter is NEAREST. Since texwrap produces same results
+ * for CLAMP and CLAMP_TO_EDGE, we use them instead. */
+ if (sampler->state.min_img_filter == PIPE_TEX_FILTER_NEAREST ||
+ sampler->state.mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
+ /* Wrap S. */
+ if (sampler->state.wrap_s == PIPE_TEX_WRAP_CLAMP)
+ sampler->state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ else if (sampler->state.wrap_s == PIPE_TEX_WRAP_MIRROR_CLAMP)
+ sampler->state.wrap_s = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
+
+ /* Wrap T. */
+ if (sampler->state.wrap_t == PIPE_TEX_WRAP_CLAMP)
+ sampler->state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ else if (sampler->state.wrap_t == PIPE_TEX_WRAP_MIRROR_CLAMP)
+ sampler->state.wrap_t = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
+
+ /* Wrap R. */
+ if (sampler->state.wrap_r == PIPE_TEX_WRAP_CLAMP)
+ sampler->state.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
+ else if (sampler->state.wrap_r == PIPE_TEX_WRAP_MIRROR_CLAMP)
+ sampler->state.wrap_r = PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
+ }
+
+ sampler->filter0 |=
+ (r300_translate_wrap(sampler->state.wrap_s) << R300_TX_WRAP_S_SHIFT) |
+ (r300_translate_wrap(sampler->state.wrap_t) << R300_TX_WRAP_T_SHIFT) |
+ (r300_translate_wrap(sampler->state.wrap_r) << R300_TX_WRAP_R_SHIFT);
+
+ sampler->filter0 |= r300_translate_tex_filters(state->min_img_filter,
+ state->mag_img_filter,
+ state->min_mip_filter,
+ state->max_anisotropy > 0);
+
+ sampler->filter0 |= r300_anisotropy(state->max_anisotropy);
+
+ /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */
+ /* We must pass these to the merge function to clamp them properly. */
+ sampler->min_lod = MAX2((unsigned)state->min_lod, 0);
+ sampler->max_lod = MAX2((unsigned)ceilf(state->max_lod), 0);
+
+ lod_bias = CLAMP((int)(state->lod_bias * 32 + 1), -(1 << 9), (1 << 9) - 1);
+
+ sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT;
+
+ /* This is very high quality anisotropic filtering for R5xx.
+ * It's good for benchmarking the performance of texturing but
+ * in practice we don't want to slow down the driver because it's
+ * a pretty good performance killer. Feel free to play with it. */
+ if (DBG_ON(r300, DBG_ANISOHQ) && is_r500) {
+ sampler->filter1 |= r500_anisotropy(state->max_anisotropy);
+ }
+
+ util_pack_color(state->border_color, PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
+ sampler->border_color = uc.ui;
+
+ /* R500-specific fixups and optimizations */
+ if (r300->screen->caps.is_r500) {
+ sampler->filter1 |= R500_BORDER_FIX;
+ }
+
+ return (void*)sampler;
+}
+
+static void r300_bind_sampler_states(struct pipe_context* pipe,
+ unsigned count,
+ void** states)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_textures_state* state =
+ (struct r300_textures_state*)r300->textures_state.state;
+ unsigned tex_units = r300->screen->caps.num_tex_units;
+
+ if (count > tex_units) {
+ return;
+ }
+
+ memcpy(state->sampler_states, states, sizeof(void*) * count);
+ state->sampler_state_count = count;
+
+ r300->textures_state.dirty = TRUE;
+}
+
+static void r300_lacks_vertex_textures(struct pipe_context* pipe,
+ unsigned count,
+ void** states)
+{
+}
+
+static void r300_delete_sampler_state(struct pipe_context* pipe, void* state)
+{
+ FREE(state);
+}
+
+static uint32_t r300_assign_texture_cache_region(unsigned index, unsigned num)
+{
+ /* This looks like a hack, but I believe it's suppose to work like
+ * that. To illustrate how this works, let's assume you have 5 textures.
+ * From docs, 5 and the successive numbers are:
+ *
+ * FOURTH_1 = 5
+ * FOURTH_2 = 6
+ * FOURTH_3 = 7
+ * EIGHTH_0 = 8
+ * EIGHTH_1 = 9
+ *
+ * First 3 textures will get 3/4 of size of the cache, divived evenly
+ * between them. The last 1/4 of the cache must be divided between
+ * the last 2 textures, each will therefore get 1/8 of the cache.
+ * Why not just to use "5 + texture_index" ?
+ *
+ * This simple trick works for all "num" <= 16.
+ */
+ if (num <= 1)
+ return R300_TX_CACHE(R300_TX_CACHE_WHOLE);
+ else
+ return R300_TX_CACHE(num + index);
+}
+
+static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
+ unsigned count,
+ struct pipe_sampler_view** views)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_textures_state* state =
+ (struct r300_textures_state*)r300->textures_state.state;
+ struct r300_texture *texture;
+ unsigned i, real_num_views = 0, view_index = 0;
+ unsigned tex_units = r300->screen->caps.num_tex_units;
+ boolean dirty_tex = FALSE;
+
+ if (count > tex_units) {
+ return;
+ }
+
+ /* Calculate the real number of views. */
+ for (i = 0; i < count; i++) {
+ if (views[i])
+ real_num_views++;
+ }
+
+ for (i = 0; i < count; i++) {
+ if (&state->sampler_views[i]->base != views[i]) {
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view**)&state->sampler_views[i],
+ views[i]);
+
+ if (!views[i]) {
+ continue;
+ }
+
+ /* A new sampler view (= texture)... */
+ dirty_tex = TRUE;
+
+ /* Set the texrect factor in the fragment shader.
+ * Needed for RECT and NPOT fallback. */
+ texture = r300_texture(views[i]->texture);
+ if (texture->uses_pitch) {
+ r300->fs_rc_constant_state.dirty = TRUE;
+ }
+
+ state->sampler_views[i]->texcache_region =
+ r300_assign_texture_cache_region(view_index, real_num_views);
+ view_index++;
+ }
+ }
+
+ for (i = count; i < tex_units; i++) {
+ if (state->sampler_views[i]) {
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view**)&state->sampler_views[i],
+ NULL);
+ }
+ }
+
+ state->sampler_view_count = count;
+
+ r300->textures_state.dirty = TRUE;
+
+ if (dirty_tex) {
+ r300->texture_cache_inval.dirty = TRUE;
+ }
+}
+
+static struct pipe_sampler_view *
+r300_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ)
+{
+ struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view);
+ struct r300_texture *tex = r300_texture(texture);
+
+ if (view) {
+ view->base = *templ;
+ view->base.reference.count = 1;
+ view->base.context = pipe;
+ view->base.texture = NULL;
+ pipe_resource_reference(&view->base.texture, texture);
+
+ view->swizzle[0] = templ->swizzle_r;
+ view->swizzle[1] = templ->swizzle_g;
+ view->swizzle[2] = templ->swizzle_b;
+ view->swizzle[3] = templ->swizzle_a;
+
+ view->format = tex->tx_format;
+ view->format.format1 |= r300_translate_texformat(templ->format,
+ view->swizzle);
+ if (r300_screen(pipe->screen)->caps.is_r500) {
+ view->format.format2 |= r500_tx_format_msb_bit(templ->format);
+ }
+ }
+
+ return (struct pipe_sampler_view*)view;
+}
+
+static void
+r300_sampler_view_destroy(struct pipe_context *pipe,
+ struct pipe_sampler_view *view)
+{
+ pipe_resource_reference(&view->texture, NULL);
+ FREE(view);
+}
+
+static void r300_set_scissor_state(struct pipe_context* pipe,
+ const struct pipe_scissor_state* state)
+{
+ struct r300_context* r300 = r300_context(pipe);
+
+ memcpy(r300->scissor_state.state, state,
+ sizeof(struct pipe_scissor_state));
+
+ r300->scissor_state.dirty = TRUE;
+}
+
+static void r300_set_viewport_state(struct pipe_context* pipe,
+ const struct pipe_viewport_state* state)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_viewport_state* viewport =
+ (struct r300_viewport_state*)r300->viewport_state.state;
+
+ r300->viewport = *state;
+
+ if (r300->draw) {
+ draw_flush(r300->draw);
+ draw_set_viewport_state(r300->draw, state);
+ viewport->vte_control = R300_VTX_XY_FMT | R300_VTX_Z_FMT;
+ return;
+ }
+
+ /* Do the transform in HW. */
+ viewport->vte_control = R300_VTX_W0_FMT;
+
+ if (state->scale[0] != 1.0f) {
+ viewport->xscale = state->scale[0];
+ viewport->vte_control |= R300_VPORT_X_SCALE_ENA;
+ }
+ if (state->scale[1] != 1.0f) {
+ viewport->yscale = state->scale[1];
+ viewport->vte_control |= R300_VPORT_Y_SCALE_ENA;
+ }
+ if (state->scale[2] != 1.0f) {
+ viewport->zscale = state->scale[2];
+ viewport->vte_control |= R300_VPORT_Z_SCALE_ENA;
+ }
+ if (state->translate[0] != 0.0f) {
+ viewport->xoffset = state->translate[0];
+ viewport->vte_control |= R300_VPORT_X_OFFSET_ENA;
+ }
+ if (state->translate[1] != 0.0f) {
+ viewport->yoffset = state->translate[1];
+ viewport->vte_control |= R300_VPORT_Y_OFFSET_ENA;
+ }
+ if (state->translate[2] != 0.0f) {
+ viewport->zoffset = state->translate[2];
+ viewport->vte_control |= R300_VPORT_Z_OFFSET_ENA;
+ }
+
+ r300->viewport_state.dirty = TRUE;
+ if (r300->fs.state && r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) {
+ r300->fs_rc_constant_state.dirty = TRUE;
+ }
+}
+
+static void r300_set_vertex_buffers(struct pipe_context* pipe,
+ unsigned count,
+ const struct pipe_vertex_buffer* buffers)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct pipe_vertex_buffer *vbo;
+ unsigned i, max_index = (1 << 24) - 1;
+ boolean any_user_buffer = FALSE;
+
+ if (count == r300->vertex_buffer_count &&
+ memcmp(r300->vertex_buffer, buffers,
+ sizeof(struct pipe_vertex_buffer) * count) == 0) {
+ return;
+ }
+
+ if (r300->screen->caps.has_tcl) {
+ /* HW TCL. */
+ r300->incompatible_vb_layout = FALSE;
+
+ /* Check if the strides and offsets are aligned to the size of DWORD. */
+ for (i = 0; i < count; i++) {
+ if (buffers[i].buffer) {
+ if (buffers[i].stride % 4 != 0 ||
+ buffers[i].buffer_offset % 4 != 0) {
+ r300->incompatible_vb_layout = TRUE;
+ break;
+ }
+ }
+ }
+
+ for (i = 0; i < count; i++) {
+ /* Why, yes, I AM casting away constness. How did you know? */
+ vbo = (struct pipe_vertex_buffer*)&buffers[i];
+
+ /* Skip NULL buffers */
+ if (!buffers[i].buffer) {
+ continue;
+ }
+
+ if (r300_buffer_is_user_buffer(vbo->buffer)) {
+ any_user_buffer = TRUE;
+ }
+
+ if (vbo->max_index == ~0) {
+ /* if no VBO stride then only one vertex value so max index is 1 */
+ /* should think about converting to VS constants like svga does */
+ if (!vbo->stride)
+ vbo->max_index = 1;
+ else
+ vbo->max_index =
+ (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
+ }
+
+ max_index = MIN2(vbo->max_index, max_index);
+ }
+
+ r300->any_user_vbs = any_user_buffer;
+ r300->vertex_buffer_max_index = max_index;
+
+ } else {
+ /* SW TCL. */
+ draw_flush(r300->draw);
+ draw_set_vertex_buffers(r300->draw, count, buffers);
+ }
+
+ /* Common code. */
+ for (i = 0; i < count; i++) {
+ /* Reference our buffer. */
+ pipe_resource_reference(&r300->vertex_buffer[i].buffer, buffers[i].buffer);
+ }
+ for (; i < r300->vertex_buffer_count; i++) {
+ /* Dereference any old buffers. */
+ pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL);
+ }
+
+ memcpy(r300->vertex_buffer, buffers,
+ sizeof(struct pipe_vertex_buffer) * count);
+ r300->vertex_buffer_count = count;
+}
+
+/* Initialize the PSC tables. */
+static void r300_vertex_psc(struct r300_vertex_element_state *velems)
+{
+ struct r300_vertex_stream_state *vstream = &velems->vertex_stream;
+ uint16_t type, swizzle;
+ enum pipe_format format;
+ unsigned i;
+
+ if (velems->count > 16) {
+ fprintf(stderr, "r300: More than 16 vertex elements are not supported,"
+ " requested %i, using 16.\n", velems->count);
+ velems->count = 16;
+ }
+
+ /* Vertex shaders have no semantics on their inputs,
+ * so PSC should just route stuff based on the vertex elements,
+ * and not on attrib information. */
+ for (i = 0; i < velems->count; i++) {
+ format = velems->hw_format[i];
+
+ type = r300_translate_vertex_data_type(format);
+ if (type == R300_INVALID_FORMAT) {
+ fprintf(stderr, "r300: Bad vertex format %s.\n",
+ util_format_short_name(format));
+ assert(0);
+ abort();
+ }
+
+ type |= i << R300_DST_VEC_LOC_SHIFT;
+ swizzle = r300_translate_vertex_data_swizzle(format);
+
+ if (i & 1) {
+ vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
+ vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+ } else {
+ vstream->vap_prog_stream_cntl[i >> 1] |= type;
+ vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
+ }
+ }
+
+ /* Set the last vector in the PSC. */
+ if (i) {
+ i -= 1;
+ }
+ vstream->vap_prog_stream_cntl[i >> 1] |=
+ (R300_LAST_VEC << (i & 1 ? 16 : 0));
+
+ vstream->count = (i >> 1) + 1;
+}
+
+#define FORMAT_REPLACE(what, withwhat) \
+ case PIPE_FORMAT_##what: *format = PIPE_FORMAT_##withwhat; break
+
+static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
+ unsigned count,
+ const struct pipe_vertex_element* attribs)
+{
+ struct r300_vertex_element_state *velems;
+ unsigned i;
+ enum pipe_format *format;
+
+ assert(count <= PIPE_MAX_ATTRIBS);
+ velems = CALLOC_STRUCT(r300_vertex_element_state);
+ if (velems != NULL) {
+ velems->count = count;
+ memcpy(velems->velem, attribs, sizeof(struct pipe_vertex_element) * count);
+
+ if (r300_screen(pipe->screen)->caps.has_tcl) {
+ /* Set the best hw format in case the original format is not
+ * supported by hw. */
+ for (i = 0; i < count; i++) {
+ velems->hw_format[i] = velems->velem[i].src_format;
+ format = &velems->hw_format[i];
+
+ /* This is basically the list of unsupported formats.
+ * For now we don't care about the alignment, that's going to
+ * be sorted out after the PSC setup. */
+ switch (*format) {
+ FORMAT_REPLACE(R64_FLOAT, R32_FLOAT);
+ FORMAT_REPLACE(R64G64_FLOAT, R32G32_FLOAT);
+ FORMAT_REPLACE(R64G64B64_FLOAT, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R64G64B64A64_FLOAT, R32G32B32A32_FLOAT);
+
+ FORMAT_REPLACE(R32_UNORM, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_UNORM, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_UNORM, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_UNORM, R32G32B32A32_FLOAT);
+
+ FORMAT_REPLACE(R32_USCALED, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_USCALED, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_USCALED, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_USCALED,R32G32B32A32_FLOAT);
+
+ FORMAT_REPLACE(R32_SNORM, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_SNORM, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_SNORM, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_SNORM, R32G32B32A32_FLOAT);
+
+ FORMAT_REPLACE(R32_SSCALED, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_SSCALED, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_SSCALED, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_SSCALED,R32G32B32A32_FLOAT);
+
+ FORMAT_REPLACE(R32_FIXED, R32_FLOAT);
+ FORMAT_REPLACE(R32G32_FIXED, R32G32_FLOAT);
+ FORMAT_REPLACE(R32G32B32_FIXED, R32G32B32_FLOAT);
+ FORMAT_REPLACE(R32G32B32A32_FIXED, R32G32B32A32_FLOAT);
+
+ default:;
+ }
+
+ velems->incompatible_layout =
+ velems->incompatible_layout ||
+ velems->velem[i].src_format != velems->hw_format[i] ||
+ velems->velem[i].src_offset % 4 != 0;
+ }
+
+ /* Now setup PSC.
+ * The unused components will be replaced by (..., 0, 1). */
+ r300_vertex_psc(velems);
+
+ /* Align the formats to the size of DWORD.
+ * We only care about the blocksizes of the formats since
+ * swizzles are already set up.
+ * Also compute the vertex size. */
+ for (i = 0; i < count; i++) {
+ /* This is OK because we check for aligned strides too. */
+ velems->hw_format_size[i] =
+ align(util_format_get_blocksize(velems->hw_format[i]), 4);
+ velems->vertex_size_dwords += velems->hw_format_size[i] / 4;
+ }
+ }
+ }
+ return velems;
+}
+
+static void r300_bind_vertex_elements_state(struct pipe_context *pipe,
+ void *state)
+{
+ struct r300_context *r300 = r300_context(pipe);
+ struct r300_vertex_element_state *velems = state;
+
+ if (velems == NULL) {
+ return;
+ }
+
+ r300->velems = velems;
+
+ if (r300->draw) {
+ draw_flush(r300->draw);
+ draw_set_vertex_elements(r300->draw, velems->count, velems->velem);
+ return;
+ }
+
+ UPDATE_STATE(&velems->vertex_stream, r300->vertex_stream_state);
+ r300->vertex_stream_state.size = (1 + velems->vertex_stream.count) * 2;
+}
+
+static void r300_delete_vertex_elements_state(struct pipe_context *pipe, void *state)
+{
+ FREE(state);
+}
+
+static void* r300_create_vs_state(struct pipe_context* pipe,
+ const struct pipe_shader_state* shader)
+{
+ struct r300_context* r300 = r300_context(pipe);
+
+ struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader);
+
+ /* Copy state directly into shader. */
+ vs->state = *shader;
+ vs->state.tokens = tgsi_dup_tokens(shader->tokens);
+
+ if (r300->screen->caps.has_tcl) {
+ r300_init_vs_outputs(vs);
+ r300_translate_vertex_shader(r300, vs);
+ } else {
+ r300_draw_init_vertex_shader(r300->draw, vs);
+ }
+
+ return vs;
+}
+
+static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
+
+ if (vs == NULL) {
+ r300->vs_state.state = NULL;
+ return;
+ }
+ if (vs == r300->vs_state.state) {
+ return;
+ }
+ r300->vs_state.state = vs;
+
+ /* The majority of the RS block bits is dependent on the vertex shader. */
+ r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */
+
+ if (r300->screen->caps.has_tcl) {
+ r300->vs_state.dirty = TRUE;
+ r300->vs_state.size =
+ vs->code.length + 9 +
+ (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0);
+
+ if (vs->externals_count) {
+ r300->vs_constants.dirty = TRUE;
+ r300->vs_constants.size = vs->externals_count * 4 + 3;
+ } else {
+ r300->vs_constants.size = 0;
+ }
+
+ r300->pvs_flush.dirty = TRUE;
+ } else {
+ draw_flush(r300->draw);
+ draw_bind_vertex_shader(r300->draw,
+ (struct draw_vertex_shader*)vs->draw_vs);
+ }
+}
+
+static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
+
+ if (r300->screen->caps.has_tcl) {
+ rc_constants_destroy(&vs->code.constants);
+ } else {
+ draw_delete_vertex_shader(r300->draw,
+ (struct draw_vertex_shader*)vs->draw_vs);
+ }
+
+ FREE((void*)vs->state.tokens);
+ FREE(shader);
+}
+
+static void r300_set_constant_buffer(struct pipe_context *pipe,
+ uint shader, uint index,
+ struct pipe_resource *buf)
+{
+ struct r300_context* r300 = r300_context(pipe);
+ struct r300_constant_buffer *cbuf;
+ struct pipe_transfer *tr;
+ float *mapped;
+ int max_size = 0, max_size_bytes = 0, clamped_size = 0;
+
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ cbuf = (struct r300_constant_buffer*)r300->vs_constants.state;
+ max_size = 256;
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ cbuf = (struct r300_constant_buffer*)r300->fs_constants.state;
+ if (r300->screen->caps.is_r500) {
+ max_size = 256;
+ } else {
+ max_size = 32;
+ }
+ break;
+ default:
+ assert(0);
+ return;
+ }
+ max_size_bytes = max_size * 4 * sizeof(float);
+
+ if (buf == NULL || buf->width0 == 0 ||
+ (mapped = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &tr)) == NULL)
+ {
+ cbuf->count = 0;
+ return;
+ }
+
+ if (shader == PIPE_SHADER_FRAGMENT ||
+ (shader == PIPE_SHADER_VERTEX && r300->screen->caps.has_tcl)) {
+ assert((buf->width0 % (4 * sizeof(float))) == 0);
+
+ /* Check the size of the constant buffer. */
+ /* XXX Subtract immediates and RC_STATE_* variables. */
+ if (buf->width0 > max_size_bytes) {
+ fprintf(stderr, "r300: Max size of the constant buffer is "
+ "%i*4 floats.\n", max_size);
+ }
+
+ clamped_size = MIN2(buf->width0, max_size_bytes);
+ cbuf->count = clamped_size / (4 * sizeof(float));
+
+ if (shader == PIPE_SHADER_FRAGMENT && !r300->screen->caps.is_r500) {
+ unsigned i,j;
+
+ /* Convert constants to float24. */
+ for (i = 0; i < cbuf->count; i++)
+ for (j = 0; j < 4; j++)
+ cbuf->constants[i][j] = pack_float24(mapped[i*4+j]);
+ } else {
+ memcpy(cbuf->constants, mapped, clamped_size);
+ }
+ }
+
+ if (shader == PIPE_SHADER_VERTEX) {
+ if (r300->screen->caps.has_tcl) {
+ if (r300->vs_constants.size) {
+ r300->vs_constants.dirty = TRUE;
+ }
+ r300->pvs_flush.dirty = TRUE;
+ } else if (r300->draw) {
+ draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX,
+ 0, mapped, buf->width0);
+ }
+ } else if (shader == PIPE_SHADER_FRAGMENT) {
+ r300->fs_constants.dirty = TRUE;
+ }
+
+ pipe_buffer_unmap(pipe, buf, tr);
+}
+
+void r300_init_state_functions(struct r300_context* r300)
+{
+ r300->context.create_blend_state = r300_create_blend_state;
+ r300->context.bind_blend_state = r300_bind_blend_state;
+ r300->context.delete_blend_state = r300_delete_blend_state;
+
+ r300->context.set_blend_color = r300_set_blend_color;
+
+ r300->context.set_clip_state = r300_set_clip_state;
+ r300->context.set_sample_mask = r300_set_sample_mask;
+
+ r300->context.set_constant_buffer = r300_set_constant_buffer;
+
+ r300->context.create_depth_stencil_alpha_state = r300_create_dsa_state;
+ r300->context.bind_depth_stencil_alpha_state = r300_bind_dsa_state;
+ r300->context.delete_depth_stencil_alpha_state = r300_delete_dsa_state;
+
+ r300->context.set_stencil_ref = r300_set_stencil_ref;
+
+ r300->context.set_framebuffer_state = r300_set_framebuffer_state;
+
+ r300->context.create_fs_state = r300_create_fs_state;
+ r300->context.bind_fs_state = r300_bind_fs_state;
+ r300->context.delete_fs_state = r300_delete_fs_state;
+
+ r300->context.set_polygon_stipple = r300_set_polygon_stipple;
+
+ r300->context.create_rasterizer_state = r300_create_rs_state;
+ r300->context.bind_rasterizer_state = r300_bind_rs_state;
+ r300->context.delete_rasterizer_state = r300_delete_rs_state;
+
+ r300->context.create_sampler_state = r300_create_sampler_state;
+ r300->context.bind_fragment_sampler_states = r300_bind_sampler_states;
+ r300->context.bind_vertex_sampler_states = r300_lacks_vertex_textures;
+ r300->context.delete_sampler_state = r300_delete_sampler_state;
+
+ r300->context.set_fragment_sampler_views = r300_set_fragment_sampler_views;
+ r300->context.create_sampler_view = r300_create_sampler_view;
+ r300->context.sampler_view_destroy = r300_sampler_view_destroy;
+
+ r300->context.set_scissor_state = r300_set_scissor_state;
+
+ r300->context.set_viewport_state = r300_set_viewport_state;
+
+ r300->context.set_vertex_buffers = r300_set_vertex_buffers;
+
+ r300->context.create_vertex_elements_state = r300_create_vertex_elements_state;
+ r300->context.bind_vertex_elements_state = r300_bind_vertex_elements_state;
+ r300->context.delete_vertex_elements_state = r300_delete_vertex_elements_state;
+
+ r300->context.create_vs_state = r300_create_vs_state;
+ r300->context.bind_vs_state = r300_bind_vs_state;
+ r300->context.delete_vs_state = r300_delete_vs_state;
+}
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
new file mode 100644
index 0000000000..3aa8deb63c
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -0,0 +1,684 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "draw/draw_context.h"
+
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "r300_context.h"
+#include "r300_fs.h"
+#include "r300_hyperz.h"
+#include "r300_screen.h"
+#include "r300_shader_semantics.h"
+#include "r300_state_derived.h"
+#include "r300_state_inlines.h"
+#include "r300_texture.h"
+#include "r300_vs.h"
+
+/* r300_state_derived: Various bits of state which are dependent upon
+ * currently bound CSO data. */
+
+enum r300_rs_swizzle {
+ SWIZ_XYZW = 0,
+ SWIZ_X001,
+ SWIZ_XY01,
+ SWIZ_0001,
+};
+
+static void r300_draw_emit_attrib(struct r300_context* r300,
+ enum attrib_emit emit,
+ enum interp_mode interp,
+ int index)
+{
+ struct r300_vertex_shader* vs = r300->vs_state.state;
+ struct tgsi_shader_info* info = &vs->info;
+ int output;
+
+ output = draw_find_shader_output(r300->draw,
+ info->output_semantic_name[index],
+ info->output_semantic_index[index]);
+ draw_emit_vertex_attr(&r300->vertex_info, emit, interp, output);
+}
+
+static void r300_draw_emit_all_attribs(struct r300_context* r300)
+{
+ struct r300_vertex_shader* vs = r300->vs_state.state;
+ struct r300_shader_semantics* vs_outputs = &vs->outputs;
+ int i, gen_count;
+
+ /* Position. */
+ if (vs_outputs->pos != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+ vs_outputs->pos);
+ } else {
+ assert(0);
+ }
+
+ /* Point size. */
+ if (vs_outputs->psize != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_1F_PSIZE, INTERP_POS,
+ vs_outputs->psize);
+ }
+
+ /* Colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (vs_outputs->color[i] != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
+ vs_outputs->color[i]);
+ }
+ }
+
+ /* Back-face colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (vs_outputs->bcolor[i] != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_LINEAR,
+ vs_outputs->bcolor[i]);
+ }
+ }
+
+ /* Texture coordinates. */
+ /* Only 8 generic vertex attributes can be used. If there are more,
+ * they won't be rasterized. */
+ gen_count = 0;
+ for (i = 0; i < ATTR_GENERIC_COUNT && gen_count < 8; i++) {
+ if (vs_outputs->generic[i] != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+ vs_outputs->generic[i]);
+ gen_count++;
+ }
+ }
+
+ /* Fog coordinates. */
+ if (gen_count < 8 && vs_outputs->fog != ATTR_UNUSED) {
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+ vs_outputs->fog);
+ gen_count++;
+ }
+
+ /* WPOS. */
+ if (r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED && gen_count < 8) {
+ DBG(r300, DBG_DRAW, "draw_emit_attrib: WPOS, index: %i\n",
+ vs_outputs->wpos);
+ r300_draw_emit_attrib(r300, EMIT_4F, INTERP_PERSPECTIVE,
+ vs_outputs->wpos);
+ }
+}
+
+/* Update the PSC tables for SW TCL, using Draw. */
+static void r300_swtcl_vertex_psc(struct r300_context *r300)
+{
+ struct r300_vertex_stream_state *vstream = r300->vertex_stream_state.state;
+ struct vertex_info *vinfo = &r300->vertex_info;
+ uint16_t type, swizzle;
+ enum pipe_format format;
+ unsigned i, attrib_count;
+ int* vs_output_tab = r300->stream_loc_notcl;
+
+ memset(vstream, 0, sizeof(struct r300_vertex_stream_state));
+
+ /* For each Draw attribute, route it to the fragment shader according
+ * to the vs_output_tab. */
+ attrib_count = vinfo->num_attribs;
+ DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
+ for (i = 0; i < attrib_count; i++) {
+ DBG(r300, DBG_DRAW, "r300: attrib: index %d, interp %d, emit %d,"
+ " vs_output_tab %d\n", vinfo->attrib[i].src_index,
+ vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
+ vs_output_tab[i]);
+
+ /* Make sure we have a proper destination for our attribute. */
+ assert(vs_output_tab[i] != -1);
+
+ format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
+
+ /* Obtain the type of data in this attribute. */
+ type = r300_translate_vertex_data_type(format);
+ if (type == R300_INVALID_FORMAT) {
+ fprintf(stderr, "r300: Bad vertex format %s.\n",
+ util_format_short_name(format));
+ assert(0);
+ abort();
+ }
+
+ type |= vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
+
+ /* Obtain the swizzle for this attribute. Note that the default
+ * swizzle in the hardware is not XYZW! */
+ swizzle = r300_translate_vertex_data_swizzle(format);
+
+ /* Add the attribute to the PSC table. */
+ if (i & 1) {
+ vstream->vap_prog_stream_cntl[i >> 1] |= type << 16;
+ vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+ } else {
+ vstream->vap_prog_stream_cntl[i >> 1] |= type;
+ vstream->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
+ }
+ }
+
+ /* Set the last vector in the PSC. */
+ if (i) {
+ i -= 1;
+ }
+ vstream->vap_prog_stream_cntl[i >> 1] |=
+ (R300_LAST_VEC << (i & 1 ? 16 : 0));
+
+ vstream->count = (i >> 1) + 1;
+ r300->vertex_stream_state.dirty = TRUE;
+ r300->vertex_stream_state.size = (1 + vstream->count) * 2;
+}
+
+static void r300_rs_col(struct r300_rs_block* rs, int id, int ptr,
+ enum r300_rs_swizzle swiz)
+{
+ rs->ip[id] |= R300_RS_COL_PTR(ptr);
+ if (swiz == SWIZ_0001) {
+ rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+ } else {
+ rs->ip[id] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
+ }
+ rs->inst[id] |= R300_RS_INST_COL_ID(id);
+}
+
+static void r300_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+ rs->inst[id] |= R300_RS_INST_COL_CN_WRITE |
+ R300_RS_INST_COL_ADDR(fp_offset);
+}
+
+static void r300_rs_tex(struct r300_rs_block* rs, int id, int ptr,
+ enum r300_rs_swizzle swiz)
+{
+ if (swiz == SWIZ_X001) {
+ rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
+ R300_RS_SEL_S(R300_RS_SEL_C0) |
+ R300_RS_SEL_T(R300_RS_SEL_K0) |
+ R300_RS_SEL_R(R300_RS_SEL_K0) |
+ R300_RS_SEL_Q(R300_RS_SEL_K1);
+ } else if (swiz == SWIZ_XY01) {
+ rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
+ R300_RS_SEL_S(R300_RS_SEL_C0) |
+ R300_RS_SEL_T(R300_RS_SEL_C1) |
+ R300_RS_SEL_R(R300_RS_SEL_K0) |
+ R300_RS_SEL_Q(R300_RS_SEL_K1);
+ } else {
+ rs->ip[id] |= R300_RS_TEX_PTR(ptr*4) |
+ R300_RS_SEL_S(R300_RS_SEL_C0) |
+ R300_RS_SEL_T(R300_RS_SEL_C1) |
+ R300_RS_SEL_R(R300_RS_SEL_C2) |
+ R300_RS_SEL_Q(R300_RS_SEL_C3);
+ }
+ rs->inst[id] |= R300_RS_INST_TEX_ID(id);
+}
+
+static void r300_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+ rs->inst[id] |= R300_RS_INST_TEX_CN_WRITE |
+ R300_RS_INST_TEX_ADDR(fp_offset);
+}
+
+static void r500_rs_col(struct r300_rs_block* rs, int id, int ptr,
+ enum r300_rs_swizzle swiz)
+{
+ rs->ip[id] |= R500_RS_COL_PTR(ptr);
+ if (swiz == SWIZ_0001) {
+ rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
+ } else {
+ rs->ip[id] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGBA);
+ }
+ rs->inst[id] |= R500_RS_INST_COL_ID(id);
+}
+
+static void r500_rs_col_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+ rs->inst[id] |= R500_RS_INST_COL_CN_WRITE |
+ R500_RS_INST_COL_ADDR(fp_offset);
+}
+
+static void r500_rs_tex(struct r300_rs_block* rs, int id, int ptr,
+ enum r300_rs_swizzle swiz)
+{
+ int rs_tex_comp = ptr*4;
+
+ if (swiz == SWIZ_X001) {
+ rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
+ R500_RS_SEL_T(R500_RS_IP_PTR_K0) |
+ R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
+ R500_RS_SEL_Q(R500_RS_IP_PTR_K1);
+ } else if (swiz == SWIZ_XY01) {
+ rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
+ R500_RS_SEL_T(rs_tex_comp + 1) |
+ R500_RS_SEL_R(R500_RS_IP_PTR_K0) |
+ R500_RS_SEL_Q(R500_RS_IP_PTR_K1);
+ } else {
+ rs->ip[id] |= R500_RS_SEL_S(rs_tex_comp) |
+ R500_RS_SEL_T(rs_tex_comp + 1) |
+ R500_RS_SEL_R(rs_tex_comp + 2) |
+ R500_RS_SEL_Q(rs_tex_comp + 3);
+ }
+ rs->inst[id] |= R500_RS_INST_TEX_ID(id);
+}
+
+static void r500_rs_tex_write(struct r300_rs_block* rs, int id, int fp_offset)
+{
+ rs->inst[id] |= R500_RS_INST_TEX_CN_WRITE |
+ R500_RS_INST_TEX_ADDR(fp_offset);
+}
+
+/* Set up the RS block.
+ *
+ * This is the part of the chipset that is responsible for linking vertex
+ * and fragment shaders and stuffed texture coordinates.
+ *
+ * The rasterizer reads data from VAP, which produces vertex shader outputs,
+ * and GA, which produces stuffed texture coordinates. VAP outputs have
+ * precedence over GA. All outputs must be rasterized otherwise it locks up.
+ * If there are more outputs rasterized than is set in VAP/GA, it locks up
+ * too. The funky part is that this info has been pretty much obtained by trial
+ * and error. */
+static void r300_update_rs_block(struct r300_context *r300)
+{
+ struct r300_vertex_shader *vs = r300->vs_state.state;
+ struct r300_shader_semantics *vs_outputs = &vs->outputs;
+ struct r300_shader_semantics *fs_inputs = &r300_fs(r300)->shader->inputs;
+ struct r300_rs_block rs = {0};
+ int i, col_count = 0, tex_count = 0, fp_offset = 0, count, loc = 0;
+ void (*rX00_rs_col)(struct r300_rs_block*, int, int, enum r300_rs_swizzle);
+ void (*rX00_rs_col_write)(struct r300_rs_block*, int, int);
+ void (*rX00_rs_tex)(struct r300_rs_block*, int, int, enum r300_rs_swizzle);
+ void (*rX00_rs_tex_write)(struct r300_rs_block*, int, int);
+ boolean any_bcolor_used = vs_outputs->bcolor[0] != ATTR_UNUSED ||
+ vs_outputs->bcolor[1] != ATTR_UNUSED;
+ int *stream_loc_notcl = r300->stream_loc_notcl;
+
+ if (r300->screen->caps.is_r500) {
+ rX00_rs_col = r500_rs_col;
+ rX00_rs_col_write = r500_rs_col_write;
+ rX00_rs_tex = r500_rs_tex;
+ rX00_rs_tex_write = r500_rs_tex_write;
+ } else {
+ rX00_rs_col = r300_rs_col;
+ rX00_rs_col_write = r300_rs_col_write;
+ rX00_rs_tex = r300_rs_tex;
+ rX00_rs_tex_write = r300_rs_tex_write;
+ }
+
+ /* The position is always present in VAP. */
+ rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_POS;
+ rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT;
+ stream_loc_notcl[loc++] = 0;
+
+ /* Set up the point size in VAP. */
+ if (vs_outputs->psize != ATTR_UNUSED) {
+ rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
+ stream_loc_notcl[loc++] = 1;
+ }
+
+ /* Set up and rasterize colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (vs_outputs->color[i] != ATTR_UNUSED || any_bcolor_used ||
+ vs_outputs->color[1] != ATTR_UNUSED) {
+ /* Set up the color in VAP. */
+ rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR;
+ rs.vap_out_vtx_fmt[0] |=
+ R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i;
+ stream_loc_notcl[loc++] = 2 + i;
+
+ /* Rasterize it. */
+ rX00_rs_col(&rs, col_count, col_count, SWIZ_XYZW);
+
+ /* Write it to the FS input register if it's needed by the FS. */
+ if (fs_inputs->color[i] != ATTR_UNUSED) {
+ rX00_rs_col_write(&rs, col_count, fp_offset);
+ fp_offset++;
+
+ DBG(r300, DBG_RS,
+ "r300: Rasterized color %i written to FS.\n", i);
+ } else {
+ DBG(r300, DBG_RS, "r300: Rasterized color %i unused.\n", i);
+ }
+ col_count++;
+ } else {
+ /* Skip the FS input register, leave it uninitialized. */
+ /* If we try to set it to (0,0,0,1), it will lock up. */
+ if (fs_inputs->color[i] != ATTR_UNUSED) {
+ fp_offset++;
+
+ DBG(r300, DBG_RS, "r300: FS input color %i unassigned%s.\n",
+ i);
+ }
+ }
+ }
+
+ /* Set up back-face colors. The rasterizer will do the color selection
+ * automatically. */
+ if (any_bcolor_used) {
+ if (r300->two_sided_color) {
+ /* Rasterize as back-face colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ rs.vap_vsm_vtx_assm |= R300_INPUT_CNTL_COLOR;
+ rs.vap_out_vtx_fmt[0] |= R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << (2+i);
+ stream_loc_notcl[loc++] = 4 + i;
+ }
+ } else {
+ /* Rasterize two fake texcoords to prevent from the two-sided color
+ * selection. */
+ /* XXX Consider recompiling the vertex shader to save 2 RS units. */
+ for (i = 0; i < 2; i++) {
+ rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
+ rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
+ stream_loc_notcl[loc++] = 6 + tex_count;
+
+ /* Rasterize it. */
+ rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW);
+ tex_count++;
+ }
+ }
+ }
+
+ /* Rasterize texture coordinates. */
+ for (i = 0; i < ATTR_GENERIC_COUNT && tex_count < 8; i++) {
+ bool sprite_coord = !!(r300->sprite_coord_enable & (1 << i));
+
+ if (vs_outputs->generic[i] != ATTR_UNUSED || sprite_coord) {
+ if (!sprite_coord) {
+ /* Set up the texture coordinates in VAP. */
+ rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
+ rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
+ stream_loc_notcl[loc++] = 6 + tex_count;
+ }
+
+ /* Rasterize it. */
+ rX00_rs_tex(&rs, tex_count, tex_count,
+ sprite_coord ? SWIZ_XY01 : SWIZ_XYZW);
+
+ /* Write it to the FS input register if it's needed by the FS. */
+ if (fs_inputs->generic[i] != ATTR_UNUSED) {
+ rX00_rs_tex_write(&rs, tex_count, fp_offset);
+ fp_offset++;
+
+ DBG(r300, DBG_RS,
+ "r300: Rasterized generic %i written to FS%s.\n",
+ i, sprite_coord ? " (sprite coord)" : "");
+ } else {
+ DBG(r300, DBG_RS,
+ "r300: Rasterized generic %i unused%s.\n",
+ i, sprite_coord ? " (sprite coord)" : "");
+ }
+ tex_count++;
+ } else {
+ /* Skip the FS input register, leave it uninitialized. */
+ /* If we try to set it to (0,0,0,1), it will lock up. */
+ if (fs_inputs->generic[i] != ATTR_UNUSED) {
+ fp_offset++;
+
+ DBG(r300, DBG_RS, "r300: FS input generic %i unassigned%s.\n",
+ i, sprite_coord ? " (sprite coord)" : "");
+ }
+ }
+ }
+
+ /* Rasterize fog coordinates. */
+ if (vs_outputs->fog != ATTR_UNUSED && tex_count < 8) {
+ /* Set up the fog coordinates in VAP. */
+ rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
+ rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
+ stream_loc_notcl[loc++] = 6 + tex_count;
+
+ /* Rasterize it. */
+ rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_X001);
+
+ /* Write it to the FS input register if it's needed by the FS. */
+ if (fs_inputs->fog != ATTR_UNUSED) {
+ rX00_rs_tex_write(&rs, tex_count, fp_offset);
+ fp_offset++;
+
+ DBG(r300, DBG_RS, "r300: Rasterized fog written to FS.\n");
+ } else {
+ DBG(r300, DBG_RS, "r300: Rasterized fog unused.\n");
+ }
+ tex_count++;
+ } else {
+ /* Skip the FS input register, leave it uninitialized. */
+ /* If we try to set it to (0,0,0,1), it will lock up. */
+ if (fs_inputs->fog != ATTR_UNUSED) {
+ fp_offset++;
+
+ DBG(r300, DBG_RS, "r300: FS input fog unassigned.\n");
+ }
+ }
+
+ /* Rasterize WPOS. */
+ /* Don't set it in VAP if the FS doesn't need it. */
+ if (fs_inputs->wpos != ATTR_UNUSED && tex_count < 8) {
+ /* Set up the WPOS coordinates in VAP. */
+ rs.vap_vsm_vtx_assm |= (R300_INPUT_CNTL_TC0 << tex_count);
+ rs.vap_out_vtx_fmt[1] |= (4 << (3 * tex_count));
+ stream_loc_notcl[loc++] = 6 + tex_count;
+
+ /* Rasterize it. */
+ rX00_rs_tex(&rs, tex_count, tex_count, SWIZ_XYZW);
+
+ /* Write it to the FS input register. */
+ rX00_rs_tex_write(&rs, tex_count, fp_offset);
+
+ DBG(r300, DBG_RS, "r300: Rasterized WPOS written to FS.\n");
+
+ fp_offset++;
+ tex_count++;
+ }
+
+ /* Invalidate the rest of the no-TCL (GA) stream locations. */
+ for (; loc < 16;) {
+ stream_loc_notcl[loc++] = -1;
+ }
+
+ /* Rasterize at least one color, or bad things happen. */
+ if (col_count == 0 && tex_count == 0) {
+ rX00_rs_col(&rs, 0, 0, SWIZ_0001);
+ col_count++;
+
+ DBG(r300, DBG_RS, "r300: Rasterized color 0 to prevent lockups.\n");
+ }
+
+ DBG(r300, DBG_RS, "r300: --- Rasterizer status ---: colors: %i, "
+ "generics: %i.\n", col_count, tex_count);
+
+ rs.count = (tex_count*4) | (col_count << R300_IC_COUNT_SHIFT) |
+ R300_HIRES_EN;
+
+ count = MAX3(col_count, tex_count, 1);
+ rs.inst_count = count - 1;
+
+ /* Now, after all that, see if we actually need to update the state. */
+ if (memcmp(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block))) {
+ memcpy(r300->rs_block_state.state, &rs, sizeof(struct r300_rs_block));
+ r300->rs_block_state.size = 11 + count*2;
+ }
+}
+
+static void r300_merge_textures_and_samplers(struct r300_context* r300)
+{
+ struct r300_textures_state *state =
+ (struct r300_textures_state*)r300->textures_state.state;
+ struct r300_texture_sampler_state *texstate;
+ struct r300_sampler_state *sampler;
+ struct r300_sampler_view *view;
+ struct r300_texture *tex;
+ unsigned min_level, max_level, i, size;
+ unsigned count = MIN2(state->sampler_view_count,
+ state->sampler_state_count);
+ unsigned char depth_swizzle[4] = {
+ UTIL_FORMAT_SWIZZLE_X,
+ UTIL_FORMAT_SWIZZLE_X,
+ UTIL_FORMAT_SWIZZLE_X,
+ UTIL_FORMAT_SWIZZLE_X
+ };
+
+ /* The KIL opcode fix, see below. */
+ if (!count && !r300->screen->caps.is_r500)
+ count = 1;
+
+ state->tx_enable = 0;
+ state->count = 0;
+ size = 2;
+
+ for (i = 0; i < count; i++) {
+ if (state->sampler_views[i] && state->sampler_states[i]) {
+ state->tx_enable |= 1 << i;
+
+ view = state->sampler_views[i];
+ tex = r300_texture(view->base.texture);
+ sampler = state->sampler_states[i];
+
+ texstate = &state->regs[i];
+ texstate->format = view->format;
+ texstate->filter0 = sampler->filter0;
+ texstate->filter1 = sampler->filter1;
+ texstate->border_color = sampler->border_color;
+
+ /* Assign a texture cache region. */
+ texstate->format.format1 |= view->texcache_region;
+
+ /* If compare mode is disabled, the sampler view swizzles
+ * are stored in the format.
+ * Otherwise, swizzles must be applied after the compare mode
+ * in the fragment shader. */
+ if (util_format_is_depth_or_stencil(tex->b.b.format)) {
+ if (sampler->state.compare_mode == PIPE_TEX_COMPARE_NONE) {
+ texstate->format.format1 |=
+ r300_get_swizzle_combined(depth_swizzle, view->swizzle);
+ } else {
+ texstate->format.format1 |=
+ r300_get_swizzle_combined(depth_swizzle, 0);
+ }
+ }
+
+ /* to emulate 1D textures through 2D ones correctly */
+ if (tex->b.b.target == PIPE_TEXTURE_1D) {
+ texstate->filter0 &= ~R300_TX_WRAP_T_MASK;
+ texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
+ }
+
+ if (tex->uses_pitch) {
+ /* NPOT textures don't support mip filter, unfortunately.
+ * This prevents incorrect rendering. */
+ texstate->filter0 &= ~R300_TX_MIN_FILTER_MIP_MASK;
+
+ /* Mask out the mirrored flag. */
+ if (texstate->filter0 & R300_TX_WRAP_S(R300_TX_MIRRORED)) {
+ texstate->filter0 &= ~R300_TX_WRAP_S(R300_TX_MIRRORED);
+ }
+ if (texstate->filter0 & R300_TX_WRAP_T(R300_TX_MIRRORED)) {
+ texstate->filter0 &= ~R300_TX_WRAP_T(R300_TX_MIRRORED);
+ }
+
+ /* Change repeat to clamp-to-edge.
+ * (the repeat bit has a value of 0, no masking needed). */
+ if ((texstate->filter0 & R300_TX_WRAP_S_MASK) ==
+ R300_TX_WRAP_S(R300_TX_REPEAT)) {
+ texstate->filter0 |= R300_TX_WRAP_S(R300_TX_CLAMP_TO_EDGE);
+ }
+ if ((texstate->filter0 & R300_TX_WRAP_T_MASK) ==
+ R300_TX_WRAP_T(R300_TX_REPEAT)) {
+ texstate->filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
+ }
+ } else {
+ /* determine min/max levels */
+ /* the MAX_MIP level is the largest (finest) one */
+ max_level = MIN3(sampler->max_lod + view->base.first_level,
+ tex->b.b.last_level, view->base.last_level);
+ min_level = MIN2(sampler->min_lod + view->base.first_level,
+ max_level);
+ texstate->format.format0 |= R300_TX_NUM_LEVELS(max_level);
+ texstate->filter0 |= R300_TX_MAX_MIP_LEVEL(min_level);
+ }
+
+ texstate->filter0 |= i << 28;
+
+ size += 16;
+ state->count = i+1;
+ } else {
+ /* For the KIL opcode to work on r3xx-r4xx, the texture unit
+ * assigned to this opcode (it's always the first one) must be
+ * enabled. Otherwise the opcode doesn't work.
+ *
+ * In order to not depend on the fragment shader, we just make
+ * the first unit enabled all the time. */
+ if (i == 0 && !r300->screen->caps.is_r500) {
+ pipe_sampler_view_reference(
+ (struct pipe_sampler_view**)&state->sampler_views[i],
+ &r300->texkill_sampler->base);
+
+ state->tx_enable |= 1 << i;
+
+ texstate = &state->regs[i];
+
+ /* Just set some valid state. */
+ texstate->format = r300->texkill_sampler->format;
+ texstate->filter0 =
+ r300_translate_tex_filters(PIPE_TEX_FILTER_NEAREST,
+ PIPE_TEX_FILTER_NEAREST,
+ PIPE_TEX_FILTER_NEAREST,
+ FALSE);
+ texstate->filter1 = 0;
+ texstate->border_color = 0;
+
+ texstate->filter0 |= i << 28;
+ size += 16;
+ state->count = i+1;
+ }
+ }
+ }
+
+ r300->textures_state.size = size;
+
+ /* Pick a fragment shader based on either the texture compare state
+ * or the uses_pitch flag. */
+ if (r300->fs.state && count) {
+ if (r300_pick_fragment_shader(r300)) {
+ r300_mark_fs_code_dirty(r300);
+ }
+ }
+}
+
+void r300_update_derived_state(struct r300_context* r300)
+{
+ if (r300->textures_state.dirty) {
+ r300_merge_textures_and_samplers(r300);
+ }
+
+ if (r300->rs_block_state.dirty) {
+ r300_update_rs_block(r300);
+
+ if (r300->draw) {
+ memset(&r300->vertex_info, 0, sizeof(struct vertex_info));
+ r300_draw_emit_all_attribs(r300);
+ draw_compute_vertex_size(&r300->vertex_info);
+ r300_swtcl_vertex_psc(r300);
+ }
+ }
+
+ r300_update_hyperz_state(r300);
+}
diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h
new file mode 100644
index 0000000000..71a4a47b00
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state_derived.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_STATE_DERIVED_H
+#define R300_STATE_DERIVED_H
+
+struct r300_context;
+
+void r300_update_derived_state(struct r300_context* r300);
+
+#endif /* R300_STATE_DERIVED_H */
diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h
new file mode 100644
index 0000000000..03ec127ff7
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state_inlines.h
@@ -0,0 +1,454 @@
+/*
+ * Copyright 2009 Joakim Sindholt <opensource@zhasha.com>
+ * Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_STATE_INLINES_H
+#define R300_STATE_INLINES_H
+
+#include "draw/draw_vertex.h"
+
+#include "pipe/p_format.h"
+
+#include "util/u_format.h"
+
+#include "r300_reg.h"
+
+#include <stdio.h>
+
+/* Some maths. These should probably find their way to u_math, if needed. */
+
+static INLINE int pack_float_16_6x(float f) {
+ return ((int)(f * 6.0) & 0xffff);
+}
+
+/* Blend state. */
+
+static INLINE uint32_t r300_translate_blend_function(int blend_func)
+{
+ switch (blend_func) {
+ case PIPE_BLEND_ADD:
+ return R300_COMB_FCN_ADD_CLAMP;
+ case PIPE_BLEND_SUBTRACT:
+ return R300_COMB_FCN_SUB_CLAMP;
+ case PIPE_BLEND_REVERSE_SUBTRACT:
+ return R300_COMB_FCN_RSUB_CLAMP;
+ case PIPE_BLEND_MIN:
+ return R300_COMB_FCN_MIN;
+ case PIPE_BLEND_MAX:
+ return R300_COMB_FCN_MAX;
+ default:
+ fprintf(stderr, "r300: Unknown blend function %d\n", blend_func);
+ assert(0);
+ break;
+ }
+ return 0;
+}
+
+/* XXX we can also offer the D3D versions of some of these... */
+static INLINE uint32_t r300_translate_blend_factor(int blend_fact)
+{
+ switch (blend_fact) {
+ case PIPE_BLENDFACTOR_ONE:
+ return R300_BLEND_GL_ONE;
+ case PIPE_BLENDFACTOR_SRC_COLOR:
+ return R300_BLEND_GL_SRC_COLOR;
+ case PIPE_BLENDFACTOR_SRC_ALPHA:
+ return R300_BLEND_GL_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_DST_ALPHA:
+ return R300_BLEND_GL_DST_ALPHA;
+ case PIPE_BLENDFACTOR_DST_COLOR:
+ return R300_BLEND_GL_DST_COLOR;
+ case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+ return R300_BLEND_GL_SRC_ALPHA_SATURATE;
+ case PIPE_BLENDFACTOR_CONST_COLOR:
+ return R300_BLEND_GL_CONST_COLOR;
+ case PIPE_BLENDFACTOR_CONST_ALPHA:
+ return R300_BLEND_GL_CONST_ALPHA;
+ case PIPE_BLENDFACTOR_ZERO:
+ return R300_BLEND_GL_ZERO;
+ case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+ return R300_BLEND_GL_ONE_MINUS_SRC_COLOR;
+ case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+ return R300_BLEND_GL_ONE_MINUS_SRC_ALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+ return R300_BLEND_GL_ONE_MINUS_DST_ALPHA;
+ case PIPE_BLENDFACTOR_INV_DST_COLOR:
+ return R300_BLEND_GL_ONE_MINUS_DST_COLOR;
+ case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+ return R300_BLEND_GL_ONE_MINUS_CONST_COLOR;
+ case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+ return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA;
+
+ case PIPE_BLENDFACTOR_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_SRC1_ALPHA:
+ case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+ case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+ fprintf(stderr, "r300: Implementation error: "
+ "Bad blend factor %d not supported!\n", blend_fact);
+ assert(0);
+ break;
+
+ default:
+ fprintf(stderr, "r300: Unknown blend factor %d\n", blend_fact);
+ assert(0);
+ break;
+ }
+ return 0;
+}
+
+/* DSA state. */
+
+static INLINE uint32_t r300_translate_depth_stencil_function(int zs_func)
+{
+ switch (zs_func) {
+ case PIPE_FUNC_NEVER:
+ return R300_ZS_NEVER;
+ case PIPE_FUNC_LESS:
+ return R300_ZS_LESS;
+ case PIPE_FUNC_EQUAL:
+ return R300_ZS_EQUAL;
+ case PIPE_FUNC_LEQUAL:
+ return R300_ZS_LEQUAL;
+ case PIPE_FUNC_GREATER:
+ return R300_ZS_GREATER;
+ case PIPE_FUNC_NOTEQUAL:
+ return R300_ZS_NOTEQUAL;
+ case PIPE_FUNC_GEQUAL:
+ return R300_ZS_GEQUAL;
+ case PIPE_FUNC_ALWAYS:
+ return R300_ZS_ALWAYS;
+ default:
+ fprintf(stderr, "r300: Unknown depth/stencil function %d\n",
+ zs_func);
+ assert(0);
+ break;
+ }
+ return 0;
+}
+
+static INLINE uint32_t r300_translate_stencil_op(int s_op)
+{
+ switch (s_op) {
+ case PIPE_STENCIL_OP_KEEP:
+ return R300_ZS_KEEP;
+ case PIPE_STENCIL_OP_ZERO:
+ return R300_ZS_ZERO;
+ case PIPE_STENCIL_OP_REPLACE:
+ return R300_ZS_REPLACE;
+ case PIPE_STENCIL_OP_INCR:
+ return R300_ZS_INCR;
+ case PIPE_STENCIL_OP_DECR:
+ return R300_ZS_DECR;
+ case PIPE_STENCIL_OP_INCR_WRAP:
+ return R300_ZS_INCR_WRAP;
+ case PIPE_STENCIL_OP_DECR_WRAP:
+ return R300_ZS_DECR_WRAP;
+ case PIPE_STENCIL_OP_INVERT:
+ return R300_ZS_INVERT;
+ default:
+ fprintf(stderr, "r300: Unknown stencil op %d", s_op);
+ assert(0);
+ break;
+ }
+ return 0;
+}
+
+static INLINE uint32_t r300_translate_alpha_function(int alpha_func)
+{
+ switch (alpha_func) {
+ case PIPE_FUNC_NEVER:
+ return R300_FG_ALPHA_FUNC_NEVER;
+ case PIPE_FUNC_LESS:
+ return R300_FG_ALPHA_FUNC_LESS;
+ case PIPE_FUNC_EQUAL:
+ return R300_FG_ALPHA_FUNC_EQUAL;
+ case PIPE_FUNC_LEQUAL:
+ return R300_FG_ALPHA_FUNC_LE;
+ case PIPE_FUNC_GREATER:
+ return R300_FG_ALPHA_FUNC_GREATER;
+ case PIPE_FUNC_NOTEQUAL:
+ return R300_FG_ALPHA_FUNC_NOTEQUAL;
+ case PIPE_FUNC_GEQUAL:
+ return R300_FG_ALPHA_FUNC_GE;
+ case PIPE_FUNC_ALWAYS:
+ return R300_FG_ALPHA_FUNC_ALWAYS;
+ default:
+ fprintf(stderr, "r300: Unknown alpha function %d", alpha_func);
+ assert(0);
+ break;
+ }
+ return 0;
+}
+
+static INLINE uint32_t
+r300_translate_polygon_mode_front(unsigned mode) {
+ switch (mode)
+ {
+ case PIPE_POLYGON_MODE_FILL:
+ return R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
+ case PIPE_POLYGON_MODE_LINE:
+ return R300_GA_POLY_MODE_FRONT_PTYPE_LINE;
+ case PIPE_POLYGON_MODE_POINT:
+ return R300_GA_POLY_MODE_FRONT_PTYPE_POINT;
+
+ default:
+ fprintf(stderr, "r300: Bad polygon mode %i in %s\n", mode,
+ __FUNCTION__);
+ return R300_GA_POLY_MODE_FRONT_PTYPE_TRI;
+ }
+}
+
+static INLINE uint32_t
+r300_translate_polygon_mode_back(unsigned mode) {
+ switch (mode)
+ {
+ case PIPE_POLYGON_MODE_FILL:
+ return R300_GA_POLY_MODE_BACK_PTYPE_TRI;
+ case PIPE_POLYGON_MODE_LINE:
+ return R300_GA_POLY_MODE_BACK_PTYPE_LINE;
+ case PIPE_POLYGON_MODE_POINT:
+ return R300_GA_POLY_MODE_BACK_PTYPE_POINT;
+
+ default:
+ fprintf(stderr, "r300: Bad polygon mode %i in %s\n", mode,
+ __FUNCTION__);
+ return R300_GA_POLY_MODE_BACK_PTYPE_TRI;
+ }
+}
+
+/* Texture sampler state. */
+
+static INLINE uint32_t r300_translate_wrap(int wrap)
+{
+ switch (wrap) {
+ case PIPE_TEX_WRAP_REPEAT:
+ return R300_TX_REPEAT;
+ case PIPE_TEX_WRAP_CLAMP:
+ return R300_TX_CLAMP;
+ case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+ return R300_TX_CLAMP_TO_EDGE;
+ case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+ return R300_TX_CLAMP_TO_BORDER;
+ case PIPE_TEX_WRAP_MIRROR_REPEAT:
+ return R300_TX_REPEAT | R300_TX_MIRRORED;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP:
+ return R300_TX_CLAMP | R300_TX_MIRRORED;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+ return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED;
+ case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+ return R300_TX_CLAMP_TO_BORDER | R300_TX_MIRRORED;
+ default:
+ fprintf(stderr, "r300: Unknown texture wrap %d", wrap);
+ assert(0);
+ return 0;
+ }
+}
+
+static INLINE uint32_t r300_translate_tex_filters(int min, int mag, int mip,
+ int is_anisotropic)
+{
+ uint32_t retval = 0;
+ if (is_anisotropic)
+ retval |= R300_TX_MIN_FILTER_ANISO | R300_TX_MAG_FILTER_ANISO;
+ else {
+ switch (min) {
+ case PIPE_TEX_FILTER_NEAREST:
+ retval |= R300_TX_MIN_FILTER_NEAREST;
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ retval |= R300_TX_MIN_FILTER_LINEAR;
+ break;
+ default:
+ fprintf(stderr, "r300: Unknown texture filter %d\n", min);
+ assert(0);
+ break;
+ }
+ switch (mag) {
+ case PIPE_TEX_FILTER_NEAREST:
+ retval |= R300_TX_MAG_FILTER_NEAREST;
+ break;
+ case PIPE_TEX_FILTER_LINEAR:
+ retval |= R300_TX_MAG_FILTER_LINEAR;
+ break;
+ default:
+ fprintf(stderr, "r300: Unknown texture filter %d\n", mag);
+ assert(0);
+ break;
+ }
+ }
+ switch (mip) {
+ case PIPE_TEX_MIPFILTER_NONE:
+ retval |= R300_TX_MIN_FILTER_MIP_NONE;
+ break;
+ case PIPE_TEX_MIPFILTER_NEAREST:
+ retval |= R300_TX_MIN_FILTER_MIP_NEAREST;
+ break;
+ case PIPE_TEX_MIPFILTER_LINEAR:
+ retval |= R300_TX_MIN_FILTER_MIP_LINEAR;
+ break;
+ default:
+ fprintf(stderr, "r300: Unknown texture filter %d\n", mip);
+ assert(0);
+ break;
+ }
+
+ return retval;
+}
+
+static INLINE uint32_t r300_anisotropy(unsigned max_aniso)
+{
+ if (max_aniso >= 16) {
+ return R300_TX_MAX_ANISO_16_TO_1;
+ } else if (max_aniso >= 8) {
+ return R300_TX_MAX_ANISO_8_TO_1;
+ } else if (max_aniso >= 4) {
+ return R300_TX_MAX_ANISO_4_TO_1;
+ } else if (max_aniso >= 2) {
+ return R300_TX_MAX_ANISO_2_TO_1;
+ } else {
+ return R300_TX_MAX_ANISO_1_TO_1;
+ }
+}
+
+static INLINE uint32_t r500_anisotropy(unsigned max_aniso)
+{
+ if (!max_aniso) {
+ return 0;
+ }
+ max_aniso -= 1;
+
+ // Map the range [0, 15] to [0, 63].
+ return R500_TX_MAX_ANISO(MIN2((unsigned)(max_aniso*4.2001), 63)) |
+ R500_TX_ANISO_HIGH_QUALITY;
+}
+
+/* Non-CSO state. (For now.) */
+
+static INLINE uint32_t r300_translate_gb_pipes(int pipe_count)
+{
+ switch (pipe_count) {
+ case 1:
+ return R300_GB_TILE_PIPE_COUNT_RV300;
+ case 2:
+ return R300_GB_TILE_PIPE_COUNT_R300;
+ case 3:
+ return R300_GB_TILE_PIPE_COUNT_R420_3P;
+ case 4:
+ return R300_GB_TILE_PIPE_COUNT_R420;
+ }
+ return 0;
+}
+
+
+/* Translate pipe_formats into PSC vertex types. */
+static INLINE uint16_t
+r300_translate_vertex_data_type(enum pipe_format format) {
+ uint32_t result = 0;
+ const struct util_format_description *desc;
+
+ desc = util_format_description(format);
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+ return R300_INVALID_FORMAT;
+ }
+
+ switch (desc->channel[0].type) {
+ /* Half-floats, floats, doubles */
+ case UTIL_FORMAT_TYPE_FLOAT:
+ switch (desc->channel[0].size) {
+ case 16:
+ /* Supported only on RV350 and later. */
+ if (desc->nr_channels > 2) {
+ result = R300_DATA_TYPE_FLT16_4;
+ } else {
+ result = R300_DATA_TYPE_FLT16_2;
+ }
+ break;
+ case 32:
+ result = R300_DATA_TYPE_FLOAT_1 + (desc->nr_channels - 1);
+ break;
+ default:
+ return R300_INVALID_FORMAT;
+ }
+ break;
+ /* Unsigned ints */
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ /* Signed ints */
+ case UTIL_FORMAT_TYPE_SIGNED:
+ switch (desc->channel[0].size) {
+ case 8:
+ result = R300_DATA_TYPE_BYTE;
+ break;
+ case 16:
+ if (desc->nr_channels > 2) {
+ result = R300_DATA_TYPE_SHORT_4;
+ } else {
+ result = R300_DATA_TYPE_SHORT_2;
+ }
+ break;
+ default:
+ return R300_INVALID_FORMAT;
+ }
+ break;
+ default:
+ return R300_INVALID_FORMAT;
+ }
+
+ if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+ result |= R300_SIGNED;
+ }
+ if (desc->channel[0].normalized) {
+ result |= R300_NORMALIZE;
+ }
+
+ return result;
+}
+
+static INLINE uint16_t
+r300_translate_vertex_data_swizzle(enum pipe_format format) {
+ const struct util_format_description *desc = util_format_description(format);
+ unsigned i, swizzle = 0;
+
+ assert(format);
+
+ if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
+ fprintf(stderr, "r300: Bad format %s in %s:%d\n",
+ util_format_short_name(format), __FUNCTION__, __LINE__);
+ return 0;
+ }
+
+ for (i = 0; i < desc->nr_channels; i++) {
+ swizzle |=
+ MIN2(desc->swizzle[i], R300_SWIZZLE_SELECT_FP_ONE) << (3*i);
+ }
+ /* Set (0,0,0,1) in unused components. */
+ for (; i < 3; i++) {
+ swizzle |= R300_SWIZZLE_SELECT_FP_ZERO << (3*i);
+ }
+ for (; i < 4; i++) {
+ swizzle |= R300_SWIZZLE_SELECT_FP_ONE << (3*i);
+ }
+
+ return swizzle | (0xf << R300_WRITE_ENA_SHIFT);
+}
+
+#endif /* R300_STATE_INLINES_H */
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
new file mode 100644
index 0000000000..e67a0ae244
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2009 Joakim Sindholt <opensource@zhasha.com>
+ * Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_context.h"
+#include "r300_cs.h"
+#include "r300_reg.h"
+#include "r300_screen.h"
+#include "r300_state_invariant.h"
+
+/* Calculate and emit invariant state. This is data that the 3D engine
+ * will probably want at the beginning of every CS, but it's not currently
+ * handled by any CSO setup, and in addition it doesn't really change much.
+ *
+ * Note that eventually this should be empty, but it's useful for development
+ * and general unduplication of code. */
+void r300_emit_invariant_state(struct r300_context* r300,
+ unsigned size, void* state)
+{
+ CS_LOCALS(r300);
+
+ BEGIN_CS(12 + (r300->screen->caps.has_tcl ? 2 : 0));
+
+ /*** Graphics Backend (GB) ***/
+ /* Source of fog depth */
+ OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W);
+
+ /*** Fog (FG) ***/
+ OUT_CS_REG(R300_FG_FOG_BLEND, 0x0);
+ OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0);
+ OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0);
+ OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0);
+
+ /*** VAP ***/
+ /* Sign/normalize control */
+ OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO);
+ /* TCL-only stuff */
+ if (r300->screen->caps.has_tcl) {
+ /* Amount of time to wait for vertex fetches in PVS */
+ OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xffff);
+ }
+
+ END_CS;
+
+ /* XXX unsorted stuff from surface_fill */
+ BEGIN_CS(38 + (r300->screen->caps.has_tcl ? 7 : 0) +
+ (r300->screen->caps.is_rv350 ? 4 : 0) +
+ (r300->screen->caps.is_r400 ? 2 : 0));
+
+ if (r300->screen->caps.has_tcl) {
+ /*Flushing PVS is required before the VAP_GB registers can be changed*/
+ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+ OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
+ OUT_CS_32F(1.0);
+ OUT_CS_32F(1.0);
+ OUT_CS_32F(1.0);
+ OUT_CS_32F(1.0);
+ }
+ /* XXX line tex stuffing */
+ OUT_CS_REG_SEQ(R300_GA_LINE_S0, 1);
+ OUT_CS_32F(0.0);
+ OUT_CS_REG_SEQ(R300_GA_LINE_S1, 1);
+ OUT_CS_32F(1.0);
+ OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 |
+ (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT));
+ /* XXX this big chunk should be refactored into rs_state */
+ OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000);
+ OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000);
+ OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001);
+ OUT_CS_REG(R300_GA_OFFSET, 0x00000000);
+ OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412);
+ OUT_CS_REG(R300_GA_FOG_OFFSET, 0x00000000);
+ OUT_CS_REG(R300_SU_TEX_WRAP, 0x00000000);
+ OUT_CS_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF);
+ OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000);
+ OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C);
+ OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525);
+ OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000);
+
+ if (r300->screen->caps.is_rv350) {
+ OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x01010101);
+ OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFEFEFEFE);
+ }
+
+ OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000);
+ OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
+ OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000);
+ OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000);
+ if (r300->screen->caps.is_r400)
+ OUT_CS_REG(R400_US_CODE_BANK, 0);
+ END_CS;
+}
diff --git a/src/gallium/drivers/r300/r300_state_invariant.h b/src/gallium/drivers/r300/r300_state_invariant.h
new file mode 100644
index 0000000000..83d031c7fe
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state_invariant.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_STATE_INVARIANT_H
+#define R300_STATE_INVARIANT_H
+
+struct r300_context;
+
+void r300_emit_invariant_state(struct r300_context* r300,
+ unsigned size, void* state);
+
+#endif /* R300_STATE_INVARIANT_H */
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
new file mode 100644
index 0000000000..ddb6600056
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -0,0 +1,1149 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* Always include headers in the reverse order!! ~ M. */
+#include "r300_texture.h"
+
+#include "r300_context.h"
+#include "r300_reg.h"
+#include "r300_transfer.h"
+#include "r300_screen.h"
+#include "r300_winsys.h"
+
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "pipe/p_screen.h"
+#include "state_tracker/drm_api.h"
+
+enum r300_dim {
+ DIM_WIDTH = 0,
+ DIM_HEIGHT = 1
+};
+
+unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
+ const unsigned char *swizzle_view)
+{
+ unsigned i;
+ unsigned char swizzle[4];
+ unsigned result = 0;
+ const uint32_t swizzle_shift[4] = {
+ R300_TX_FORMAT_R_SHIFT,
+ R300_TX_FORMAT_G_SHIFT,
+ R300_TX_FORMAT_B_SHIFT,
+ R300_TX_FORMAT_A_SHIFT
+ };
+ const uint32_t swizzle_bit[4] = {
+ R300_TX_FORMAT_X,
+ R300_TX_FORMAT_Y,
+ R300_TX_FORMAT_Z,
+ R300_TX_FORMAT_W
+ };
+
+ if (swizzle_view) {
+ /* Combine two sets of swizzles. */
+ for (i = 0; i < 4; i++) {
+ swizzle[i] = swizzle_view[i] <= UTIL_FORMAT_SWIZZLE_W ?
+ swizzle_format[swizzle_view[i]] : swizzle_view[i];
+ }
+ } else {
+ memcpy(swizzle, swizzle_format, 4);
+ }
+
+ /* Get swizzle. */
+ for (i = 0; i < 4; i++) {
+ switch (swizzle[i]) {
+ case UTIL_FORMAT_SWIZZLE_Y:
+ result |= swizzle_bit[1] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_Z:
+ result |= swizzle_bit[2] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_W:
+ result |= swizzle_bit[3] << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_0:
+ result |= R300_TX_FORMAT_ZERO << swizzle_shift[i];
+ break;
+ case UTIL_FORMAT_SWIZZLE_1:
+ result |= R300_TX_FORMAT_ONE << swizzle_shift[i];
+ break;
+ default: /* UTIL_FORMAT_SWIZZLE_X */
+ result |= swizzle_bit[0] << swizzle_shift[i];
+ }
+ }
+ return result;
+}
+
+/* Translate a pipe_format into a useful texture format for sampling.
+ *
+ * Some special formats are translated directly using R300_EASY_TX_FORMAT,
+ * but the majority of them is translated in a generic way, automatically
+ * supporting all the formats hw can support.
+ *
+ * R300_EASY_TX_FORMAT swizzles the texture.
+ * Note the signature of R300_EASY_TX_FORMAT:
+ * R300_EASY_TX_FORMAT(B, G, R, A, FORMAT);
+ *
+ * The FORMAT specifies how the texture sampler will treat the texture, and
+ * makes available X, Y, Z, W, ZERO, and ONE for swizzling. */
+uint32_t r300_translate_texformat(enum pipe_format format,
+ const unsigned char *swizzle_view)
+{
+ uint32_t result = 0;
+ const struct util_format_description *desc;
+ unsigned i;
+ boolean uniform = TRUE;
+ const uint32_t sign_bit[4] = {
+ R300_TX_FORMAT_SIGNED_X,
+ R300_TX_FORMAT_SIGNED_Y,
+ R300_TX_FORMAT_SIGNED_Z,
+ R300_TX_FORMAT_SIGNED_W,
+ };
+
+ desc = util_format_description(format);
+
+ /* Colorspace (return non-RGB formats directly). */
+ switch (desc->colorspace) {
+ /* Depth stencil formats.
+ * Swizzles are added in r300_merge_textures_and_samplers. */
+ case UTIL_FORMAT_COLORSPACE_ZS:
+ switch (format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ return R300_TX_FORMAT_X16;
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ return R500_TX_FORMAT_Y8X24;
+ default:
+ return ~0; /* Unsupported. */
+ }
+
+ /* YUV formats. */
+ case UTIL_FORMAT_COLORSPACE_YUV:
+ result |= R300_TX_FORMAT_YUV_TO_RGB;
+
+ switch (format) {
+ case PIPE_FORMAT_UYVY:
+ return R300_EASY_TX_FORMAT(X, Y, Z, ONE, YVYU422) | result;
+ case PIPE_FORMAT_YUYV:
+ return R300_EASY_TX_FORMAT(X, Y, Z, ONE, VYUY422) | result;
+ default:
+ return ~0; /* Unsupported/unknown. */
+ }
+
+ /* Add gamma correction. */
+ case UTIL_FORMAT_COLORSPACE_SRGB:
+ result |= R300_TX_FORMAT_GAMMA;
+ break;
+
+ default:
+ switch (format) {
+ /* Same as YUV but without the YUR->RGB conversion. */
+ case PIPE_FORMAT_R8G8_B8G8_UNORM:
+ return R300_EASY_TX_FORMAT(X, Y, Z, ONE, YVYU422) | result;
+ case PIPE_FORMAT_G8R8_G8B8_UNORM:
+ return R300_EASY_TX_FORMAT(X, Y, Z, ONE, VYUY422) | result;
+ default:;
+ }
+ }
+
+ result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view);
+
+ /* S3TC formats. */
+ if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+ if (!util_format_s3tc_enabled) {
+ return ~0; /* Unsupported. */
+ }
+
+ switch (format) {
+ case PIPE_FORMAT_DXT1_RGB:
+ case PIPE_FORMAT_DXT1_RGBA:
+ case PIPE_FORMAT_DXT1_SRGB:
+ case PIPE_FORMAT_DXT1_SRGBA:
+ return R300_TX_FORMAT_DXT1 | result;
+ case PIPE_FORMAT_DXT3_RGBA:
+ case PIPE_FORMAT_DXT3_SRGBA:
+ return R300_TX_FORMAT_DXT3 | result;
+ case PIPE_FORMAT_DXT5_RGBA:
+ case PIPE_FORMAT_DXT5_SRGBA:
+ return R300_TX_FORMAT_DXT5 | result;
+ default:
+ return ~0; /* Unsupported/unknown. */
+ }
+ }
+
+ /* Add sign. */
+ for (i = 0; i < desc->nr_channels; i++) {
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+ result |= sign_bit[i];
+ }
+ }
+
+ /* This is truly a special format.
+ * It stores R8G8 and B is computed using sqrt(1 - R^2 - G^2)
+ * in the sampler unit. Also known as D3DFMT_CxV8U8. */
+ if (format == PIPE_FORMAT_R8G8Bx_SNORM) {
+ return R300_TX_FORMAT_CxV8U8 | result;
+ }
+
+ /* RGTC formats. */
+ if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_UNORM:
+ case PIPE_FORMAT_RGTC1_SNORM:
+ return R500_TX_FORMAT_ATI1N | result;
+ case PIPE_FORMAT_RGTC2_UNORM:
+ case PIPE_FORMAT_RGTC2_SNORM:
+ return R400_TX_FORMAT_ATI2N | result;
+ default:
+ return ~0; /* Unsupported/unknown. */
+ }
+ }
+
+ /* See whether the components are of the same size. */
+ for (i = 1; i < desc->nr_channels; i++) {
+ uniform = uniform && desc->channel[0].size == desc->channel[i].size;
+ }
+
+ /* Non-uniform formats. */
+ if (!uniform) {
+ switch (desc->nr_channels) {
+ case 3:
+ if (desc->channel[0].size == 5 &&
+ desc->channel[1].size == 6 &&
+ desc->channel[2].size == 5) {
+ return R300_TX_FORMAT_Z5Y6X5 | result;
+ }
+ if (desc->channel[0].size == 5 &&
+ desc->channel[1].size == 5 &&
+ desc->channel[2].size == 6) {
+ return R300_TX_FORMAT_Z6Y5X5 | result;
+ }
+ return ~0; /* Unsupported/unknown. */
+
+ case 4:
+ if (desc->channel[0].size == 5 &&
+ desc->channel[1].size == 5 &&
+ desc->channel[2].size == 5 &&
+ desc->channel[3].size == 1) {
+ return R300_TX_FORMAT_W1Z5Y5X5 | result;
+ }
+ if (desc->channel[0].size == 10 &&
+ desc->channel[1].size == 10 &&
+ desc->channel[2].size == 10 &&
+ desc->channel[3].size == 2) {
+ return R300_TX_FORMAT_W2Z10Y10X10 | result;
+ }
+ }
+ return ~0; /* Unsupported/unknown. */
+ }
+
+ /* And finally, uniform formats. */
+ switch (desc->channel[0].type) {
+ case UTIL_FORMAT_TYPE_UNSIGNED:
+ case UTIL_FORMAT_TYPE_SIGNED:
+ if (!desc->channel[0].normalized &&
+ desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB) {
+ return ~0;
+ }
+
+ switch (desc->channel[0].size) {
+ case 4:
+ switch (desc->nr_channels) {
+ case 2:
+ return R300_TX_FORMAT_Y4X4 | result;
+ case 4:
+ return R300_TX_FORMAT_W4Z4Y4X4 | result;
+ }
+ return ~0;
+
+ case 8:
+ switch (desc->nr_channels) {
+ case 1:
+ return R300_TX_FORMAT_X8 | result;
+ case 2:
+ return R300_TX_FORMAT_Y8X8 | result;
+ case 4:
+ return R300_TX_FORMAT_W8Z8Y8X8 | result;
+ }
+ return ~0;
+
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ return R300_TX_FORMAT_X16 | result;
+ case 2:
+ return R300_TX_FORMAT_Y16X16 | result;
+ case 4:
+ return R300_TX_FORMAT_W16Z16Y16X16 | result;
+ }
+ }
+ return ~0;
+
+ case UTIL_FORMAT_TYPE_FLOAT:
+ switch (desc->channel[0].size) {
+ case 16:
+ switch (desc->nr_channels) {
+ case 1:
+ return R300_TX_FORMAT_16F | result;
+ case 2:
+ return R300_TX_FORMAT_16F_16F | result;
+ case 4:
+ return R300_TX_FORMAT_16F_16F_16F_16F | result;
+ }
+ return ~0;
+
+ case 32:
+ switch (desc->nr_channels) {
+ case 1:
+ return R300_TX_FORMAT_32F | result;
+ case 2:
+ return R300_TX_FORMAT_32F_32F | result;
+ case 4:
+ return R300_TX_FORMAT_32F_32F_32F_32F | result;
+ }
+ }
+ }
+
+ return ~0; /* Unsupported/unknown. */
+}
+
+uint32_t r500_tx_format_msb_bit(enum pipe_format format)
+{
+ switch (format) {
+ case PIPE_FORMAT_RGTC1_UNORM:
+ case PIPE_FORMAT_RGTC1_SNORM:
+ case PIPE_FORMAT_X8Z24_UNORM:
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ return R500_TXFORMAT_MSB;
+ default:
+ return 0;
+ }
+}
+
+/* Buffer formats. */
+
+/* Colorbuffer formats. This is the unswizzled format of the RB3D block's
+ * output. For the swizzling of the targets, check the shader's format. */
+static uint32_t r300_translate_colorformat(enum pipe_format format)
+{
+ switch (format) {
+ /* 8-bit buffers. */
+ case PIPE_FORMAT_A8_UNORM:
+ case PIPE_FORMAT_I8_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R8_SNORM:
+ return R300_COLOR_FORMAT_I8;
+
+ /* 16-bit buffers. */
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ return R300_COLOR_FORMAT_RGB565;
+
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ case PIPE_FORMAT_B5G5R5X1_UNORM:
+ return R300_COLOR_FORMAT_ARGB1555;
+
+ case PIPE_FORMAT_B4G4R4A4_UNORM:
+ case PIPE_FORMAT_B4G4R4X4_UNORM:
+ return R300_COLOR_FORMAT_ARGB4444;
+
+ /* 32-bit buffers. */
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ case PIPE_FORMAT_A8B8G8R8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ case PIPE_FORMAT_X8B8G8R8_UNORM:
+ case PIPE_FORMAT_R8G8B8X8_UNORM:
+ case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
+ return R300_COLOR_FORMAT_ARGB8888;
+
+ case PIPE_FORMAT_R10G10B10A2_UNORM:
+ case PIPE_FORMAT_R10G10B10X2_SNORM:
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
+ return R500_COLOR_FORMAT_ARGB2101010; /* R5xx-only? */
+
+ /* 64-bit buffers. */
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ return R300_COLOR_FORMAT_ARGB16161616;
+
+ /* 128-bit buffers. */
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return R300_COLOR_FORMAT_ARGB32323232;
+
+ /* YUV buffers. */
+ case PIPE_FORMAT_UYVY:
+ return R300_COLOR_FORMAT_YVYU;
+ case PIPE_FORMAT_YUYV:
+ return R300_COLOR_FORMAT_VYUY;
+ default:
+ return ~0; /* Unsupported. */
+ }
+}
+
+/* Depthbuffer and stencilbuffer. Thankfully, we only support two flavors. */
+static uint32_t r300_translate_zsformat(enum pipe_format format)
+{
+ switch (format) {
+ /* 16-bit depth, no stencil */
+ case PIPE_FORMAT_Z16_UNORM:
+ return R300_DEPTHFORMAT_16BIT_INT_Z;
+ /* 24-bit depth, ignored stencil */
+ case PIPE_FORMAT_X8Z24_UNORM:
+ /* 24-bit depth, 8-bit stencil */
+ case PIPE_FORMAT_S8_USCALED_Z24_UNORM:
+ return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+ default:
+ return ~0; /* Unsupported. */
+ }
+}
+
+/* Shader output formats. This is essentially the swizzle from the shader
+ * to the RB3D block.
+ *
+ * Note that formats are stored from C3 to C0. */
+static uint32_t r300_translate_out_fmt(enum pipe_format format)
+{
+ uint32_t modifier = 0;
+ unsigned i;
+ const struct util_format_description *desc;
+ static const uint32_t sign_bit[4] = {
+ R300_OUT_SIGN(0x1),
+ R300_OUT_SIGN(0x2),
+ R300_OUT_SIGN(0x4),
+ R300_OUT_SIGN(0x8),
+ };
+
+ desc = util_format_description(format);
+
+ /* Specifies how the shader output is written to the fog unit. */
+ if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT) {
+ if (desc->channel[0].size == 32) {
+ modifier |= R300_US_OUT_FMT_C4_32_FP;
+ } else {
+ modifier |= R300_US_OUT_FMT_C4_16_FP;
+ }
+ } else {
+ if (desc->channel[0].size == 16) {
+ modifier |= R300_US_OUT_FMT_C4_16;
+ } else {
+ /* C4_8 seems to be used for the formats whose pixel size
+ * is <= 32 bits. */
+ modifier |= R300_US_OUT_FMT_C4_8;
+ }
+ }
+
+ /* Add sign. */
+ for (i = 0; i < 4; i++)
+ if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
+ modifier |= sign_bit[i];
+ }
+
+ /* Add swizzles and return. */
+ switch (format) {
+ /* 8-bit outputs.
+ * COLORFORMAT_I8 stores the C2 component. */
+ case PIPE_FORMAT_A8_UNORM:
+ return modifier | R300_C2_SEL_A;
+ case PIPE_FORMAT_I8_UNORM:
+ case PIPE_FORMAT_L8_UNORM:
+ case PIPE_FORMAT_R8_UNORM:
+ case PIPE_FORMAT_R8_SNORM:
+ return modifier | R300_C2_SEL_R;
+
+ /* BGRA outputs. */
+ case PIPE_FORMAT_B5G6R5_UNORM:
+ case PIPE_FORMAT_B5G5R5A1_UNORM:
+ case PIPE_FORMAT_B5G5R5X1_UNORM:
+ case PIPE_FORMAT_B4G4R4A4_UNORM:
+ case PIPE_FORMAT_B4G4R4X4_UNORM:
+ case PIPE_FORMAT_B8G8R8A8_UNORM:
+ case PIPE_FORMAT_B8G8R8X8_UNORM:
+ case PIPE_FORMAT_B10G10R10A2_UNORM:
+ return modifier |
+ R300_C0_SEL_B | R300_C1_SEL_G |
+ R300_C2_SEL_R | R300_C3_SEL_A;
+
+ /* ARGB outputs. */
+ case PIPE_FORMAT_A8R8G8B8_UNORM:
+ case PIPE_FORMAT_X8R8G8B8_UNORM:
+ return modifier |
+ R300_C0_SEL_A | R300_C1_SEL_R |
+ R300_C2_SEL_G | R300_C3_SEL_B;
+
+ /* ABGR outputs. */
+ case PIPE_FORMAT_A8B8G8R8_UNORM:
+ case PIPE_FORMAT_X8B8G8R8_UNORM:
+ return modifier |
+ R300_C0_SEL_A | R300_C1_SEL_B |
+ R300_C2_SEL_G | R300_C3_SEL_R;
+
+ /* RGBA outputs. */
+ case PIPE_FORMAT_R8G8B8X8_UNORM:
+ case PIPE_FORMAT_R8G8B8A8_SNORM:
+ case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
+ case PIPE_FORMAT_R10G10B10A2_UNORM:
+ case PIPE_FORMAT_R10G10B10X2_SNORM:
+ case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
+ case PIPE_FORMAT_R16G16B16A16_UNORM:
+ case PIPE_FORMAT_R16G16B16A16_SNORM:
+ case PIPE_FORMAT_R16G16B16A16_FLOAT:
+ case PIPE_FORMAT_R32G32B32A32_FLOAT:
+ return modifier |
+ R300_C0_SEL_R | R300_C1_SEL_G |
+ R300_C2_SEL_B | R300_C3_SEL_A;
+
+ default:
+ return ~0; /* Unsupported. */
+ }
+}
+
+boolean r300_is_colorbuffer_format_supported(enum pipe_format format)
+{
+ return r300_translate_colorformat(format) != ~0 &&
+ r300_translate_out_fmt(format) != ~0;
+}
+
+boolean r300_is_zs_format_supported(enum pipe_format format)
+{
+ return r300_translate_zsformat(format) != ~0;
+}
+
+boolean r300_is_sampler_format_supported(enum pipe_format format)
+{
+ return r300_translate_texformat(format, 0) != ~0;
+}
+
+static void r300_texture_setup_immutable_state(struct r300_screen* screen,
+ struct r300_texture* tex)
+{
+ struct r300_texture_format_state* f = &tex->tx_format;
+ struct pipe_resource *pt = &tex->b.b;
+ boolean is_r500 = screen->caps.is_r500;
+
+ /* Set sampler state. */
+ f->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) |
+ R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff);
+
+ if (tex->uses_pitch) {
+ /* rectangles love this */
+ f->format0 |= R300_TX_PITCH_EN;
+ f->format2 = (tex->hwpitch[0] - 1) & 0x1fff;
+ } else {
+ /* power of two textures (3D, mipmaps, and no pitch) */
+ f->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf);
+ }
+
+ f->format1 = 0;
+ if (pt->target == PIPE_TEXTURE_CUBE) {
+ f->format1 |= R300_TX_FORMAT_CUBIC_MAP;
+ }
+ if (pt->target == PIPE_TEXTURE_3D) {
+ f->format1 |= R300_TX_FORMAT_3D;
+ }
+
+ /* large textures on r500 */
+ if (is_r500)
+ {
+ if (pt->width0 > 2048) {
+ f->format2 |= R500_TXWIDTH_BIT11;
+ }
+ if (pt->height0 > 2048) {
+ f->format2 |= R500_TXHEIGHT_BIT11;
+ }
+ }
+
+ f->tile_config = R300_TXO_MACRO_TILE(tex->macrotile) |
+ R300_TXO_MICRO_TILE(tex->microtile);
+}
+
+static void r300_texture_setup_fb_state(struct r300_screen* screen,
+ struct r300_texture* tex)
+{
+ unsigned i;
+
+ /* Set framebuffer state. */
+ if (util_format_is_depth_or_stencil(tex->b.b.format)) {
+ for (i = 0; i <= tex->b.b.last_level; i++) {
+ tex->fb_state.pitch[i] =
+ tex->hwpitch[i] |
+ R300_DEPTHMACROTILE(tex->mip_macrotile[i]) |
+ R300_DEPTHMICROTILE(tex->microtile);
+ }
+ tex->fb_state.format = r300_translate_zsformat(tex->b.b.format);
+ } else {
+ for (i = 0; i <= tex->b.b.last_level; i++) {
+ tex->fb_state.pitch[i] =
+ tex->hwpitch[i] |
+ r300_translate_colorformat(tex->b.b.format) |
+ R300_COLOR_TILE(tex->mip_macrotile[i]) |
+ R300_COLOR_MICROTILE(tex->microtile);
+ }
+ tex->fb_state.format = r300_translate_out_fmt(tex->b.b.format);
+ }
+}
+
+void r300_texture_reinterpret_format(struct pipe_screen *screen,
+ struct pipe_resource *tex,
+ enum pipe_format new_format)
+{
+ struct r300_screen *r300screen = r300_screen(screen);
+
+ SCREEN_DBG(r300screen, DBG_TEX,
+ "r300: texture_reinterpret_format: %s -> %s\n",
+ util_format_short_name(tex->format),
+ util_format_short_name(new_format));
+
+ tex->format = new_format;
+
+ r300_texture_setup_fb_state(r300_screen(screen), r300_texture(tex));
+}
+
+unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
+ unsigned zslice, unsigned face)
+{
+ unsigned offset = tex->offset[level];
+
+ switch (tex->b.b.target) {
+ case PIPE_TEXTURE_3D:
+ assert(face == 0);
+ return offset + zslice * tex->layer_size[level];
+
+ case PIPE_TEXTURE_CUBE:
+ assert(zslice == 0);
+ return offset + face * tex->layer_size[level];
+
+ default:
+ assert(zslice == 0 && face == 0);
+ return offset;
+ }
+}
+
+/* Returns the number of pixels that the texture should be aligned to
+ * in the given dimension. */
+static unsigned r300_get_pixel_alignment(struct r300_texture *tex,
+ enum r300_buffer_tiling macrotile,
+ enum r300_dim dim)
+{
+ static const unsigned table[2][5][3][2] =
+ {
+ {
+ /* Macro: linear linear linear
+ Micro: linear tiled square-tiled */
+ {{ 32, 1}, { 8, 4}, { 0, 0}}, /* 8 bits per pixel */
+ {{ 16, 1}, { 8, 2}, { 4, 4}}, /* 16 bits per pixel */
+ {{ 8, 1}, { 4, 2}, { 0, 0}}, /* 32 bits per pixel */
+ {{ 4, 1}, { 0, 0}, { 2, 2}}, /* 64 bits per pixel */
+ {{ 2, 1}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */
+ },
+ {
+ /* Macro: tiled tiled tiled
+ Micro: linear tiled square-tiled */
+ {{256, 8}, {64, 32}, { 0, 0}}, /* 8 bits per pixel */
+ {{128, 8}, {64, 16}, {32, 32}}, /* 16 bits per pixel */
+ {{ 64, 8}, {32, 16}, { 0, 0}}, /* 32 bits per pixel */
+ {{ 32, 8}, { 0, 0}, {16, 16}}, /* 64 bits per pixel */
+ {{ 16, 8}, { 0, 0}, { 0, 0}} /* 128 bits per pixel */
+ }
+ };
+ static const unsigned aa_block[2] = {4, 8};
+ unsigned res = 0;
+ unsigned pixsize = util_format_get_blocksize(tex->b.b.format);
+
+ assert(macrotile <= R300_BUFFER_TILED);
+ assert(tex->microtile <= R300_BUFFER_SQUARETILED);
+ assert(pixsize <= 16);
+ assert(dim <= DIM_HEIGHT);
+
+ if (tex->b.b.nr_samples > 1) {
+ /* Multisampled textures have their own alignment scheme. */
+ if (pixsize == 4)
+ res = aa_block[dim];
+ } else {
+ /* Standard alignment. */
+ res = table[macrotile][util_logbase2(pixsize)][tex->microtile][dim];
+ }
+
+ assert(res);
+ return res;
+}
+
+/* Return true if macrotiling should be enabled on the miplevel. */
+static boolean r300_texture_macro_switch(struct r300_texture *tex,
+ unsigned level,
+ boolean rv350_mode,
+ enum r300_dim dim)
+{
+ unsigned tile, texdim;
+
+ tile = r300_get_pixel_alignment(tex, R300_BUFFER_TILED, dim);
+ if (dim == DIM_WIDTH) {
+ texdim = u_minify(tex->b.b.width0, level);
+ } else {
+ texdim = u_minify(tex->b.b.height0, level);
+ }
+
+ /* See TX_FILTER1_n.MACRO_SWITCH. */
+ if (rv350_mode) {
+ return texdim >= tile;
+ } else {
+ return texdim > tile;
+ }
+}
+
+/**
+ * Return the stride, in bytes, of the texture images of the given texture
+ * at the given level.
+ */
+unsigned r300_texture_get_stride(struct r300_screen* screen,
+ struct r300_texture* tex, unsigned level)
+{
+ unsigned tile_width, width, stride;
+
+ if (tex->stride_override)
+ return tex->stride_override;
+
+ /* Check the level. */
+ if (level > tex->b.b.last_level) {
+ SCREEN_DBG(screen, DBG_TEX, "%s: level (%u) > last_level (%u)\n",
+ __FUNCTION__, level, tex->b.b.last_level);
+ return 0;
+ }
+
+ width = u_minify(tex->b.b.width0, level);
+
+ if (util_format_is_plain(tex->b.b.format)) {
+ tile_width = r300_get_pixel_alignment(tex, tex->mip_macrotile[level],
+ DIM_WIDTH);
+ width = align(width, tile_width);
+
+ stride = util_format_get_stride(tex->b.b.format, width);
+
+ /* Some IGPs need a minimum stride of 64 bytes, hmm...
+ * This doesn't seem to apply to tiled textures, according to r300c. */
+ if (!tex->microtile && !tex->mip_macrotile[level] &&
+ (screen->caps.family == CHIP_FAMILY_RS600 ||
+ screen->caps.family == CHIP_FAMILY_RS690 ||
+ screen->caps.family == CHIP_FAMILY_RS740)) {
+ return stride < 64 ? 64 : stride;
+ }
+
+ /* The alignment to 32 bytes is sort of implied by the layout... */
+ return stride;
+ } else {
+ return align(util_format_get_stride(tex->b.b.format, width), 32);
+ }
+}
+
+static unsigned r300_texture_get_nblocksy(struct r300_texture* tex,
+ unsigned level)
+{
+ unsigned height, tile_height;
+
+ height = u_minify(tex->b.b.height0, level);
+
+ if (util_format_is_plain(tex->b.b.format)) {
+ tile_height = r300_get_pixel_alignment(tex, tex->mip_macrotile[level],
+ DIM_HEIGHT);
+ height = align(height, tile_height);
+
+ /* This is needed for the kernel checker, unfortunately. */
+ height = util_next_power_of_two(height);
+ }
+
+ return util_format_get_nblocksy(tex->b.b.format, height);
+}
+
+static void r300_texture_3d_fix_mipmapping(struct r300_screen *screen,
+ struct r300_texture *tex)
+{
+ /* The kernels <= 2.6.34-rc4 compute the size of mipmapped 3D textures
+ * incorrectly. This is a workaround to prevent CS from being rejected. */
+
+ unsigned i, size;
+
+ if (!screen->rws->get_value(screen->rws, R300_VID_DRM_2_3_0) &&
+ tex->b.b.target == PIPE_TEXTURE_3D &&
+ tex->b.b.last_level > 0) {
+ size = 0;
+
+ for (i = 0; i <= tex->b.b.last_level; i++) {
+ size += r300_texture_get_stride(screen, tex, i) *
+ r300_texture_get_nblocksy(tex, i);
+ }
+
+ size *= tex->b.b.depth0;
+ tex->size = size;
+ }
+}
+
+static void r300_setup_miptree(struct r300_screen* screen,
+ struct r300_texture* tex)
+{
+ struct pipe_resource* base = &tex->b.b;
+ unsigned stride, size, layer_size, nblocksy, i;
+ boolean rv350_mode = screen->caps.is_rv350;
+
+ SCREEN_DBG(screen, DBG_TEXALLOC,
+ "r300: Making miptree for texture, format %s\n",
+ util_format_short_name(base->format));
+
+ for (i = 0; i <= base->last_level; i++) {
+ /* Let's see if this miplevel can be macrotiled. */
+ tex->mip_macrotile[i] =
+ (tex->macrotile == R300_BUFFER_TILED &&
+ r300_texture_macro_switch(tex, i, rv350_mode, DIM_WIDTH) &&
+ r300_texture_macro_switch(tex, i, rv350_mode, DIM_HEIGHT)) ?
+ R300_BUFFER_TILED : R300_BUFFER_LINEAR;
+
+ stride = r300_texture_get_stride(screen, tex, i);
+ nblocksy = r300_texture_get_nblocksy(tex, i);
+ layer_size = stride * nblocksy;
+
+ if (base->nr_samples) {
+ layer_size *= base->nr_samples;
+ }
+
+ if (base->target == PIPE_TEXTURE_CUBE)
+ size = layer_size * 6;
+ else
+ size = layer_size * u_minify(base->depth0, i);
+
+ tex->offset[i] = tex->size;
+ tex->size = tex->offset[i] + size;
+ tex->layer_size[i] = layer_size;
+ tex->pitch[i] = stride / util_format_get_blocksize(base->format);
+ tex->hwpitch[i] =
+ tex->pitch[i] * util_format_get_blockwidth(base->format);
+
+ SCREEN_DBG(screen, DBG_TEXALLOC, "r300: Texture miptree: Level %d "
+ "(%dx%dx%d px, pitch %d bytes) %d bytes total, macrotiled %s\n",
+ i, u_minify(base->width0, i), u_minify(base->height0, i),
+ u_minify(base->depth0, i), stride, tex->size,
+ tex->mip_macrotile[i] ? "TRUE" : "FALSE");
+ }
+}
+
+static void r300_setup_flags(struct r300_texture* tex)
+{
+ tex->uses_pitch = !util_is_power_of_two(tex->b.b.width0) ||
+ !util_is_power_of_two(tex->b.b.height0) ||
+ tex->stride_override;
+}
+
+static void r300_setup_tiling(struct pipe_screen *screen,
+ struct r300_texture *tex)
+{
+ struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys;
+ enum pipe_format format = tex->b.b.format;
+ boolean rv350_mode = r300_screen(screen)->caps.is_rv350;
+ boolean is_zb = util_format_is_depth_or_stencil(format);
+ boolean dbg_no_tiling = SCREEN_DBG_ON(r300_screen(screen), DBG_NO_TILING);
+
+ if (!util_format_is_plain(format)) {
+ return;
+ }
+
+ /* If height == 1, disable microtiling except for zbuffer. */
+ if (!is_zb && (tex->b.b.height0 == 1 || dbg_no_tiling)) {
+ return;
+ }
+
+ /* Set microtiling. */
+ switch (util_format_get_blocksize(format)) {
+ case 1:
+ case 4:
+ tex->microtile = R300_BUFFER_TILED;
+ break;
+
+ case 2:
+ case 8:
+ if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) {
+ tex->microtile = R300_BUFFER_SQUARETILED;
+ }
+ break;
+ }
+
+ if (dbg_no_tiling) {
+ return;
+ }
+
+ /* Set macrotiling. */
+ if (r300_texture_macro_switch(tex, 0, rv350_mode, DIM_WIDTH) &&
+ r300_texture_macro_switch(tex, 0, rv350_mode, DIM_HEIGHT)) {
+ tex->macrotile = R300_BUFFER_TILED;
+ }
+}
+
+static unsigned r300_texture_is_referenced(struct pipe_context *context,
+ struct pipe_resource *texture,
+ unsigned face, unsigned level)
+{
+ struct r300_context *r300 = r300_context(context);
+ struct r300_texture *rtex = (struct r300_texture *)texture;
+
+ if (r300->rws->is_buffer_referenced(r300->rws, rtex->buffer, R300_REF_CS))
+ return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+
+ return PIPE_UNREFERENCED;
+}
+
+static void r300_texture_destroy(struct pipe_screen *screen,
+ struct pipe_resource* texture)
+{
+ struct r300_texture* tex = (struct r300_texture*)texture;
+ struct r300_winsys_screen *rws = (struct r300_winsys_screen *)texture->screen->winsys;
+
+ rws->buffer_reference(rws, &tex->buffer, NULL);
+ FREE(tex);
+}
+
+static boolean r300_texture_get_handle(struct pipe_screen* screen,
+ struct pipe_resource *texture,
+ struct winsys_handle *whandle)
+{
+ struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys;
+ struct r300_texture* tex = (struct r300_texture*)texture;
+
+ if (!tex) {
+ return FALSE;
+ }
+
+ whandle->stride = r300_texture_get_stride(r300_screen(screen), tex, 0);
+
+ return rws->buffer_get_handle(rws, tex->buffer, whandle);
+}
+
+struct u_resource_vtbl r300_texture_vtbl =
+{
+ r300_texture_get_handle, /* get_handle */
+ r300_texture_destroy, /* resource_destroy */
+ r300_texture_is_referenced, /* is_resource_referenced */
+ r300_texture_get_transfer, /* get_transfer */
+ r300_texture_transfer_destroy, /* transfer_destroy */
+ r300_texture_transfer_map, /* transfer_map */
+ u_default_transfer_flush_region, /* transfer_flush_region */
+ r300_texture_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
+/* Create a new texture. */
+struct pipe_resource* r300_texture_create(struct pipe_screen* screen,
+ const struct pipe_resource* base)
+{
+ struct r300_texture* tex = CALLOC_STRUCT(r300_texture);
+ struct r300_screen* rscreen = r300_screen(screen);
+ struct r300_winsys_screen *rws = (struct r300_winsys_screen *)screen->winsys;
+
+ if (!tex) {
+ return NULL;
+ }
+
+ /* Refuse to create a texture with size 0. */
+ if (!base->width0 ||
+ (!base->height0 && (base->target == PIPE_TEXTURE_2D ||
+ base->target == PIPE_TEXTURE_CUBE)) ||
+ (!base->depth0 && base->target == PIPE_TEXTURE_3D)) {
+ fprintf(stderr, "r300: texture_create: "
+ "Got invalid texture dimensions: %ix%ix%i\n",
+ base->width0, base->height0, base->depth0);
+ FREE(tex);
+ return NULL;
+ }
+
+ tex->b.b = *base;
+ tex->b.vtbl = &r300_texture_vtbl;
+ pipe_reference_init(&tex->b.b.reference, 1);
+ tex->b.b.screen = screen;
+
+ r300_setup_flags(tex);
+ if (!(base->flags & R300_RESOURCE_FLAG_TRANSFER) &&
+ !(base->bind & PIPE_BIND_SCANOUT)) {
+ r300_setup_tiling(screen, tex);
+ }
+ r300_setup_miptree(rscreen, tex);
+ r300_texture_3d_fix_mipmapping(rscreen, tex);
+ r300_texture_setup_immutable_state(rscreen, tex);
+ r300_texture_setup_fb_state(rscreen, tex);
+
+ SCREEN_DBG(rscreen, DBG_TEX,
+ "r300: texture_create: Macro: %s, Micro: %s, Pitch: %i, "
+ "Dim: %ix%ix%i, LastLevel: %i, Size: %i, Format: %s\n",
+ tex->macrotile ? "YES" : " NO",
+ tex->microtile ? "YES" : " NO",
+ tex->hwpitch[0],
+ base->width0, base->height0, base->depth0, base->last_level,
+ tex->size,
+ util_format_short_name(base->format));
+
+ tex->domain = base->flags & R300_RESOURCE_FLAG_TRANSFER ? R300_DOMAIN_GTT :
+ R300_DOMAIN_VRAM;
+
+ tex->buffer = rws->buffer_create(rws, 2048, base->bind, tex->domain,
+ tex->size);
+
+ if (!tex->buffer) {
+ FREE(tex);
+ return NULL;
+ }
+
+ rws->buffer_set_tiling(rws, tex->buffer,
+ tex->pitch[0] * util_format_get_blocksize(tex->b.b.format),
+ tex->microtile,
+ tex->macrotile);
+
+ return (struct pipe_resource*)tex;
+}
+
+/* Not required to implement u_resource_vtbl, consider moving to another file:
+ */
+struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
+ struct pipe_resource* texture,
+ unsigned face,
+ unsigned level,
+ unsigned zslice,
+ unsigned flags)
+{
+ struct r300_texture* tex = r300_texture(texture);
+ struct r300_surface* surface = CALLOC_STRUCT(r300_surface);
+
+ if (surface) {
+ pipe_reference_init(&surface->base.reference, 1);
+ pipe_resource_reference(&surface->base.texture, texture);
+ surface->base.format = texture->format;
+ surface->base.width = u_minify(texture->width0, level);
+ surface->base.height = u_minify(texture->height0, level);
+ surface->base.usage = flags;
+ surface->base.zslice = zslice;
+ surface->base.face = face;
+ surface->base.level = level;
+
+ surface->buffer = tex->buffer;
+ surface->domain = tex->domain;
+ surface->offset = r300_texture_get_offset(tex, level, zslice, face);
+ surface->pitch = tex->fb_state.pitch[level];
+ surface->format = tex->fb_state.format;
+ }
+
+ return &surface->base;
+}
+
+/* Not required to implement u_resource_vtbl, consider moving to another file:
+ */
+void r300_tex_surface_destroy(struct pipe_surface* s)
+{
+ pipe_resource_reference(&s->texture, NULL);
+ FREE(s);
+}
+
+struct pipe_resource*
+r300_texture_from_handle(struct pipe_screen* screen,
+ const struct pipe_resource* base,
+ struct winsys_handle *whandle)
+{
+ struct r300_winsys_screen *rws = (struct r300_winsys_screen*)screen->winsys;
+ struct r300_screen* rscreen = r300_screen(screen);
+ struct r300_winsys_buffer *buffer;
+ struct r300_texture* tex;
+ boolean override_zb_flags;
+
+ /* Support only 2D textures without mipmaps */
+ if (base->target != PIPE_TEXTURE_2D ||
+ base->depth0 != 1 ||
+ base->last_level != 0) {
+ return NULL;
+ }
+
+ buffer = rws->buffer_from_handle(rws, whandle->handle);
+ if (!buffer) {
+ return NULL;
+ }
+
+ tex = CALLOC_STRUCT(r300_texture);
+ if (!tex) {
+ return NULL;
+ }
+
+ tex->b.b = *base;
+ tex->b.vtbl = &r300_texture_vtbl;
+ pipe_reference_init(&tex->b.b.reference, 1);
+ tex->b.b.screen = screen;
+ tex->domain = R300_DOMAIN_VRAM;
+
+ tex->stride_override = whandle->stride;
+
+ /* one ref already taken */
+ tex->buffer = buffer;
+
+ rws->buffer_get_tiling(rws, buffer, &tex->microtile, &tex->macrotile);
+ r300_setup_flags(tex);
+ SCREEN_DBG(rscreen, DBG_TEX,
+ "r300: texture_from_handle: Macro: %s, Micro: %s, "
+ "Pitch: % 4i, Dim: %ix%i, Format: %s\n",
+ tex->macrotile ? "YES" : " NO",
+ tex->microtile ? "YES" : " NO",
+ whandle->stride / util_format_get_blocksize(base->format),
+ base->width0, base->height0,
+ util_format_short_name(base->format));
+
+ /* Enforce microtiled zbuffer. */
+ override_zb_flags = util_format_is_depth_or_stencil(base->format) &&
+ tex->microtile == R300_BUFFER_LINEAR;
+
+ if (override_zb_flags) {
+ switch (util_format_get_blocksize(base->format)) {
+ case 4:
+ tex->microtile = R300_BUFFER_TILED;
+ break;
+
+ case 2:
+ if (rws->get_value(rws, R300_VID_SQUARE_TILING_SUPPORT)) {
+ tex->microtile = R300_BUFFER_SQUARETILED;
+ break;
+ }
+ /* Pass through. */
+
+ default:
+ override_zb_flags = FALSE;
+ }
+ }
+
+ r300_setup_miptree(rscreen, tex);
+ r300_texture_setup_immutable_state(rscreen, tex);
+ r300_texture_setup_fb_state(rscreen, tex);
+
+ if (override_zb_flags) {
+ rws->buffer_set_tiling(rws, tex->buffer,
+ tex->pitch[0] * util_format_get_blocksize(tex->b.b.format),
+ tex->microtile,
+ tex->macrotile);
+ }
+ return (struct pipe_resource*)tex;
+}
diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h
new file mode 100644
index 0000000000..99e7694254
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_texture.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_TEXTURE_H
+#define R300_TEXTURE_H
+
+#include "pipe/p_format.h"
+
+struct pipe_screen;
+struct pipe_resource;
+struct winsys_handle;
+struct r300_texture;
+struct r300_screen;
+
+unsigned r300_get_swizzle_combined(const unsigned char *swizzle_format,
+ const unsigned char *swizzle_view);
+
+uint32_t r300_translate_texformat(enum pipe_format format,
+ const unsigned char *swizzle_view);
+
+uint32_t r500_tx_format_msb_bit(enum pipe_format format);
+
+unsigned r300_texture_get_stride(struct r300_screen* screen,
+ struct r300_texture* tex, unsigned level);
+
+unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
+ unsigned zslice, unsigned face);
+
+void r300_texture_reinterpret_format(struct pipe_screen *screen,
+ struct pipe_resource *tex,
+ enum pipe_format new_format);
+
+boolean r300_is_colorbuffer_format_supported(enum pipe_format format);
+
+boolean r300_is_zs_format_supported(enum pipe_format format);
+
+boolean r300_is_sampler_format_supported(enum pipe_format format);
+
+
+struct pipe_resource*
+r300_texture_from_handle(struct pipe_screen* screen,
+ const struct pipe_resource* base,
+ struct winsys_handle *whandle);
+
+struct pipe_resource*
+r300_texture_create(struct pipe_screen* screen,
+ const struct pipe_resource* templ);
+
+
+struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
+ struct pipe_resource* texture,
+ unsigned face,
+ unsigned level,
+ unsigned zslice,
+ unsigned flags);
+
+void r300_tex_surface_destroy(struct pipe_surface* s);
+
+#endif /* R300_TEXTURE_H */
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
new file mode 100644
index 0000000000..5394e04f72
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -0,0 +1,383 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_tgsi_to_rc.h"
+
+#include "radeon_compiler.h"
+#include "radeon_program.h"
+
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_util.h"
+
+static unsigned translate_opcode(unsigned opcode)
+{
+ switch(opcode) {
+ case TGSI_OPCODE_ARL: return RC_OPCODE_ARL;
+ case TGSI_OPCODE_MOV: return RC_OPCODE_MOV;
+ case TGSI_OPCODE_LIT: return RC_OPCODE_LIT;
+ case TGSI_OPCODE_RCP: return RC_OPCODE_RCP;
+ case TGSI_OPCODE_RSQ: return RC_OPCODE_RSQ;
+ case TGSI_OPCODE_EXP: return RC_OPCODE_EXP;
+ case TGSI_OPCODE_LOG: return RC_OPCODE_LOG;
+ case TGSI_OPCODE_MUL: return RC_OPCODE_MUL;
+ case TGSI_OPCODE_ADD: return RC_OPCODE_ADD;
+ case TGSI_OPCODE_DP3: return RC_OPCODE_DP3;
+ case TGSI_OPCODE_DP4: return RC_OPCODE_DP4;
+ case TGSI_OPCODE_DST: return RC_OPCODE_DST;
+ case TGSI_OPCODE_MIN: return RC_OPCODE_MIN;
+ case TGSI_OPCODE_MAX: return RC_OPCODE_MAX;
+ case TGSI_OPCODE_SLT: return RC_OPCODE_SLT;
+ case TGSI_OPCODE_SGE: return RC_OPCODE_SGE;
+ case TGSI_OPCODE_MAD: return RC_OPCODE_MAD;
+ case TGSI_OPCODE_SUB: return RC_OPCODE_SUB;
+ case TGSI_OPCODE_LRP: return RC_OPCODE_LRP;
+ /* case TGSI_OPCODE_CND: return RC_OPCODE_CND; */
+ /* case TGSI_OPCODE_CND0: return RC_OPCODE_CND0; */
+ /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */
+ /* gap */
+ case TGSI_OPCODE_FRC: return RC_OPCODE_FRC;
+ /* case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; */
+ case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;
+ /* case TGSI_OPCODE_ROUND: return RC_OPCODE_ROUND; */
+ case TGSI_OPCODE_EX2: return RC_OPCODE_EX2;
+ case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
+ case TGSI_OPCODE_POW: return RC_OPCODE_POW;
+ case TGSI_OPCODE_XPD: return RC_OPCODE_XPD;
+ /* gap */
+ case TGSI_OPCODE_ABS: return RC_OPCODE_ABS;
+ /* case TGSI_OPCODE_RCC: return RC_OPCODE_RCC; */
+ case TGSI_OPCODE_DPH: return RC_OPCODE_DPH;
+ case TGSI_OPCODE_COS: return RC_OPCODE_COS;
+ case TGSI_OPCODE_DDX: return RC_OPCODE_DDX;
+ case TGSI_OPCODE_DDY: return RC_OPCODE_DDY;
+ /* case TGSI_OPCODE_KILP: return RC_OPCODE_KILP; */
+ /* case TGSI_OPCODE_PK2H: return RC_OPCODE_PK2H; */
+ /* case TGSI_OPCODE_PK2US: return RC_OPCODE_PK2US; */
+ /* case TGSI_OPCODE_PK4B: return RC_OPCODE_PK4B; */
+ /* case TGSI_OPCODE_PK4UB: return RC_OPCODE_PK4UB; */
+ /* case TGSI_OPCODE_RFL: return RC_OPCODE_RFL; */
+ case TGSI_OPCODE_SEQ: return RC_OPCODE_SEQ;
+ case TGSI_OPCODE_SFL: return RC_OPCODE_SFL;
+ case TGSI_OPCODE_SGT: return RC_OPCODE_SGT;
+ case TGSI_OPCODE_SIN: return RC_OPCODE_SIN;
+ case TGSI_OPCODE_SLE: return RC_OPCODE_SLE;
+ case TGSI_OPCODE_SNE: return RC_OPCODE_SNE;
+ /* case TGSI_OPCODE_STR: return RC_OPCODE_STR; */
+ case TGSI_OPCODE_TEX: return RC_OPCODE_TEX;
+ case TGSI_OPCODE_TXD: return RC_OPCODE_TXD;
+ case TGSI_OPCODE_TXP: return RC_OPCODE_TXP;
+ /* case TGSI_OPCODE_UP2H: return RC_OPCODE_UP2H; */
+ /* case TGSI_OPCODE_UP2US: return RC_OPCODE_UP2US; */
+ /* case TGSI_OPCODE_UP4B: return RC_OPCODE_UP4B; */
+ /* case TGSI_OPCODE_UP4UB: return RC_OPCODE_UP4UB; */
+ /* case TGSI_OPCODE_X2D: return RC_OPCODE_X2D; */
+ /* case TGSI_OPCODE_ARA: return RC_OPCODE_ARA; */
+ /* case TGSI_OPCODE_ARR: return RC_OPCODE_ARR; */
+ /* case TGSI_OPCODE_BRA: return RC_OPCODE_BRA; */
+ /* case TGSI_OPCODE_CAL: return RC_OPCODE_CAL; */
+ /* case TGSI_OPCODE_RET: return RC_OPCODE_RET; */
+ /* case TGSI_OPCODE_SSG: return RC_OPCODE_SSG; */
+ case TGSI_OPCODE_CMP: return RC_OPCODE_CMP;
+ case TGSI_OPCODE_SCS: return RC_OPCODE_SCS;
+ case TGSI_OPCODE_TXB: return RC_OPCODE_TXB;
+ /* case TGSI_OPCODE_NRM: return RC_OPCODE_NRM; */
+ /* case TGSI_OPCODE_DIV: return RC_OPCODE_DIV; */
+ /* case TGSI_OPCODE_DP2: return RC_OPCODE_DP2; */
+ case TGSI_OPCODE_TXL: return RC_OPCODE_TXL;
+ case TGSI_OPCODE_BRK: return RC_OPCODE_BRK;
+ case TGSI_OPCODE_IF: return RC_OPCODE_IF;
+ case TGSI_OPCODE_BGNLOOP: return RC_OPCODE_BGNLOOP;
+ case TGSI_OPCODE_ELSE: return RC_OPCODE_ELSE;
+ case TGSI_OPCODE_ENDIF: return RC_OPCODE_ENDIF;
+ case TGSI_OPCODE_ENDLOOP: return RC_OPCODE_ENDLOOP;
+ /* case TGSI_OPCODE_PUSHA: return RC_OPCODE_PUSHA; */
+ /* case TGSI_OPCODE_POPA: return RC_OPCODE_POPA; */
+ case TGSI_OPCODE_CEIL: return RC_OPCODE_CEIL;
+ /* case TGSI_OPCODE_I2F: return RC_OPCODE_I2F; */
+ /* case TGSI_OPCODE_NOT: return RC_OPCODE_NOT; */
+ case TGSI_OPCODE_TRUNC: return RC_OPCODE_FLR;
+ /* case TGSI_OPCODE_SHL: return RC_OPCODE_SHL; */
+ /* case TGSI_OPCODE_ISHR: return RC_OPCODE_SHR; */
+ /* case TGSI_OPCODE_AND: return RC_OPCODE_AND; */
+ /* case TGSI_OPCODE_OR: return RC_OPCODE_OR; */
+ /* case TGSI_OPCODE_MOD: return RC_OPCODE_MOD; */
+ /* case TGSI_OPCODE_XOR: return RC_OPCODE_XOR; */
+ /* case TGSI_OPCODE_SAD: return RC_OPCODE_SAD; */
+ /* case TGSI_OPCODE_TXF: return RC_OPCODE_TXF; */
+ /* case TGSI_OPCODE_TXQ: return RC_OPCODE_TXQ; */
+ /* case TGSI_OPCODE_CONT: return RC_OPCODE_CONT; */
+ /* case TGSI_OPCODE_EMIT: return RC_OPCODE_EMIT; */
+ /* case TGSI_OPCODE_ENDPRIM: return RC_OPCODE_ENDPRIM; */
+ /* case TGSI_OPCODE_BGNLOOP2: return RC_OPCODE_BGNLOOP2; */
+ /* case TGSI_OPCODE_BGNSUB: return RC_OPCODE_BGNSUB; */
+ /* case TGSI_OPCODE_ENDLOOP2: return RC_OPCODE_ENDLOOP2; */
+ /* case TGSI_OPCODE_ENDSUB: return RC_OPCODE_ENDSUB; */
+ case TGSI_OPCODE_NOP: return RC_OPCODE_NOP;
+ /* gap */
+ /* case TGSI_OPCODE_NRM4: return RC_OPCODE_NRM4; */
+ /* case TGSI_OPCODE_CALLNZ: return RC_OPCODE_CALLNZ; */
+ /* case TGSI_OPCODE_IFC: return RC_OPCODE_IFC; */
+ /* case TGSI_OPCODE_BREAKC: return RC_OPCODE_BREAKC; */
+ case TGSI_OPCODE_KIL: return RC_OPCODE_KIL;
+ }
+
+ fprintf(stderr, "r300: Unknown TGSI/RC opcode: %s\n", tgsi_get_opcode_name(opcode));
+ return RC_OPCODE_ILLEGAL_OPCODE;
+}
+
+static unsigned translate_saturate(unsigned saturate)
+{
+ switch(saturate) {
+ default:
+ fprintf(stderr, "Unknown saturate mode: %i\n", saturate);
+ /* fall-through */
+ case TGSI_SAT_NONE: return RC_SATURATE_NONE;
+ case TGSI_SAT_ZERO_ONE: return RC_SATURATE_ZERO_ONE;
+ }
+}
+
+static unsigned translate_register_file(unsigned file)
+{
+ switch(file) {
+ case TGSI_FILE_CONSTANT: return RC_FILE_CONSTANT;
+ case TGSI_FILE_IMMEDIATE: return RC_FILE_CONSTANT;
+ case TGSI_FILE_INPUT: return RC_FILE_INPUT;
+ case TGSI_FILE_OUTPUT: return RC_FILE_OUTPUT;
+ default:
+ fprintf(stderr, "Unhandled register file: %i\n", file);
+ /* fall-through */
+ case TGSI_FILE_TEMPORARY: return RC_FILE_TEMPORARY;
+ case TGSI_FILE_ADDRESS: return RC_FILE_ADDRESS;
+ }
+}
+
+static int translate_register_index(
+ struct tgsi_to_rc * ttr,
+ unsigned file,
+ int index)
+{
+ if (file == TGSI_FILE_IMMEDIATE)
+ return ttr->immediate_offset + index;
+
+ return index;
+}
+
+static void transform_dstreg(
+ struct tgsi_to_rc * ttr,
+ struct rc_dst_register * dst,
+ struct tgsi_full_dst_register * src)
+{
+ dst->File = translate_register_file(src->Register.File);
+ dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index);
+ dst->WriteMask = src->Register.WriteMask;
+ dst->RelAddr = src->Register.Indirect;
+}
+
+static void transform_srcreg(
+ struct tgsi_to_rc * ttr,
+ struct rc_src_register * dst,
+ struct tgsi_full_src_register * src)
+{
+ unsigned i, j;
+
+ dst->File = translate_register_file(src->Register.File);
+ dst->Index = translate_register_index(ttr, src->Register.File, src->Register.Index);
+ dst->RelAddr = src->Register.Indirect;
+ dst->Swizzle = tgsi_util_get_full_src_register_swizzle(src, 0);
+ dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 1) << 3;
+ dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 2) << 6;
+ dst->Swizzle |= tgsi_util_get_full_src_register_swizzle(src, 3) << 9;
+ dst->Abs = src->Register.Absolute;
+ dst->Negate = src->Register.Negate ? RC_MASK_XYZW : 0;
+
+ if (src->Register.File == TGSI_FILE_IMMEDIATE) {
+ for (i = 0; i < ttr->imms_to_swizzle_count; i++) {
+ if (ttr->imms_to_swizzle[i].index == src->Register.Index) {
+ dst->File = RC_FILE_TEMPORARY;
+ dst->Index = 0;
+ dst->Swizzle = 0;
+ for (j = 0; j < 4; j++) {
+ dst->Swizzle |= GET_SWZ(ttr->imms_to_swizzle[i].swizzle,
+ tgsi_util_get_full_src_register_swizzle(src, j)) << (j * 3);
+ }
+ break;
+ }
+ }
+ }
+}
+
+static void transform_texture(struct rc_instruction * dst, struct tgsi_instruction_texture src,
+ uint32_t *shadowSamplers)
+{
+ switch(src.Texture) {
+ case TGSI_TEXTURE_1D:
+ dst->U.I.TexSrcTarget = RC_TEXTURE_1D;
+ break;
+ case TGSI_TEXTURE_2D:
+ dst->U.I.TexSrcTarget = RC_TEXTURE_2D;
+ break;
+ case TGSI_TEXTURE_3D:
+ dst->U.I.TexSrcTarget = RC_TEXTURE_3D;
+ break;
+ case TGSI_TEXTURE_CUBE:
+ dst->U.I.TexSrcTarget = RC_TEXTURE_CUBE;
+ break;
+ case TGSI_TEXTURE_RECT:
+ dst->U.I.TexSrcTarget = RC_TEXTURE_RECT;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ dst->U.I.TexSrcTarget = RC_TEXTURE_1D;
+ dst->U.I.TexShadow = 1;
+ *shadowSamplers |= 1 << dst->U.I.TexSrcUnit;
+ break;
+ case TGSI_TEXTURE_SHADOW2D:
+ dst->U.I.TexSrcTarget = RC_TEXTURE_2D;
+ dst->U.I.TexShadow = 1;
+ *shadowSamplers |= 1 << dst->U.I.TexSrcUnit;
+ break;
+ case TGSI_TEXTURE_SHADOWRECT:
+ dst->U.I.TexSrcTarget = RC_TEXTURE_RECT;
+ dst->U.I.TexShadow = 1;
+ *shadowSamplers |= 1 << dst->U.I.TexSrcUnit;
+ break;
+ }
+}
+
+static void transform_instruction(struct tgsi_to_rc * ttr, struct tgsi_full_instruction * src)
+{
+ struct rc_instruction * dst;
+ int i;
+
+ dst = rc_insert_new_instruction(ttr->compiler, ttr->compiler->Program.Instructions.Prev);
+ dst->U.I.Opcode = translate_opcode(src->Instruction.Opcode);
+ dst->U.I.SaturateMode = translate_saturate(src->Instruction.Saturate);
+
+ if (src->Instruction.NumDstRegs)
+ transform_dstreg(ttr, &dst->U.I.DstReg, &src->Dst[0]);
+
+ for(i = 0; i < src->Instruction.NumSrcRegs; ++i) {
+ if (src->Src[i].Register.File == TGSI_FILE_SAMPLER)
+ dst->U.I.TexSrcUnit = src->Src[i].Register.Index;
+ else
+ transform_srcreg(ttr, &dst->U.I.SrcReg[i], &src->Src[i]);
+ }
+
+ /* Texturing. */
+ if (src->Instruction.Texture)
+ transform_texture(dst, src->Texture,
+ &ttr->compiler->Program.ShadowSamplers);
+}
+
+static void handle_immediate(struct tgsi_to_rc * ttr,
+ struct tgsi_full_immediate * imm,
+ unsigned index)
+{
+ struct rc_constant constant;
+ unsigned swizzle = 0;
+ boolean can_swizzle = TRUE;
+ unsigned i;
+
+ for (i = 0; i < 4; i++) {
+ if (imm->u[i].Float == 0.0f) {
+ swizzle |= RC_SWIZZLE_ZERO << (i * 3);
+ } else if (imm->u[i].Float == 0.5f && ttr->use_half_swizzles) {
+ swizzle |= RC_SWIZZLE_HALF << (i * 3);
+ } else if (imm->u[i].Float == 1.0f) {
+ swizzle |= RC_SWIZZLE_ONE << (i * 3);
+ } else {
+ can_swizzle = FALSE;
+ break;
+ }
+ }
+
+ if (can_swizzle) {
+ ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].index = index;
+ ttr->imms_to_swizzle[ttr->imms_to_swizzle_count].swizzle = swizzle;
+ ttr->imms_to_swizzle_count++;
+ } else {
+ constant.Type = RC_CONSTANT_IMMEDIATE;
+ constant.Size = 4;
+ for(i = 0; i < 4; ++i)
+ constant.u.Immediate[i] = imm->u[i].Float;
+ rc_constants_add(&ttr->compiler->Program.Constants, &constant);
+ }
+}
+
+void r300_tgsi_to_rc(struct tgsi_to_rc * ttr,
+ const struct tgsi_token * tokens)
+{
+ struct tgsi_full_instruction *inst;
+ struct tgsi_parse_context parser;
+ unsigned imm_index = 0;
+ int i;
+
+ /* Allocate constants placeholders.
+ *
+ * Note: What if declared constants are not contiguous? */
+ for(i = 0; i <= ttr->info->file_max[TGSI_FILE_CONSTANT]; ++i) {
+ struct rc_constant constant;
+ memset(&constant, 0, sizeof(constant));
+ constant.Type = RC_CONSTANT_EXTERNAL;
+ constant.Size = 4;
+ constant.u.External = i;
+ rc_constants_add(&ttr->compiler->Program.Constants, &constant);
+ }
+
+ ttr->immediate_offset = ttr->compiler->Program.Constants.Count;
+
+ ttr->imms_to_swizzle = malloc(ttr->info->immediate_count * sizeof(struct swizzled_imms));
+ ttr->imms_to_swizzle_count = 0;
+
+ tgsi_parse_init(&parser, tokens);
+
+ while (!tgsi_parse_end_of_tokens(&parser)) {
+ tgsi_parse_token(&parser);
+
+ switch (parser.FullToken.Token.Type) {
+ case TGSI_TOKEN_TYPE_DECLARATION:
+ break;
+ case TGSI_TOKEN_TYPE_IMMEDIATE:
+ handle_immediate(ttr, &parser.FullToken.FullImmediate, imm_index);
+ imm_index++;
+ break;
+ case TGSI_TOKEN_TYPE_INSTRUCTION:
+ inst = &parser.FullToken.FullInstruction;
+ /* This hack with the RET opcode woudn't work with
+ * conditionals. */
+ if (inst->Instruction.Opcode == TGSI_OPCODE_END ||
+ inst->Instruction.Opcode == TGSI_OPCODE_RET) {
+ break;
+ }
+
+ transform_instruction(ttr, inst);
+ break;
+ }
+ }
+
+ tgsi_parse_free(&parser);
+
+ free(ttr->imms_to_swizzle);
+
+ rc_calculate_inputs_outputs(ttr->compiler);
+}
diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.h b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
new file mode 100644
index 0000000000..97641a954b
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_TGSI_TO_RC_H
+#define R300_TGSI_TO_RC_H
+
+#include "pipe/p_compiler.h"
+
+struct radeon_compiler;
+
+struct tgsi_full_declaration;
+struct tgsi_shader_info;
+struct tgsi_token;
+
+struct swizzled_imms {
+ unsigned index;
+ unsigned swizzle;
+};
+
+struct tgsi_to_rc {
+ struct radeon_compiler * compiler;
+ const struct tgsi_shader_info * info;
+
+ int immediate_offset;
+ struct swizzled_imms * imms_to_swizzle;
+ unsigned imms_to_swizzle_count;
+
+ /* Vertex shaders have no half swizzles, and no way to handle them, so
+ * until rc grows proper support, indicate if they're safe to use. */
+ boolean use_half_swizzles;
+};
+
+void r300_tgsi_to_rc(struct tgsi_to_rc * ttr, const struct tgsi_token * tokens);
+
+#endif /* R300_TGSI_TO_RC_H */
diff --git a/src/gallium/drivers/r300/r300_transfer.c b/src/gallium/drivers/r300/r300_transfer.c
new file mode 100644
index 0000000000..d41f258836
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_transfer.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_transfer.h"
+#include "r300_texture.h"
+#include "r300_screen_buffer.h"
+
+#include "util/u_memory.h"
+#include "util/u_format.h"
+
+struct r300_transfer {
+ /* Parent class */
+ struct pipe_transfer transfer;
+
+ /* Offset from start of buffer. */
+ unsigned offset;
+
+ /* Detiled texture. */
+ struct r300_texture *detiled_texture;
+};
+
+/* Convenience cast wrapper. */
+static INLINE struct r300_transfer*
+r300_transfer(struct pipe_transfer* transfer)
+{
+ return (struct r300_transfer*)transfer;
+}
+
+/* Copy from a tiled texture to a detiled one. */
+static void r300_copy_from_tiled_texture(struct pipe_context *ctx,
+ struct r300_transfer *r300transfer)
+{
+ struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer;
+ struct pipe_resource *tex = transfer->resource;
+ struct pipe_subresource subdst;
+
+ subdst.face = 0;
+ subdst.level = 0;
+
+ ctx->resource_copy_region(ctx, &r300transfer->detiled_texture->b.b, subdst,
+ 0, 0, 0,
+ tex, transfer->sr,
+ transfer->box.x, transfer->box.y, transfer->box.z,
+ transfer->box.width, transfer->box.height);
+}
+
+/* Copy a detiled texture to a tiled one. */
+static void r300_copy_into_tiled_texture(struct pipe_context *ctx,
+ struct r300_transfer *r300transfer)
+{
+ struct pipe_transfer *transfer = (struct pipe_transfer*)r300transfer;
+ struct pipe_resource *tex = transfer->resource;
+ struct pipe_subresource subsrc;
+
+ subsrc.face = 0;
+ subsrc.level = 0;
+
+ ctx->resource_copy_region(ctx, tex, transfer->sr,
+ transfer->box.x, transfer->box.y, transfer->box.z,
+ &r300transfer->detiled_texture->b.b, subsrc,
+ 0, 0, 0,
+ transfer->box.width, transfer->box.height);
+}
+
+struct pipe_transfer*
+r300_texture_get_transfer(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box)
+{
+ struct r300_texture *tex = r300_texture(texture);
+ struct r300_screen *r300screen = r300_screen(ctx->screen);
+ struct r300_transfer *trans;
+ struct pipe_resource base;
+ boolean referenced_cs, referenced_hw, blittable;
+
+ referenced_cs = r300screen->rws->is_buffer_referenced(
+ r300screen->rws, tex->buffer, R300_REF_CS);
+ if (referenced_cs) {
+ referenced_hw = TRUE;
+ } else {
+ referenced_hw = r300screen->rws->is_buffer_referenced(
+ r300screen->rws, tex->buffer, R300_REF_HW);
+ }
+
+ blittable = ctx->screen->is_format_supported(
+ ctx->screen, texture->format, texture->target, 0,
+ PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET, 0);
+
+ trans = CALLOC_STRUCT(r300_transfer);
+ if (trans) {
+ /* Initialize the transfer object. */
+ pipe_resource_reference(&trans->transfer.resource, texture);
+ trans->transfer.sr = sr;
+ trans->transfer.usage = usage;
+ trans->transfer.box = *box;
+
+ /* If the texture is tiled, we must create a temporary detiled texture
+ * for this transfer.
+ * Also make write transfers pipelined. */
+ if (tex->microtile || tex->macrotile ||
+ ((referenced_hw & !(usage & PIPE_TRANSFER_READ)) && blittable)) {
+ base.target = PIPE_TEXTURE_2D;
+ base.format = texture->format;
+ base.width0 = box->width;
+ base.height0 = box->height;
+ base.depth0 = 0;
+ base.last_level = 0;
+ base.nr_samples = 0;
+ base.usage = PIPE_USAGE_DYNAMIC;
+ base.bind = 0;
+ base.flags = R300_RESOURCE_FLAG_TRANSFER;
+
+ /* For texture reading, the temporary (detiled) texture is used as
+ * a render target when blitting from a tiled texture. */
+ if (usage & PIPE_TRANSFER_READ) {
+ base.bind |= PIPE_BIND_RENDER_TARGET;
+ }
+ /* For texture writing, the temporary texture is used as a sampler
+ * when blitting into a tiled texture. */
+ if (usage & PIPE_TRANSFER_WRITE) {
+ base.bind |= PIPE_BIND_SAMPLER_VIEW;
+ }
+
+ /* Create the temporary texture. */
+ trans->detiled_texture = r300_texture(
+ ctx->screen->resource_create(ctx->screen,
+ &base));
+
+ if (!trans->detiled_texture) {
+ /* Oh crap, the thing can't create the texture.
+ * Let's flush and try again. */
+ ctx->flush(ctx, 0, NULL);
+
+ trans->detiled_texture = r300_texture(
+ ctx->screen->resource_create(ctx->screen,
+ &base));
+
+ if (!trans->detiled_texture) {
+ /* For linear textures, it's safe to fallback to
+ * an unpipelined transfer. */
+ if (!tex->microtile && !tex->macrotile) {
+ goto unpipelined;
+ }
+
+ /* Otherwise, go to hell. */
+ fprintf(stderr,
+ "r300: Failed to create a transfer object, praise.\n");
+ FREE(trans);
+ return NULL;
+ }
+ }
+
+ assert(!trans->detiled_texture->microtile &&
+ !trans->detiled_texture->macrotile);
+
+ /* Set the stride.
+ *
+ * Even though we are using an internal texture for this,
+ * the transfer sr, box and usage parameters still reflect
+ * the arguments received to get_transfer. We just do the
+ * right thing internally.
+ */
+ trans->transfer.stride =
+ r300_texture_get_stride(r300screen, trans->detiled_texture, 0);
+
+ if (usage & PIPE_TRANSFER_READ) {
+ /* We cannot map a tiled texture directly because the data is
+ * in a different order, therefore we do detiling using a blit. */
+ r300_copy_from_tiled_texture(ctx, trans);
+
+ /* Always referenced in the blit. */
+ ctx->flush(ctx, 0, NULL);
+ }
+ return &trans->transfer;
+ }
+
+ unpipelined:
+ /* Unpipelined transfer. */
+ trans->transfer.stride =
+ r300_texture_get_stride(r300screen, tex, sr.level);
+ trans->offset = r300_texture_get_offset(tex, sr.level, box->z, sr.face);
+
+ if (referenced_cs && (usage & PIPE_TRANSFER_READ))
+ ctx->flush(ctx, PIPE_FLUSH_RENDER_CACHE, NULL);
+ return &trans->transfer;
+ }
+ return NULL;
+}
+
+void r300_texture_transfer_destroy(struct pipe_context *ctx,
+ struct pipe_transfer *trans)
+{
+ struct r300_transfer *r300transfer = r300_transfer(trans);
+
+ if (r300transfer->detiled_texture) {
+ if (trans->usage & PIPE_TRANSFER_WRITE) {
+ r300_copy_into_tiled_texture(ctx, r300transfer);
+ }
+
+ pipe_resource_reference(
+ (struct pipe_resource**)&r300transfer->detiled_texture, NULL);
+ }
+ pipe_resource_reference(&trans->resource, NULL);
+ FREE(trans);
+}
+
+void* r300_texture_transfer_map(struct pipe_context *ctx,
+ struct pipe_transfer *transfer)
+{
+ struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys;
+ struct r300_transfer *r300transfer = r300_transfer(transfer);
+ struct r300_texture *tex = r300_texture(transfer->resource);
+ char *map;
+ enum pipe_format format = tex->b.b.format;
+
+ if (r300transfer->detiled_texture) {
+ /* The detiled texture is of the same size as the region being mapped
+ * (no offset needed). */
+ return rws->buffer_map(rws,
+ r300transfer->detiled_texture->buffer,
+ transfer->usage);
+ } else {
+ /* Tiling is disabled. */
+ map = rws->buffer_map(rws, tex->buffer,
+ transfer->usage);
+
+ if (!map) {
+ return NULL;
+ }
+
+ return map + r300_transfer(transfer)->offset +
+ transfer->box.y / util_format_get_blockheight(format) * transfer->stride +
+ transfer->box.x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
+ }
+}
+
+void r300_texture_transfer_unmap(struct pipe_context *ctx,
+ struct pipe_transfer *transfer)
+{
+ struct r300_winsys_screen *rws = (struct r300_winsys_screen *)ctx->winsys;
+ struct r300_transfer *r300transfer = r300_transfer(transfer);
+ struct r300_texture *tex = r300_texture(transfer->resource);
+
+ if (r300transfer->detiled_texture) {
+ rws->buffer_unmap(rws, r300transfer->detiled_texture->buffer);
+ } else {
+ rws->buffer_unmap(rws, tex->buffer);
+ }
+}
diff --git a/src/gallium/drivers/r300/r300_transfer.h b/src/gallium/drivers/r300/r300_transfer.h
new file mode 100644
index 0000000000..0d32a68d1f
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_transfer.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2010 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_TRANSFER
+#define R300_TRANSFER
+
+#include "pipe/p_context.h"
+
+struct r300_context;
+
+struct pipe_transfer*
+r300_texture_get_transfer(struct pipe_context *ctx,
+ struct pipe_resource *texture,
+ struct pipe_subresource sr,
+ unsigned usage,
+ const struct pipe_box *box);
+
+void
+r300_texture_transfer_destroy(struct pipe_context *ctx,
+ struct pipe_transfer *trans);
+
+void*
+r300_texture_transfer_map(struct pipe_context *ctx,
+ struct pipe_transfer *transfer);
+
+void
+r300_texture_transfer_unmap(struct pipe_context *ctx,
+ struct pipe_transfer *transfer);
+
+
+#endif
diff --git a/src/gallium/drivers/r300/r300_vs.c b/src/gallium/drivers/r300/r300_vs.c
new file mode 100644
index 0000000000..b25c786d6b
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_vs.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_vs.h"
+
+#include "r300_context.h"
+#include "r300_screen.h"
+#include "r300_tgsi_to_rc.h"
+#include "r300_reg.h"
+
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_ureg.h"
+
+#include "radeon_compiler.h"
+
+/* Convert info about VS output semantics into r300_shader_semantics. */
+static void r300_shader_read_vs_outputs(
+ struct tgsi_shader_info* info,
+ struct r300_shader_semantics* vs_outputs)
+{
+ int i;
+ unsigned index;
+
+ r300_shader_semantics_reset(vs_outputs);
+
+ for (i = 0; i < info->num_outputs; i++) {
+ index = info->output_semantic_index[i];
+
+ switch (info->output_semantic_name[i]) {
+ case TGSI_SEMANTIC_POSITION:
+ assert(index == 0);
+ vs_outputs->pos = i;
+ break;
+
+ case TGSI_SEMANTIC_PSIZE:
+ assert(index == 0);
+ vs_outputs->psize = i;
+ break;
+
+ case TGSI_SEMANTIC_COLOR:
+ assert(index < ATTR_COLOR_COUNT);
+ vs_outputs->color[index] = i;
+ break;
+
+ case TGSI_SEMANTIC_BCOLOR:
+ assert(index < ATTR_COLOR_COUNT);
+ vs_outputs->bcolor[index] = i;
+ break;
+
+ case TGSI_SEMANTIC_GENERIC:
+ assert(index < ATTR_GENERIC_COUNT);
+ vs_outputs->generic[index] = i;
+ break;
+
+ case TGSI_SEMANTIC_FOG:
+ assert(index == 0);
+ vs_outputs->fog = i;
+ break;
+
+ case TGSI_SEMANTIC_EDGEFLAG:
+ assert(index == 0);
+ fprintf(stderr, "r300 VP: cannot handle edgeflag output.\n");
+ break;
+
+ default:
+ fprintf(stderr, "r300 VP: unknown vertex output semantic: %i.\n",
+ info->output_semantic_name[i]);
+ }
+ }
+
+ /* WPOS is a straight copy of POSITION and it's always emitted. */
+ vs_outputs->wpos = i;
+}
+
+static void set_vertex_inputs_outputs(struct r300_vertex_program_compiler * c)
+{
+ struct r300_vertex_shader * vs = c->UserData;
+ struct r300_shader_semantics* outputs = &vs->outputs;
+ struct tgsi_shader_info* info = &vs->info;
+ int i, reg = 0;
+ boolean any_bcolor_used = outputs->bcolor[0] != ATTR_UNUSED ||
+ outputs->bcolor[1] != ATTR_UNUSED;
+
+ /* Fill in the input mapping */
+ for (i = 0; i < info->num_inputs; i++)
+ c->code->inputs[i] = i;
+
+ /* Position. */
+ if (outputs->pos != ATTR_UNUSED) {
+ c->code->outputs[outputs->pos] = reg++;
+ } else {
+ assert(0);
+ }
+
+ /* Point size. */
+ if (outputs->psize != ATTR_UNUSED) {
+ c->code->outputs[outputs->psize] = reg++;
+ }
+
+ /* If we're writing back facing colors we need to send
+ * four colors to make front/back face colors selection work.
+ * If the vertex program doesn't write all 4 colors, lets
+ * pretend it does by skipping output index reg so the colors
+ * get written into appropriate output vectors.
+ */
+
+ /* Colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (outputs->color[i] != ATTR_UNUSED) {
+ c->code->outputs[outputs->color[i]] = reg++;
+ } else if (any_bcolor_used ||
+ outputs->color[1] != ATTR_UNUSED) {
+ reg++;
+ }
+ }
+
+ /* Back-face colors. */
+ for (i = 0; i < ATTR_COLOR_COUNT; i++) {
+ if (outputs->bcolor[i] != ATTR_UNUSED) {
+ c->code->outputs[outputs->bcolor[i]] = reg++;
+ } else if (any_bcolor_used) {
+ reg++;
+ }
+ }
+
+ /* Texture coordinates. */
+ for (i = 0; i < ATTR_GENERIC_COUNT; i++) {
+ if (outputs->generic[i] != ATTR_UNUSED) {
+ c->code->outputs[outputs->generic[i]] = reg++;
+ }
+ }
+
+ /* Fog coordinates. */
+ if (outputs->fog != ATTR_UNUSED) {
+ c->code->outputs[outputs->fog] = reg++;
+ }
+
+ /* WPOS. */
+ c->code->outputs[outputs->wpos] = reg++;
+}
+
+void r300_init_vs_outputs(struct r300_vertex_shader *vs)
+{
+ tgsi_scan_shader(vs->state.tokens, &vs->info);
+ r300_shader_read_vs_outputs(&vs->info, &vs->outputs);
+}
+
+static void r300_dummy_vertex_shader(
+ struct r300_context* r300,
+ struct r300_vertex_shader* shader)
+{
+ struct ureg_program *ureg;
+ struct ureg_dst dst;
+ struct ureg_src imm;
+
+ /* Make a simple vertex shader which outputs (0, 0, 0, 1),
+ * effectively rendering nothing. */
+ ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
+ dst = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
+ imm = ureg_imm4f(ureg, 0, 0, 0, 1);
+
+ ureg_MOV(ureg, dst, imm);
+ ureg_END(ureg);
+
+ shader->state.tokens = tgsi_dup_tokens(ureg_finalize(ureg));
+ ureg_destroy(ureg);
+
+ shader->dummy = TRUE;
+ r300_init_vs_outputs(shader);
+ r300_translate_vertex_shader(r300, shader);
+}
+
+void r300_translate_vertex_shader(struct r300_context *r300,
+ struct r300_vertex_shader *vs)
+{
+ struct r300_vertex_program_compiler compiler;
+ struct tgsi_to_rc ttr;
+
+ /* Setup the compiler */
+ rc_init(&compiler.Base);
+
+ compiler.Base.Debug = DBG_ON(r300, DBG_VP);
+ compiler.code = &vs->code;
+ compiler.UserData = vs;
+ compiler.Base.is_r500 = r300->screen->caps.is_r500;
+ compiler.Base.max_temp_regs = 32;
+
+ if (compiler.Base.Debug) {
+ debug_printf("r300: Initial vertex program\n");
+ tgsi_dump(vs->state.tokens, 0);
+ }
+
+ /* Translate TGSI to our internal representation */
+ ttr.compiler = &compiler.Base;
+ ttr.info = &vs->info;
+ ttr.use_half_swizzles = FALSE;
+
+ r300_tgsi_to_rc(&ttr, vs->state.tokens);
+
+ compiler.RequiredOutputs = ~(~0 << (vs->info.num_outputs + 1));
+ compiler.SetHwInputOutput = &set_vertex_inputs_outputs;
+
+ /* Insert the WPOS output. */
+ rc_copy_output(&compiler.Base, 0, vs->outputs.wpos);
+
+ /* Invoke the compiler */
+ r3xx_compile_vertex_program(&compiler);
+ if (compiler.Base.Error) {
+ /* XXX We should fallback using Draw. */
+ fprintf(stderr, "r300 VP: Compiler error:\n%sUsing a dummy shader"
+ " instead.\nIf there's an 'unknown opcode' message, please"
+ " file a bug report and attach this log.\n", compiler.Base.ErrorMsg);
+
+ if (vs->dummy) {
+ fprintf(stderr, "r300 VP: Cannot compile the dummy shader! "
+ "Giving up...\n");
+ abort();
+ }
+
+ rc_destroy(&compiler.Base);
+ r300_dummy_vertex_shader(r300, vs);
+ return;
+ }
+
+ /* Initialize numbers of constants for each type. */
+ vs->externals_count = ttr.immediate_offset;
+ vs->immediates_count = vs->code.constants.Count - vs->externals_count;
+
+ /* And, finally... */
+ rc_destroy(&compiler.Base);
+}
diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
new file mode 100644
index 0000000000..170de6c79d
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_vs.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2009 Corbin Simpson <MostAwesomeDude@gmail.com>
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_VS_H
+#define R300_VS_H
+
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_scan.h"
+#include "radeon_code.h"
+
+#include "r300_context.h"
+#include "r300_shader_semantics.h"
+
+struct r300_context;
+
+struct r300_vertex_shader {
+ /* Parent class */
+ struct pipe_shader_state state;
+
+ struct tgsi_shader_info info;
+ struct r300_shader_semantics outputs;
+
+ /* Whether the shader was replaced by a dummy one due to a shader
+ * compilation failure. */
+ boolean dummy;
+
+ /* Numbers of constants for each type. */
+ unsigned externals_count;
+ unsigned immediates_count;
+
+ /* HWTCL-specific. */
+ /* Machine code (if translated) */
+ struct r300_vertex_program_code code;
+
+ /* SWTCL-specific. */
+ void *draw_vs;
+};
+
+void r300_init_vs_outputs(struct r300_vertex_shader *vs);
+
+void r300_translate_vertex_shader(struct r300_context *r300,
+ struct r300_vertex_shader *vs);
+
+void r300_draw_init_vertex_shader(struct draw_context *draw,
+ struct r300_vertex_shader *vs);
+
+#endif /* R300_VS_H */
diff --git a/src/gallium/drivers/r300/r300_vs_draw.c b/src/gallium/drivers/r300/r300_vs_draw.c
new file mode 100644
index 0000000000..d64040b891
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_vs_draw.c
@@ -0,0 +1,358 @@
+/**************************************************************************
+ *
+ * Copyright 2009 Marek Olšák <maraeo@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/* This file contains the vertex shader tranformations for SW TCL needed
+ * to overcome the limitations of the r300 rasterizer.
+ *
+ * Transformations:
+ * 1) If the secondary color output is present, the primary color must be
+ * inserted before it.
+ * 2) If any back-face color output is present, there must be all 4 color
+ * outputs and missing ones must be inserted.
+ * 3) Insert a trailing texcoord output containing a copy of POS, for WPOS.
+ *
+ * I know this code is cumbersome, but I don't know of any nicer way
+ * of transforming TGSI shaders. ~ M.
+ */
+
+#include "r300_vs.h"
+
+#include <stdio.h>
+
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "draw/draw_context.h"
+
+struct vs_transform_context {
+ struct tgsi_transform_context base;
+
+ boolean color_used[2];
+ boolean bcolor_used[2];
+ boolean temp_used[128];
+
+ /* Index of the pos output, typically 0. */
+ unsigned pos_output;
+ /* Index of the pos temp where all writes of pos are redirected to. */
+ unsigned pos_temp;
+ /* The index of the last generic output, after which we insert a new
+ * output for WPOS. */
+ int last_generic;
+
+ unsigned num_outputs;
+ /* Used to shift output decl. indices when inserting new ones. */
+ unsigned decl_shift;
+ /* Used to remap writes to output decls if their indices changed. */
+ unsigned out_remap[32];
+
+ /* First instruction processed? */
+ boolean first_instruction;
+ /* End instruction processed? */
+ boolean end_instruction;
+};
+
+static void emit_temp(struct tgsi_transform_context *ctx, unsigned reg)
+{
+ struct tgsi_full_declaration decl;
+
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_TEMPORARY;
+ decl.Range.First = decl.Range.Last = reg;
+ ctx->emit_declaration(ctx, &decl);
+}
+
+static void emit_output(struct tgsi_transform_context *ctx,
+ unsigned name, unsigned index, unsigned interp,
+ unsigned reg)
+{
+ struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
+ struct tgsi_full_declaration decl;
+
+ decl = tgsi_default_full_declaration();
+ decl.Declaration.File = TGSI_FILE_OUTPUT;
+ decl.Declaration.Interpolate = interp;
+ decl.Declaration.Semantic = TRUE;
+ decl.Semantic.Name = name;
+ decl.Semantic.Index = index;
+ decl.Range.First = decl.Range.Last = reg;
+ ctx->emit_declaration(ctx, &decl);
+ ++vsctx->num_outputs;
+}
+
+static void insert_output(struct tgsi_transform_context *ctx,
+ struct tgsi_full_declaration *before,
+ unsigned name, unsigned index, unsigned interp)
+{
+ struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
+ unsigned i;
+
+ /* Make a place for the new output. */
+ for (i = before->Range.First; i < Elements(vsctx->out_remap); i++) {
+ ++vsctx->out_remap[i];
+ }
+
+ /* Insert the new output. */
+ emit_output(ctx, name, index, interp, before->Range.First);
+
+ ++vsctx->decl_shift;
+}
+
+static void insert_trailing_bcolor(struct tgsi_transform_context *ctx,
+ struct tgsi_full_declaration *before)
+{
+ struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
+
+ /* If BCOLOR0 is used, make sure BCOLOR1 is present too. Otherwise
+ * the rasterizer doesn't do the color selection correctly. */
+ if (vsctx->bcolor_used[0] && !vsctx->bcolor_used[1]) {
+ if (before) {
+ insert_output(ctx, before, TGSI_SEMANTIC_BCOLOR, 1,
+ TGSI_INTERPOLATE_LINEAR);
+ } else {
+ emit_output(ctx, TGSI_SEMANTIC_BCOLOR, 1,
+ TGSI_INTERPOLATE_LINEAR, vsctx->num_outputs);
+ }
+ vsctx->bcolor_used[1] = TRUE;
+ }
+}
+
+static void transform_decl(struct tgsi_transform_context *ctx,
+ struct tgsi_full_declaration *decl)
+{
+ struct vs_transform_context *vsctx = (struct vs_transform_context *)ctx;
+ unsigned i;
+
+ if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+ switch (decl->Semantic.Name) {
+ case TGSI_SEMANTIC_POSITION:
+ vsctx->pos_output = decl->Range.First;
+ break;
+
+ case TGSI_SEMANTIC_COLOR:
+ assert(decl->Semantic.Index < 2);
+ vsctx->color_used[decl->Semantic.Index] = TRUE;
+
+ /* We must rasterize the first color if the second one is
+ * used, otherwise the rasterizer doesn't do the color
+ * selection correctly. Declare it, but don't write to it. */
+ if (decl->Semantic.Index == 1 && !vsctx->color_used[0]) {
+ insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
+ TGSI_INTERPOLATE_LINEAR);
+ vsctx->color_used[0] = TRUE;
+ }
+ break;
+
+ case TGSI_SEMANTIC_BCOLOR:
+ assert(decl->Semantic.Index < 2);
+ vsctx->bcolor_used[decl->Semantic.Index] = TRUE;
+
+ /* We must rasterize all 4 colors if back-face colors are
+ * used, otherwise the rasterizer doesn't do the color
+ * selection correctly. Declare it, but don't write to it. */
+ if (!vsctx->color_used[0]) {
+ insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 0,
+ TGSI_INTERPOLATE_LINEAR);
+ vsctx->color_used[0] = TRUE;
+ }
+ if (!vsctx->color_used[1]) {
+ insert_output(ctx, decl, TGSI_SEMANTIC_COLOR, 1,
+ TGSI_INTERPOLATE_LINEAR);
+ vsctx->color_used[1] = TRUE;
+ }
+ if (decl->Semantic.Index == 1 && !vsctx->bcolor_used[0]) {
+ insert_output(ctx, decl, TGSI_SEMANTIC_BCOLOR, 0,
+ TGSI_INTERPOLATE_LINEAR);
+ vsctx->color_used[2] = TRUE;
+ }
+ /* One more case is handled in insert_trailing_bcolor. */
+ break;
+
+ case TGSI_SEMANTIC_GENERIC:
+ vsctx->last_generic = MAX2(vsctx->last_generic, decl->Semantic.Index);
+ break;
+ }
+
+ if (decl->Semantic.Name != TGSI_SEMANTIC_BCOLOR) {
+ /* Insert it as soon as possible. */
+ insert_trailing_bcolor(ctx, decl);
+ }
+
+ /* Since we're inserting new outputs in between, the following outputs
+ * should be moved to the right so that they don't overlap with
+ * the newly added ones. */
+ decl->Range.First += vsctx->decl_shift;
+ decl->Range.Last += vsctx->decl_shift;
+
+ ++vsctx->num_outputs;
+ } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+ for (i = decl->Range.First; i <= decl->Range.Last; i++) {
+ vsctx->temp_used[i] = TRUE;
+ }
+ }
+
+ ctx->emit_declaration(ctx, decl);
+}
+
+static void transform_inst(struct tgsi_transform_context *ctx,
+ struct tgsi_full_instruction *inst)
+{
+ struct vs_transform_context *vsctx = (struct vs_transform_context *) ctx;
+ struct tgsi_full_instruction new_inst;
+ unsigned i;
+
+ if (!vsctx->first_instruction) {
+ vsctx->first_instruction = TRUE;
+
+ /* The trailing BCOLOR should be inserted before the code
+ * if it hasn't already been done so. */
+ insert_trailing_bcolor(ctx, NULL);
+
+ /* Insert the generic output for WPOS. */
+ emit_output(ctx, TGSI_SEMANTIC_GENERIC, vsctx->last_generic + 1,
+ TGSI_INTERPOLATE_PERSPECTIVE, vsctx->num_outputs);
+
+ /* Find a free temp for POSITION. */
+ for (i = 0; i < Elements(vsctx->temp_used); i++) {
+ if (!vsctx->temp_used[i]) {
+ emit_temp(ctx, i);
+ vsctx->pos_temp = i;
+ break;
+ }
+ }
+ }
+
+ if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
+ /* MOV OUT[pos_output], TEMP[pos_temp]; */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+ new_inst.Instruction.NumDstRegs = 1;
+ new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
+ new_inst.Dst[0].Register.Index = vsctx->pos_output;
+ new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+ new_inst.Instruction.NumSrcRegs = 1;
+ new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+ new_inst.Src[0].Register.Index = vsctx->pos_temp;
+ ctx->emit_instruction(ctx, &new_inst);
+
+ /* MOV OUT[n-1], TEMP[pos_temp]; */
+ new_inst = tgsi_default_full_instruction();
+ new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+ new_inst.Instruction.NumDstRegs = 1;
+ new_inst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
+ new_inst.Dst[0].Register.Index = vsctx->num_outputs - 1;
+ new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+ new_inst.Instruction.NumSrcRegs = 1;
+ new_inst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
+ new_inst.Src[0].Register.Index = vsctx->pos_temp;
+ ctx->emit_instruction(ctx, &new_inst);
+
+ vsctx->end_instruction = TRUE;
+ } else {
+ /* Not an END instruction. */
+ /* Fix writes to outputs. */
+ for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+ struct tgsi_full_dst_register *dst = &inst->Dst[i];
+ if (dst->Register.File == TGSI_FILE_OUTPUT) {
+ if (dst->Register.Index == vsctx->pos_output) {
+ /* Replace writes to OUT[pos_output] with TEMP[pos_temp]. */
+ dst->Register.File = TGSI_FILE_TEMPORARY;
+ dst->Register.Index = vsctx->pos_temp;
+ } else {
+ /* Not a position, good...
+ * Since we were changing the indices of output decls,
+ * we must redirect writes into them too. */
+ dst->Register.Index = vsctx->out_remap[dst->Register.Index];
+ }
+ }
+ }
+
+ /* Inserting 2 instructions before the END opcode moves all following
+ * labels by 2. Subroutines are always after the END opcode so
+ * they're always moved. */
+ if (inst->Instruction.Opcode == TGSI_OPCODE_CAL) {
+ inst->Label.Label += 2;
+ }
+ /* The labels of the following opcodes are moved only after
+ * the END opcode. */
+ if (vsctx->end_instruction &&
+ (inst->Instruction.Opcode == TGSI_OPCODE_IF ||
+ inst->Instruction.Opcode == TGSI_OPCODE_ELSE ||
+ inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP ||
+ inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP)) {
+ inst->Label.Label += 2;
+ }
+ }
+
+ ctx->emit_instruction(ctx, inst);
+}
+
+void r300_draw_init_vertex_shader(struct draw_context *draw,
+ struct r300_vertex_shader *vs)
+{
+ struct pipe_shader_state new_vs;
+ struct vs_transform_context transform;
+ const uint newLen = tgsi_num_tokens(vs->state.tokens) + 100 /* XXX */;
+ unsigned i;
+
+ new_vs.tokens = tgsi_alloc_tokens(newLen);
+ if (new_vs.tokens == NULL)
+ return;
+
+ memset(&transform, 0, sizeof(transform));
+ for (i = 0; i < Elements(transform.out_remap); i++) {
+ transform.out_remap[i] = i;
+ }
+ transform.last_generic = -1;
+ transform.base.transform_instruction = transform_inst;
+ transform.base.transform_declaration = transform_decl;
+
+ tgsi_transform_shader(vs->state.tokens,
+ (struct tgsi_token*)new_vs.tokens,
+ newLen, &transform.base);
+
+#if 0
+ printf("----------------------------------------------\norig shader:\n");
+ tgsi_dump(vs->state.tokens, 0);
+ printf("----------------------------------------------\nnew shader:\n");
+ tgsi_dump(new_vs.tokens, 0);
+ printf("----------------------------------------------\n");
+#endif
+
+ /* Free old tokens. */
+ FREE((void*)vs->state.tokens);
+
+ vs->draw_vs = draw_create_vertex_shader(draw, &new_vs);
+
+ /* Instead of duplicating and freeing the tokens, copy the pointer directly. */
+ vs->state.tokens = new_vs.tokens;
+
+ /* Init the VS output table for the rasterizer. */
+ r300_init_vs_outputs(vs);
+
+ /* Make the last generic be WPOS. */
+ vs->outputs.wpos = vs->outputs.generic[transform.last_generic + 1];
+ vs->outputs.generic[transform.last_generic + 1] = ATTR_UNUSED;
+}
diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h
new file mode 100644
index 0000000000..77c1c13ef9
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_winsys.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_WINSYS_H
+#define R300_WINSYS_H
+
+/* The public interface header for the r300 pipe driver.
+ * Any winsys hosting this pipe needs to implement r300_winsys and then
+ * call r300_create_screen to start things. */
+
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "r300_defines.h"
+
+struct r300_winsys_buffer;
+
+enum r300_value_id {
+ R300_VID_PCI_ID,
+ R300_VID_GB_PIPES,
+ R300_VID_Z_PIPES,
+ R300_VID_SQUARE_TILING_SUPPORT,
+ R300_VID_DRM_2_3_0,
+};
+
+enum r300_reference_domain { /* bitfield */
+ R300_REF_CS = 1,
+ R300_REF_HW = 2
+};
+
+struct r300_winsys_screen {
+ void (*destroy)(struct r300_winsys_screen *ws);
+
+ /**
+ * Buffer management. Buffer attributes are mostly fixed over its lifetime.
+ *
+ * Remember that gallium gets to choose the interface it needs, and the
+ * window systems must then implement that interface (rather than the
+ * other way around...).
+ *
+ * usage is a bitmask of R300_WINSYS_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This
+ * usage argument is only an optimization hint, not a guarantee, therefore
+ * proper behavior must be observed in all circumstances.
+ *
+ * alignment indicates the client's alignment requirements, eg for
+ * SSE instructions.
+ */
+ struct r300_winsys_buffer *(*buffer_create)(struct r300_winsys_screen *ws,
+ unsigned alignment,
+ unsigned usage,
+ enum r300_buffer_domain domain,
+ unsigned size);
+
+ /**
+ * Map the entire data store of a buffer object into the client's address.
+ * flags is bitmask of R300_WINSYS_BUFFER_USAGE_CPU_READ/WRITE flags.
+ */
+ void *(*buffer_map)( struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf,
+ unsigned usage);
+
+ void (*buffer_unmap)( struct r300_winsys_screen *ws,
+ struct r300_winsys_buffer *buf );
+
+ void (*buffer_destroy)( struct r300_winsys_buffer *buf );
+
+
+ void (*buffer_reference)(struct r300_winsys_screen *rws,
+ struct r300_winsys_buffer **pdst,
+ struct r300_winsys_buffer *src);
+
+ void (*buffer_wait)(struct r300_winsys_screen *rws,
+ struct r300_winsys_buffer *buf);
+
+ /* Add a pipe_resource to the list of buffer objects to validate. */
+ boolean (*add_buffer)(struct r300_winsys_screen *winsys,
+ struct r300_winsys_buffer *buf,
+ enum r300_buffer_domain rd,
+ enum r300_buffer_domain wd);
+
+
+ /* Revalidate all currently setup pipe_buffers.
+ * Returns TRUE if a flush is required. */
+ boolean (*validate)(struct r300_winsys_screen* winsys);
+
+ /* Return the number of free dwords in CS. */
+ unsigned (*get_cs_free_dwords)(struct r300_winsys_screen *winsys);
+
+ /* Return the pointer to the first free dword in CS and assume a pipe
+ * driver wants to fill "count" dwords. */
+ uint32_t *(*get_cs_pointer)(struct r300_winsys_screen *winsys,
+ unsigned count);
+
+ /* Write a dword to the command buffer. */
+ void (*write_cs_dword)(struct r300_winsys_screen* winsys, uint32_t dword);
+
+ /* Write a table of dwords to the command buffer. */
+ void (*write_cs_table)(struct r300_winsys_screen* winsys,
+ const void *dwords, unsigned count);
+
+ /* Write a relocated dword to the command buffer. */
+ void (*write_cs_reloc)(struct r300_winsys_screen *winsys,
+ struct r300_winsys_buffer *buf,
+ enum r300_buffer_domain rd,
+ enum r300_buffer_domain wd,
+ uint32_t flags);
+
+ /* Flush the CS. */
+ void (*flush_cs)(struct r300_winsys_screen* winsys);
+
+ /* winsys flush - callback from winsys when flush required */
+ void (*set_flush_cb)(struct r300_winsys_screen *winsys,
+ void (*flush_cb)(void *), void *data);
+
+ void (*reset_bos)(struct r300_winsys_screen *winsys);
+
+ void (*buffer_get_tiling)(struct r300_winsys_screen *winsys,
+ struct r300_winsys_buffer *buffer,
+ enum r300_buffer_tiling *microtiled,
+ enum r300_buffer_tiling *macrotiled);
+
+ void (*buffer_set_tiling)(struct r300_winsys_screen *winsys,
+ struct r300_winsys_buffer *buffer,
+ uint32_t pitch,
+ enum r300_buffer_tiling microtiled,
+ enum r300_buffer_tiling macrotiled);
+
+ uint32_t (*get_value)(struct r300_winsys_screen *winsys,
+ enum r300_value_id vid);
+
+ struct r300_winsys_buffer *(*buffer_from_handle)(struct r300_winsys_screen *winsys,
+ unsigned handle);
+
+ boolean (*buffer_get_handle)(struct r300_winsys_screen *winsys,
+ struct r300_winsys_buffer *buffer,
+ struct winsys_handle *whandle);
+
+ boolean (*is_buffer_referenced)(struct r300_winsys_screen *winsys,
+ struct r300_winsys_buffer *buffer,
+ enum r300_reference_domain domain);
+};
+
+struct r300_winsys_screen *
+r300_winsys_screen(struct pipe_screen *screen);
+
+/* Creates a new r300 screen. */
+struct pipe_screen* r300_create_screen(struct r300_winsys_screen *rws);
+
+#endif /* R300_WINSYS_H */