summaryrefslogtreecommitdiff
path: root/src/mesa/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r--src/mesa/drivers/dri/common/dri_metaops.c1
-rw-r--r--src/mesa/drivers/dri/i915/Makefile2
-rw-r--r--src/mesa/drivers/dri/i965/Makefile2
-rw-r--r--src/mesa/drivers/dri/i965/brw_state.h124
-rw-r--r--src/mesa/drivers/dri/i965/brw_util.c2
-rw-r--r--src/mesa/drivers/dri/intel/intel_context.c30
-rw-r--r--src/mesa/drivers/dri/intel/intel_extensions_es2.c1
-rw-r--r--src/mesa/drivers/dri/intel/intel_pixel_bitmap.c1
-rw-r--r--src/mesa/drivers/dri/intel/intel_regions.c6
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_copy.c2
-rw-r--r--src/mesa/drivers/dri/intel/intel_tex_format.c36
-rw-r--r--src/mesa/drivers/dri/intel/server/i830_dri.h62
-rw-r--r--src/mesa/drivers/dri/intel/server/intel.h331
-rw-r--r--src/mesa/drivers/dri/mach64/mach64_ioctl.h3
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_fbo.c7
-rw-r--r--src/mesa/drivers/dri/nouveau/nouveau_texture.c57
-rw-r--r--src/mesa/drivers/dri/nouveau/nv20_state_fb.c5
-rw-r--r--src/mesa/drivers/dri/nouveau/nv20_state_tex.c3
-rw-r--r--src/mesa/drivers/dri/r200/r200_swtcl.c2
-rw-r--r--src/mesa/drivers/dri/r200/r200_tcl.c2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c8
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c9
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.c26
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog.h2
-rw-r--r--src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c114
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_code.h3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c39
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c251
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h9
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_optimize.c3
-rw-r--r--src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c23
-rw-r--r--src/mesa/drivers/dri/r300/r300_render.c2
-rw-r--r--src/mesa/drivers/dri/r600/r600_blit.c20
-rw-r--r--src/mesa/drivers/dri/r600/r700_assembler.c149
-rw-r--r--src/mesa/drivers/dri/r600/r700_chip.c37
-rw-r--r--src/mesa/drivers/dri/r600/r700_clear.c3
-rw-r--r--src/mesa/drivers/dri/r600/r700_render.c27
-rw-r--r--src/mesa/drivers/dri/r600/r700_vertprog.c6
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common.c15
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.c58
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_common_context.h8
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_pixel_read.c3
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.c74
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_screen.h7
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_span.c192
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_swtcl.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tcl.c2
-rw-r--r--src/mesa/drivers/dri/radeon/radeon_tex_copy.c6
48 files changed, 1034 insertions, 743 deletions
diff --git a/src/mesa/drivers/dri/common/dri_metaops.c b/src/mesa/drivers/dri/common/dri_metaops.c
index 86e59a8e51..a2f404b616 100644
--- a/src/mesa/drivers/dri/common/dri_metaops.c
+++ b/src/mesa/drivers/dri/common/dri_metaops.c
@@ -29,6 +29,7 @@
#include "main/arbprogram.h"
#include "main/arrayobj.h"
#include "main/bufferobj.h"
+#include "main/context.h"
#include "main/enable.h"
#include "main/matrix.h"
#include "main/texstate.h"
diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile
index 71ee753748..65fd658c04 100644
--- a/src/mesa/drivers/dri/i915/Makefile
+++ b/src/mesa/drivers/dri/i915/Makefile
@@ -56,7 +56,7 @@ C_SOURCES = \
ASM_SOURCES =
-DRIVER_DEFINES = -I../intel -I../intel/server -DI915 \
+DRIVER_DEFINES = -I../intel -DI915 \
$(shell pkg-config libdrm --atleast-version=2.3.1 \
&& echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP")
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 831981558d..e381a5c714 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -106,7 +106,7 @@ C_SOURCES = \
ASM_SOURCES =
-DRIVER_DEFINES = -I../intel -I../intel/server
+DRIVER_DEFINES = -I../intel
INCLUDES += $(INTEL_CFLAGS)
DRI_LIB_DEPS += $(INTEL_LIBS)
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 40eece276b..af08446f2d 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -46,68 +46,68 @@ brw_add_validated_bo(struct brw_context *brw, drm_intel_bo *bo)
}
};
-const struct brw_tracked_state brw_blend_constant_color;
-const struct brw_tracked_state brw_cc_unit;
-const struct brw_tracked_state brw_check_fallback;
-const struct brw_tracked_state brw_clip_prog;
-const struct brw_tracked_state brw_clip_unit;
-const struct brw_tracked_state brw_vs_constants;
-const struct brw_tracked_state brw_wm_constants;
-const struct brw_tracked_state brw_constant_buffer;
-const struct brw_tracked_state brw_curbe_offsets;
-const struct brw_tracked_state brw_invarient_state;
-const struct brw_tracked_state brw_gs_prog;
-const struct brw_tracked_state brw_gs_unit;
-const struct brw_tracked_state brw_line_stipple;
-const struct brw_tracked_state brw_aa_line_parameters;
-const struct brw_tracked_state brw_pipelined_state_pointers;
-const struct brw_tracked_state brw_binding_table_pointers;
-const struct brw_tracked_state brw_depthbuffer;
-const struct brw_tracked_state brw_polygon_stipple_offset;
-const struct brw_tracked_state brw_polygon_stipple;
-const struct brw_tracked_state brw_program_parameters;
-const struct brw_tracked_state brw_recalculate_urb_fence;
-const struct brw_tracked_state brw_sf_prog;
-const struct brw_tracked_state brw_sf_unit;
-const struct brw_tracked_state brw_sf_vp;
-const struct brw_tracked_state brw_state_base_address;
-const struct brw_tracked_state brw_urb_fence;
-const struct brw_tracked_state brw_vertex_state;
-const struct brw_tracked_state brw_vs_surfaces;
-const struct brw_tracked_state brw_vs_prog;
-const struct brw_tracked_state brw_vs_unit;
-const struct brw_tracked_state brw_wm_input_sizes;
-const struct brw_tracked_state brw_wm_prog;
-const struct brw_tracked_state brw_wm_samplers;
-const struct brw_tracked_state brw_wm_constant_surface;
-const struct brw_tracked_state brw_wm_surfaces;
-const struct brw_tracked_state brw_wm_binding_table;
-const struct brw_tracked_state brw_wm_unit;
-
-const struct brw_tracked_state brw_psp_urb_cbs;
-
-const struct brw_tracked_state brw_pipe_control;
-
-const struct brw_tracked_state brw_drawing_rect;
-const struct brw_tracked_state brw_indices;
-const struct brw_tracked_state brw_vertices;
-const struct brw_tracked_state brw_index_buffer;
-const struct brw_tracked_state gen6_binding_table_pointers;
-const struct brw_tracked_state gen6_blend_state;
-const struct brw_tracked_state gen6_cc_state_pointers;
-const struct brw_tracked_state gen6_clip_state;
-const struct brw_tracked_state gen6_clip_vp;
-const struct brw_tracked_state gen6_color_calc_state;
-const struct brw_tracked_state gen6_depth_stencil_state;
-const struct brw_tracked_state gen6_gs_state;
-const struct brw_tracked_state gen6_sampler_state;
-const struct brw_tracked_state gen6_scissor_state;
-const struct brw_tracked_state gen6_sf_state;
-const struct brw_tracked_state gen6_sf_vp;
-const struct brw_tracked_state gen6_urb;
-const struct brw_tracked_state gen6_viewport_state;
-const struct brw_tracked_state gen6_vs_state;
-const struct brw_tracked_state gen6_wm_state;
+extern const struct brw_tracked_state brw_blend_constant_color;
+extern const struct brw_tracked_state brw_cc_unit;
+extern const struct brw_tracked_state brw_check_fallback;
+extern const struct brw_tracked_state brw_clip_prog;
+extern const struct brw_tracked_state brw_clip_unit;
+extern const struct brw_tracked_state brw_vs_constants;
+extern const struct brw_tracked_state brw_wm_constants;
+extern const struct brw_tracked_state brw_constant_buffer;
+extern const struct brw_tracked_state brw_curbe_offsets;
+extern const struct brw_tracked_state brw_invarient_state;
+extern const struct brw_tracked_state brw_gs_prog;
+extern const struct brw_tracked_state brw_gs_unit;
+extern const struct brw_tracked_state brw_line_stipple;
+extern const struct brw_tracked_state brw_aa_line_parameters;
+extern const struct brw_tracked_state brw_pipelined_state_pointers;
+extern const struct brw_tracked_state brw_binding_table_pointers;
+extern const struct brw_tracked_state brw_depthbuffer;
+extern const struct brw_tracked_state brw_polygon_stipple_offset;
+extern const struct brw_tracked_state brw_polygon_stipple;
+extern const struct brw_tracked_state brw_program_parameters;
+extern const struct brw_tracked_state brw_recalculate_urb_fence;
+extern const struct brw_tracked_state brw_sf_prog;
+extern const struct brw_tracked_state brw_sf_unit;
+extern const struct brw_tracked_state brw_sf_vp;
+extern const struct brw_tracked_state brw_state_base_address;
+extern const struct brw_tracked_state brw_urb_fence;
+extern const struct brw_tracked_state brw_vertex_state;
+extern const struct brw_tracked_state brw_vs_surfaces;
+extern const struct brw_tracked_state brw_vs_prog;
+extern const struct brw_tracked_state brw_vs_unit;
+extern const struct brw_tracked_state brw_wm_input_sizes;
+extern const struct brw_tracked_state brw_wm_prog;
+extern const struct brw_tracked_state brw_wm_samplers;
+extern const struct brw_tracked_state brw_wm_constant_surface;
+extern const struct brw_tracked_state brw_wm_surfaces;
+extern const struct brw_tracked_state brw_wm_binding_table;
+extern const struct brw_tracked_state brw_wm_unit;
+
+extern const struct brw_tracked_state brw_psp_urb_cbs;
+
+extern const struct brw_tracked_state brw_pipe_control;
+
+extern const struct brw_tracked_state brw_drawing_rect;
+extern const struct brw_tracked_state brw_indices;
+extern const struct brw_tracked_state brw_vertices;
+extern const struct brw_tracked_state brw_index_buffer;
+extern const struct brw_tracked_state gen6_binding_table_pointers;
+extern const struct brw_tracked_state gen6_blend_state;
+extern const struct brw_tracked_state gen6_cc_state_pointers;
+extern const struct brw_tracked_state gen6_clip_state;
+extern const struct brw_tracked_state gen6_clip_vp;
+extern const struct brw_tracked_state gen6_color_calc_state;
+extern const struct brw_tracked_state gen6_depth_stencil_state;
+extern const struct brw_tracked_state gen6_gs_state;
+extern const struct brw_tracked_state gen6_sampler_state;
+extern const struct brw_tracked_state gen6_scissor_state;
+extern const struct brw_tracked_state gen6_sf_state;
+extern const struct brw_tracked_state gen6_sf_vp;
+extern const struct brw_tracked_state gen6_urb;
+extern const struct brw_tracked_state gen6_viewport_state;
+extern const struct brw_tracked_state gen6_vs_state;
+extern const struct brw_tracked_state gen6_wm_state;
/***********************************************************************
* brw_state.c
diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c
index 1db2a210d4..e878da3850 100644
--- a/src/mesa/drivers/dri/i965/brw_util.c
+++ b/src/mesa/drivers/dri/i965/brw_util.c
@@ -30,6 +30,8 @@
*/
+#include <assert.h>
+
#include "main/mtypes.h"
#include "program/prog_parameter.h"
#include "brw_util.h"
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 5f2035d79c..e19f44035f 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -29,6 +29,7 @@
#include "main/glheader.h"
#include "main/context.h"
#include "main/extensions.h"
+#include "main/fbobject.h"
#include "main/framebuffer.h"
#include "main/imports.h"
#include "main/points.h"
@@ -39,8 +40,6 @@
#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
-#include "i830_dri.h"
-
#include "intel_chipset.h"
#include "intel_buffers.h"
#include "intel_tex.h"
@@ -420,7 +419,7 @@ intel_prepare_render(struct intel_context *intel)
__DRIdrawable *drawable;
drawable = driContext->driDrawablePriv;
- if (drawable->dri2.stamp != driContext->dri2.draw_stamp) {
+ if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
if (drawable->lastStamp != drawable->dri2.stamp)
intel_update_renderbuffers(driContext, drawable);
intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer);
@@ -428,7 +427,7 @@ intel_prepare_render(struct intel_context *intel)
}
drawable = driContext->driReadablePriv;
- if (drawable->dri2.stamp != driContext->dri2.read_stamp) {
+ if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
if (drawable->lastStamp != drawable->dri2.stamp)
intel_update_renderbuffers(driContext, drawable);
driContext->dri2.read_stamp = drawable->dri2.stamp;
@@ -613,6 +612,7 @@ intelInitContext(struct intel_context *intel,
__DRIscreen *sPriv = driContextPriv->driScreenPriv;
struct intel_screen *intelScreen = sPriv->private;
int bo_reuse_mode;
+ __GLcontextModes visual;
/* we can't do anything without a connection to the device */
if (intelScreen->bufmgr == NULL)
@@ -624,6 +624,11 @@ intelInitContext(struct intel_context *intel,
functions->Viewport = intel_viewport;
}
+ if (mesaVis == NULL) {
+ memset(&visual, 0, sizeof visual);
+ mesaVis = &visual;
+ }
+
if (!_mesa_initialize_context_for_api(&intel->ctx, api, mesaVis, shareCtx,
functions, (void *) intel)) {
printf("%s: failed to init mesa context\n", __FUNCTION__);
@@ -890,14 +895,21 @@ intelMakeCurrent(__DRIcontext * driContextPriv,
}
if (driContextPriv) {
- struct gl_framebuffer *fb = driDrawPriv->driverPrivate;
- struct gl_framebuffer *readFb = driReadPriv->driverPrivate;
+ struct gl_framebuffer *fb, *readFb;
+
+ if (driDrawPriv == NULL && driReadPriv == NULL) {
+ fb = _mesa_get_incomplete_framebuffer();
+ readFb = _mesa_get_incomplete_framebuffer();
+ } else {
+ fb = driDrawPriv->driverPrivate;
+ readFb = driReadPriv->driverPrivate;
+ driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
+ driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
+ }
- driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
- driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
intel_prepare_render(intel);
_mesa_make_current(&intel->ctx, fb, readFb);
-
+
/* We do this in intel_prepare_render() too, but intel->ctx.DrawBuffer
* is NULL at that point. We can't call _mesa_makecurrent()
* first, since we need the buffer size for the initial
diff --git a/src/mesa/drivers/dri/intel/intel_extensions_es2.c b/src/mesa/drivers/dri/intel/intel_extensions_es2.c
index baf8e13001..de34bbb2ae 100644
--- a/src/mesa/drivers/dri/intel/intel_extensions_es2.c
+++ b/src/mesa/drivers/dri/intel/intel_extensions_es2.c
@@ -28,7 +28,6 @@
#include "main/extensions.h"
#include "intel_extensions.h"
-#include "utils.h"
static const char *es2_extensions[] = {
/* Used by mesa internally (cf all_mesa_extensions in ../common/utils.c) */
diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
index 0e2fe893fe..02c0ffce31 100644
--- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c
@@ -45,6 +45,7 @@
#include "main/attrib.h"
#include "main/enable.h"
#include "main/viewport.h"
+#include "main/context.h"
#include "swrast/swrast.h"
#include "intel_screen.h"
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index fe4de18960..680d18ba29 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -155,6 +155,9 @@ intel_region_alloc_internal(struct intel_context *intel,
}
region = calloc(sizeof(*region), 1);
+ if (region == NULL)
+ return region;
+
region->cpp = cpp;
region->width = width;
region->height = height;
@@ -189,6 +192,9 @@ intel_region_alloc(struct intel_context *intel,
region = intel_region_alloc_internal(intel, cpp, width, height,
aligned_pitch / cpp, buffer);
+ if (region == NULL)
+ return region;
+
region->tiling = tiling;
return region;
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 224b506c05..6efb2ddc55 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -102,7 +102,7 @@ do_copy_texsubimage(struct intel_context *intel,
GLcontext *ctx = &intel->ctx;
const struct intel_region *src = get_teximage_source(intel, internalFormat);
- if (!intelImage->mt || !src) {
+ if (!intelImage->mt || !src || !src->buffer) {
if (INTEL_DEBUG & DEBUG_FALLBACKS)
fprintf(stderr, "%s fail %p %p (0x%08x)\n",
__FUNCTION__, intelImage->mt, src, internalFormat);
diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c
index 5f813c0efa..e03b203fb4 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_format.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_format.c
@@ -19,7 +19,6 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
GLenum format, GLenum type)
{
struct intel_context *intel = intel_context(ctx);
- const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24);
#if 0
printf("%s intFmt=0x%x format=0x%x type=0x%x\n",
@@ -30,39 +29,28 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat,
case 4:
case GL_RGBA:
case GL_COMPRESSED_RGBA:
- if (format == GL_BGRA) {
- if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) {
- return MESA_FORMAT_ARGB8888;
- }
- else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) {
- return MESA_FORMAT_ARGB4444;
- }
- else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) {
- return MESA_FORMAT_ARGB1555;
- }
- }
- return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+ if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV)
+ return MESA_FORMAT_ARGB4444;
+ else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV)
+ return MESA_FORMAT_ARGB1555;
+ else
+ return MESA_FORMAT_ARGB8888;
case 3:
case GL_RGB:
case GL_COMPRESSED_RGB:
- if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) {
- return MESA_FORMAT_RGB565;
- }
- if (do32bpt) {
- if (intel->has_xrgb_textures)
- return MESA_FORMAT_XRGB8888;
- else
- return MESA_FORMAT_ARGB8888;
- } else {
+ if (type == GL_UNSIGNED_SHORT_5_6_5)
return MESA_FORMAT_RGB565;
- }
+ else if (intel->has_xrgb_textures)
+ return MESA_FORMAT_XRGB8888;
+ else
+ return MESA_FORMAT_ARGB8888;
case GL_RGBA8:
case GL_RGB10_A2:
case GL_RGBA12:
case GL_RGBA16:
- return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444;
+ return MESA_FORMAT_ARGB8888;
case GL_RGBA4:
case GL_RGBA2:
diff --git a/src/mesa/drivers/dri/intel/server/i830_dri.h b/src/mesa/drivers/dri/intel/server/i830_dri.h
deleted file mode 100644
index def049e7a6..0000000000
--- a/src/mesa/drivers/dri/intel/server/i830_dri.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.6 2003/09/28 20:15:59 alanh Exp $ */
-
-#ifndef _I830_DRI_H
-#define _I830_DRI_H
-
-#include "xf86drm.h"
-
-#define I830_MAX_DRAWABLES 256
-
-#define I830_MAJOR_VERSION 1
-#define I830_MINOR_VERSION 9
-#define I830_PATCHLEVEL 0
-
-#define I830_REG_SIZE 0x80000
-
-typedef struct _I830DRIRec {
- drm_handle_t regs;
- drmSize regsSize;
-
- drmSize unused1; /* backbufferSize */
- drm_handle_t unused2; /* backbuffer */
-
- drmSize unused3; /* depthbufferSize */
- drm_handle_t unused4; /* depthbuffer */
-
- drmSize unused5; /* rotatedSize */
- drm_handle_t unused6; /* rotatedbuffer */
-
- drm_handle_t unused7; /* textures */
- int unused8; /* textureSize */
-
- drm_handle_t unused9; /* agp_buffers */
- drmSize unused10; /* agp_buf_size */
-
- int deviceID;
- int width;
- int height;
- int mem;
- int cpp;
- int bitsPerPixel;
-
- int unused11[8]; /* was front/back/depth/rotated offset/pitch */
-
- int unused12; /* logTextureGranularity */
- int unused13; /* textureOffset */
-
- int irq;
- int sarea_priv_offset;
-} I830DRIRec, *I830DRIPtr;
-
-typedef struct {
- /* Nothing here yet */
- int dummy;
-} I830ConfigPrivRec, *I830ConfigPrivPtr;
-
-typedef struct {
- /* Nothing here yet */
- int dummy;
-} I830DRIContextRec, *I830DRIContextPtr;
-
-
-#endif
diff --git a/src/mesa/drivers/dri/intel/server/intel.h b/src/mesa/drivers/dri/intel/server/intel.h
deleted file mode 100644
index 6ea72499c1..0000000000
--- a/src/mesa/drivers/dri/intel/server/intel.h
+++ /dev/null
@@ -1,331 +0,0 @@
-#ifndef _INTEL_H_
-#define _INTEL_H_
-
-#include "xf86drm.h" /* drm_handle_t, etc */
-
-/* Intel */
-#ifndef PCI_CHIP_I810
-#define PCI_CHIP_I810 0x7121
-#define PCI_CHIP_I810_DC100 0x7123
-#define PCI_CHIP_I810_E 0x7125
-#define PCI_CHIP_I815 0x1132
-#define PCI_CHIP_I810_BRIDGE 0x7120
-#define PCI_CHIP_I810_DC100_BRIDGE 0x7122
-#define PCI_CHIP_I810_E_BRIDGE 0x7124
-#define PCI_CHIP_I815_BRIDGE 0x1130
-#endif
-
-#define PCI_CHIP_845_G 0x2562
-#define PCI_CHIP_I830_M 0x3577
-
-#ifndef PCI_CHIP_I855_GM
-#define PCI_CHIP_I855_GM 0x3582
-#define PCI_CHIP_I855_GM_BRIDGE 0x3580
-#endif
-
-#ifndef PCI_CHIP_I865_G
-#define PCI_CHIP_I865_G 0x2572
-#define PCI_CHIP_I865_G_BRIDGE 0x2570
-#endif
-
-#ifndef PCI_CHIP_I915_G
-#define PCI_CHIP_I915_G 0x2582
-#define PCI_CHIP_I915_G_BRIDGE 0x2580
-#endif
-
-#ifndef PCI_CHIP_I915_GM
-#define PCI_CHIP_I915_GM 0x2592
-#define PCI_CHIP_I915_GM_BRIDGE 0x2590
-#endif
-
-#ifndef PCI_CHIP_E7221_G
-#define PCI_CHIP_E7221_G 0x258A
-/* Same as I915_G_BRIDGE */
-#define PCI_CHIP_E7221_G_BRIDGE 0x2580
-#endif
-
-#ifndef PCI_CHIP_I945_G
-#define PCI_CHIP_I945_G 0x2772
-#define PCI_CHIP_I945_G_BRIDGE 0x2770
-#endif
-
-#ifndef PCI_CHIP_I945_GM
-#define PCI_CHIP_I945_GM 0x27A2
-#define PCI_CHIP_I945_GM_BRIDGE 0x27A0
-#endif
-
-#define IS_I810(pI810) (pI810->Chipset == PCI_CHIP_I810 || \
- pI810->Chipset == PCI_CHIP_I810_DC100 || \
- pI810->Chipset == PCI_CHIP_I810_E)
-#define IS_I815(pI810) (pI810->Chipset == PCI_CHIP_I815)
-#define IS_I830(pI810) (pI810->Chipset == PCI_CHIP_I830_M)
-#define IS_845G(pI810) (pI810->Chipset == PCI_CHIP_845_G)
-#define IS_I85X(pI810) (pI810->Chipset == PCI_CHIP_I855_GM)
-#define IS_I852(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I852_GM || pI810->variant == I852_GME))
-#define IS_I855(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I855_GM || pI810->variant == I855_GME))
-#define IS_I865G(pI810) (pI810->Chipset == PCI_CHIP_I865_G)
-
-#define IS_I915G(pI810) (pI810->Chipset == PCI_CHIP_I915_G || pI810->Chipset == PCI_CHIP_E7221_G)
-#define IS_I915GM(pI810) (pI810->Chipset == PCI_CHIP_I915_GM)
-#define IS_I945G(pI810) (pI810->Chipset == PCI_CHIP_I945_G)
-#define IS_I945GM(pI810) (pI810->Chipset == PCI_CHIP_I945_GM)
-#define IS_I9XX(pI810) (IS_I915G(pI810) || IS_I915GM(pI810) || IS_I945G(pI810) || IS_I945GM(pI810))
-
-#define IS_MOBILE(pI810) (IS_I830(pI810) || IS_I85X(pI810) || IS_I915GM(pI810) || IS_I945GM(pI810))
-
-#define I830_GMCH_CTRL 0x52
-
-#define I830_GMCH_MEM_MASK 0x1
-#define I830_GMCH_MEM_64M 0x1
-#define I830_GMCH_MEM_128M 0
-
-#define I830_GMCH_GMS_MASK 0x70
-#define I830_GMCH_GMS_DISABLED 0x00
-#define I830_GMCH_GMS_LOCAL 0x10
-#define I830_GMCH_GMS_STOLEN_512 0x20
-#define I830_GMCH_GMS_STOLEN_1024 0x30
-#define I830_GMCH_GMS_STOLEN_8192 0x40
-
-#define I855_GMCH_GMS_MASK (0x7 << 4)
-#define I855_GMCH_GMS_DISABLED 0x00
-#define I855_GMCH_GMS_STOLEN_1M (0x1 << 4)
-#define I855_GMCH_GMS_STOLEN_4M (0x2 << 4)
-#define I855_GMCH_GMS_STOLEN_8M (0x3 << 4)
-#define I855_GMCH_GMS_STOLEN_16M (0x4 << 4)
-#define I855_GMCH_GMS_STOLEN_32M (0x5 << 4)
-#define I915G_GMCH_GMS_STOLEN_48M (0x6 << 4)
-#define I915G_GMCH_GMS_STOLEN_64M (0x7 << 4)
-
-typedef unsigned char Bool;
-#define TRUE 1
-#define FALSE 0
-
-#define PIPE_NONE 0<<0
-#define PIPE_CRT 1<<0
-#define PIPE_TV 1<<1
-#define PIPE_DFP 1<<2
-#define PIPE_LFP 1<<3
-#define PIPE_CRT2 1<<4
-#define PIPE_TV2 1<<5
-#define PIPE_DFP2 1<<6
-#define PIPE_LFP2 1<<7
-
-typedef struct _I830MemPool *I830MemPoolPtr;
-typedef struct _I830MemRange *I830MemRangePtr;
-typedef struct _I830MemRange {
- long Start;
- long End;
- long Size;
- unsigned long Physical;
- unsigned long Offset; /* Offset of AGP-allocated portion */
- unsigned long Alignment;
- drm_handle_t Key;
- unsigned long Pitch; // add pitch
- I830MemPoolPtr Pool;
-} I830MemRange;
-
-typedef struct _I830MemPool {
- I830MemRange Total;
- I830MemRange Free;
- I830MemRange Fixed;
- I830MemRange Allocated;
-} I830MemPool;
-
-typedef struct {
- int tail_mask;
- I830MemRange mem;
- unsigned char *virtual_start;
- int head;
- int tail;
- int space;
-} I830RingBuffer;
-
-typedef struct _I830Rec {
- unsigned char *MMIOBase;
- unsigned char *FbBase;
- int cpp;
- uint32_t aper_size;
- unsigned int bios_version;
-
- /* These are set in PreInit and never changed. */
- long FbMapSize;
- long TotalVideoRam;
- I830MemRange StolenMemory; /* pre-allocated memory */
- long BIOSMemorySize; /* min stolen pool size */
- int BIOSMemSizeLoc;
-
- /* These change according to what has been allocated. */
- long FreeMemory;
- I830MemRange MemoryAperture;
- I830MemPool StolenPool;
- long allocatedMemory;
-
- /* Regions allocated either from the above pools, or from agpgart. */
- /* for single and dual head configurations */
- I830MemRange FrontBuffer;
- I830MemRange FrontBuffer2;
- I830MemRange Scratch;
- I830MemRange Scratch2;
-
- I830RingBuffer *LpRing;
-
- I830MemRange BackBuffer;
- I830MemRange DepthBuffer;
- I830MemRange TexMem;
- int TexGranularity;
- I830MemRange ContextMem;
- int drmMinor;
- Bool have3DWindows;
-
- Bool NeedRingBufferLow;
- Bool allowPageFlip;
- Bool disableTiling;
-
- int Chipset;
- unsigned long LinearAddr;
- unsigned long MMIOAddr;
-
- drmSize registerSize; /**< \brief MMIO register map size */
- drm_handle_t registerHandle; /**< \brief MMIO register map handle */
- // IOADDRESS ioBase;
- int irq; /**< \brief IRQ number */
- int GttBound;
-
- drm_handle_t ring_map;
- unsigned int Fence[8];
-
-} I830Rec;
-
-/*
- * 12288 is set as the maximum, chosen because it is enough for
- * 1920x1440@32bpp with a 2048 pixel line pitch with some to spare.
- */
-#define I830_MAXIMUM_VBIOS_MEM 12288
-#define I830_DEFAULT_VIDEOMEM_2D (MB(32) / 1024)
-#define I830_DEFAULT_VIDEOMEM_3D (MB(64) / 1024)
-
-/* Flags for memory allocation function */
-#define FROM_ANYWHERE 0x00000000
-#define FROM_POOL_ONLY 0x00000001
-#define FROM_NEW_ONLY 0x00000002
-#define FROM_MASK 0x0000000f
-
-#define ALLOCATE_AT_TOP 0x00000010
-#define ALLOCATE_AT_BOTTOM 0x00000020
-#define FORCE_GAPS 0x00000040
-
-#define NEED_PHYSICAL_ADDR 0x00000100
-#define ALIGN_BOTH_ENDS 0x00000200
-#define FORCE_LOW 0x00000400
-
-#define ALLOC_NO_TILING 0x00001000
-#define ALLOC_INITIAL 0x00002000
-
-#define ALLOCATE_DRY_RUN 0x80000000
-
-/* Chipset registers for VIDEO BIOS memory RW access */
-#define _855_DRAM_RW_CONTROL 0x58
-#define _845_DRAM_RW_CONTROL 0x90
-#define DRAM_WRITE 0x33330000
-
-#define KB(x) ((x) * 1024)
-#define MB(x) ((x) * KB(1024))
-
-#define GTT_PAGE_SIZE KB(4)
-#define ROUND_TO(x, y) (((x) + (y) - 1) / (y) * (y))
-#define ROUND_DOWN_TO(x, y) ((x) / (y) * (y))
-#define ROUND_TO_PAGE(x) ROUND_TO((x), GTT_PAGE_SIZE)
-#define ROUND_TO_MB(x) ROUND_TO((x), MB(1))
-#define PRIMARY_RINGBUFFER_SIZE KB(128)
-
-
-/* Ring buffer registers, p277, overview p19
- */
-#define LP_RING 0x2030
-#define HP_RING 0x2040
-
-#define RING_TAIL 0x00
-#define TAIL_ADDR 0x000FFFF8
-#define I830_TAIL_MASK 0x001FFFF8
-
-#define RING_HEAD 0x04
-#define HEAD_WRAP_COUNT 0xFFE00000
-#define HEAD_WRAP_ONE 0x00200000
-#define HEAD_ADDR 0x001FFFFC
-#define I830_HEAD_MASK 0x001FFFFC
-
-#define RING_START 0x08
-#define START_ADDR 0x03FFFFF8
-#define I830_RING_START_MASK 0xFFFFF000
-
-#define RING_LEN 0x0C
-#define RING_NR_PAGES 0x001FF000
-#define I830_RING_NR_PAGES 0x001FF000
-#define RING_REPORT_MASK 0x00000006
-#define RING_REPORT_64K 0x00000002
-#define RING_REPORT_128K 0x00000004
-#define RING_NO_REPORT 0x00000000
-#define RING_VALID_MASK 0x00000001
-#define RING_VALID 0x00000001
-#define RING_INVALID 0x00000000
-
-
-/* Fence/Tiling ranges [0..7]
- */
-#define FENCE 0x2000
-#define FENCE_NR 8
-
-#define I915G_FENCE_START_MASK 0x0ff00000
-
-#define I830_FENCE_START_MASK 0x07f80000
-
-#define FENCE_START_MASK 0x03F80000
-#define FENCE_X_MAJOR 0x00000000
-#define FENCE_Y_MAJOR 0x00001000
-#define FENCE_SIZE_MASK 0x00000700
-#define FENCE_SIZE_512K 0x00000000
-#define FENCE_SIZE_1M 0x00000100
-#define FENCE_SIZE_2M 0x00000200
-#define FENCE_SIZE_4M 0x00000300
-#define FENCE_SIZE_8M 0x00000400
-#define FENCE_SIZE_16M 0x00000500
-#define FENCE_SIZE_32M 0x00000600
-#define FENCE_SIZE_64M 0x00000700
-#define I915G_FENCE_SIZE_1M 0x00000000
-#define I915G_FENCE_SIZE_2M 0x00000100
-#define I915G_FENCE_SIZE_4M 0x00000200
-#define I915G_FENCE_SIZE_8M 0x00000300
-#define I915G_FENCE_SIZE_16M 0x00000400
-#define I915G_FENCE_SIZE_32M 0x00000500
-#define I915G_FENCE_SIZE_64M 0x00000600
-#define I915G_FENCE_SIZE_128M 0x00000700
-#define FENCE_PITCH_1 0x00000000
-#define FENCE_PITCH_2 0x00000010
-#define FENCE_PITCH_4 0x00000020
-#define FENCE_PITCH_8 0x00000030
-#define FENCE_PITCH_16 0x00000040
-#define FENCE_PITCH_32 0x00000050
-#define FENCE_PITCH_64 0x00000060
-#define FENCE_VALID 0x00000001
-
-#include <mmio.h>
-
-# define MMIO_IN8(base, offset) \
- *(volatile unsigned char *)(((unsigned char*)(base)) + (offset))
-# define MMIO_IN32(base, offset) \
- read_MMIO_LE32(base, offset)
-# define MMIO_OUT8(base, offset, val) \
- *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val)
-# define MMIO_OUT32(base, offset, val) \
- *(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val)
-
-
- /* Memory mapped register access macros */
-#define INREG8(addr) MMIO_IN8(MMIO, addr)
-#define INREG(addr) MMIO_IN32(MMIO, addr)
-#define OUTREG8(addr, val) MMIO_OUT8(MMIO, addr, val)
-#define OUTREG(addr, val) MMIO_OUT32(MMIO, addr, val)
-
-#define DSPABASE 0x70184
-
-#endif
diff --git a/src/mesa/drivers/dri/mach64/mach64_ioctl.h b/src/mesa/drivers/dri/mach64/mach64_ioctl.h
index 1ffda1932f..9145ee6e6c 100644
--- a/src/mesa/drivers/dri/mach64/mach64_ioctl.h
+++ b/src/mesa/drivers/dri/mach64/mach64_ioctl.h
@@ -32,6 +32,9 @@
#ifndef __MACH64_IOCTL_H__
#define __MACH64_IOCTL_H__
+#include <stdio.h>
+#include <stdlib.h>
+
#include "mach64_dri.h"
#include "mach64_reg.h"
#include "mach64_lock.h"
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
index 8be7edb150..bd1273beea 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c
@@ -220,7 +220,7 @@ get_tex_format(struct gl_texture_image *ti)
case MESA_FORMAT_RGB565:
return GL_RGB5;
default:
- assert(0);
+ return GL_NONE;
}
}
@@ -231,7 +231,6 @@ nouveau_render_texture(GLcontext *ctx, struct gl_framebuffer *fb,
struct gl_renderbuffer *rb = att->Renderbuffer;
struct gl_texture_image *ti =
att->Texture->Image[att->CubeMapFace][att->TextureLevel];
- int ret;
/* Allocate a renderbuffer object for the texture if we
* haven't already done so. */
@@ -244,9 +243,7 @@ nouveau_render_texture(GLcontext *ctx, struct gl_framebuffer *fb,
}
/* Update the renderbuffer fields from the texture. */
- ret = set_renderbuffer_format(rb, get_tex_format(ti));
- assert(ret);
-
+ set_renderbuffer_format(rb, get_tex_format(ti));
rb->Width = ti->Width;
rb->Height = ti->Height;
nouveau_surface_ref(&to_nouveau_teximage(ti)->surface,
diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
index dbf9a5cc61..442f4e899e 100644
--- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c
+++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c
@@ -38,6 +38,7 @@
#include "main/mipmap.h"
#include "main/texfetch.h"
#include "main/teximage.h"
+#include "drivers/common/meta.h"
static struct gl_texture_object *
nouveau_texture_new(GLcontext *ctx, GLuint name, GLenum target)
@@ -182,10 +183,10 @@ teximage_fits(struct gl_texture_object *t, int level)
struct nouveau_surface *s = &to_nouveau_texture(t)->surfaces[level];
struct gl_texture_image *ti = t->Image[0][level];
- return ti && (t->Target == GL_TEXTURE_RECTANGLE ||
- (s->bo && s->width == ti->Width &&
- s->height == ti->Height &&
- s->format == ti->TexFormat));
+ return ti && to_nouveau_teximage(ti)->surface.bo &&
+ (t->Target == GL_TEXTURE_RECTANGLE ||
+ (s->bo && s->format == ti->TexFormat &&
+ s->width == ti->Width && s->height == ti->Height));
}
static GLboolean
@@ -589,6 +590,53 @@ nouveau_texture_unmap(GLcontext *ctx, struct gl_texture_object *t)
}
}
+static void
+store_mipmap(GLcontext *ctx, GLenum target, int first, int last,
+ struct gl_texture_object *t)
+{
+ struct gl_pixelstore_attrib packing = {
+ .BufferObj = ctx->Shared->NullBufferObj,
+ .Alignment = 1
+ };
+ GLenum format = t->Image[0][first]->TexFormat;
+ unsigned base_format, type, comps;
+ int i;
+
+ base_format = _mesa_get_format_base_format(format);
+ _mesa_format_to_type_and_comps(format, &type, &comps);
+
+ for (i = first; i <= last; i++) {
+ struct gl_texture_image *ti = t->Image[0][i];
+ void *data = ti->Data;
+
+ nouveau_teximage(ctx, 3, target, i, ti->InternalFormat,
+ ti->Width, ti->Height, ti->Depth,
+ ti->Border, base_format, type, data,
+ &packing, t, ti);
+
+ _mesa_free_texmemory(data);
+ }
+}
+
+static void
+nouveau_generate_mipmap(GLcontext *ctx, GLenum target,
+ struct gl_texture_object *t)
+{
+ if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, t)) {
+ struct gl_texture_image *base = t->Image[0][t->BaseLevel];
+
+ nouveau_teximage_map(ctx, base);
+ _mesa_generate_mipmap(ctx, target, t);
+ nouveau_teximage_unmap(ctx, base);
+
+ store_mipmap(ctx, target, t->BaseLevel + 1,
+ get_last_level(t), t);
+
+ } else {
+ _mesa_meta_GenerateMipmap(ctx, target, t);
+ }
+}
+
void
nouveau_texture_functions_init(struct dd_function_table *functions)
{
@@ -607,4 +655,5 @@ nouveau_texture_functions_init(struct dd_function_table *functions)
functions->BindTexture = nouveau_bind_texture;
functions->MapTexture = nouveau_texture_map;
functions->UnmapTexture = nouveau_texture_unmap;
+ functions->GenerateMipmap = nouveau_generate_mipmap;
}
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
index 21da4f7af1..95691cad04 100644
--- a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c
@@ -72,7 +72,7 @@ nv20_emit_framebuffer(GLcontext *ctx, int emit)
fb->_ColorDrawBuffers[0])->surface;
rt_format |= get_rt_format(s->format);
- zeta_pitch = rt_pitch = s->pitch;
+ rt_pitch = s->pitch;
nouveau_bo_markl(bctx, kelvin, NV20TCL_COLOR_OFFSET,
s->bo, 0, bo_flags);
@@ -88,6 +88,9 @@ nv20_emit_framebuffer(GLcontext *ctx, int emit)
nouveau_bo_markl(bctx, kelvin, NV20TCL_ZETA_OFFSET,
s->bo, 0, bo_flags);
+ } else {
+ rt_format |= get_rt_format(MESA_FORMAT_Z24_S8);
+ zeta_pitch = rt_pitch;
}
BEGIN_RING(chan, kelvin, NV20TCL_RT_FORMAT, 2);
diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
index e46118e4fc..2d45513bb4 100644
--- a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
+++ b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c
@@ -194,7 +194,8 @@ nv20_emit_tex_obj(GLcontext *ctx, int emit)
| nvgl_wrap_mode(t->WrapS) << 0;
tx_filter = nvgl_filter_mode(t->MagFilter) << 24
- | nvgl_filter_mode(t->MinFilter) << 16;
+ | nvgl_filter_mode(t->MinFilter) << 16
+ | 2 << 12;
tx_enable = NV20TCL_TX_ENABLE_ENABLE
| log2i(t->MaxAnisotropy) << 4;
diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c
index 262fe3cdde..dbf4ad477d 100644
--- a/src/mesa/drivers/dri/r200/r200_swtcl.c
+++ b/src/mesa/drivers/dri/r200/r200_swtcl.c
@@ -612,6 +612,8 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim )
{
r200ContextPtr rmesa = R200_CONTEXT(ctx);
+ radeon_prepare_render(&rmesa->radeon);
+
if (rmesa->radeon.swtcl.hw_primitive != hwprim) {
/* need to disable perspective-correct texturing for point sprites */
if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) {
diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c
index d43e14581e..4ae0f30491 100644
--- a/src/mesa/drivers/dri/r200/r200_tcl.c
+++ b/src/mesa/drivers/dri/r200/r200_tcl.c
@@ -264,6 +264,8 @@ void r200TclPrimitive( GLcontext *ctx,
r200ContextPtr rmesa = R200_CONTEXT(ctx);
GLuint newprim = hw_prim | R200_VF_TCL_OUTPUT_VTX_ENABLE;
+ radeon_prepare_render(&rmesa->radeon);
+
if (newprim != rmesa->tcl.hw_primitive ||
!discrete_prim[hw_prim&0xf]) {
/* need to disable perspective-correct texturing for point sprites */
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
index a326ee4c4f..c6246a81a2 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c
@@ -109,13 +109,13 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c)
debug_program_log(c, "before compilation");
if (c->Base.is_r500){
- r500_transform_unroll_loops(&c->Base, &loop_state);
- debug_program_log(c, "after r500 transform loops");
+ rc_unroll_loops(&c->Base, R500_PFS_MAX_INST);
+ debug_program_log(c, "after unroll loops");
}
else{
- rc_transform_unroll_loops(&c->Base, &loop_state);
+ rc_transform_loops(&c->Base, &loop_state);
debug_program_log(c, "after transform loops");
-
+
rc_emulate_branches(&c->Base);
debug_program_log(c, "after emulate branches");
}
diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
index d347b4df9c..e940fedec2 100644
--- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c
@@ -633,7 +633,7 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = {
void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
{
struct emulate_loop_state loop_state;
-
+
compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps;
addArtificialOutputs(compiler);
@@ -643,13 +643,12 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler)
/* XXX Ideally this should be done only for r3xx, but since
* we don't have branching support for r5xx, we use the emulation
* on all chipsets. */
- rc_transform_unroll_loops(&compiler->Base, &loop_state);
-
- debug_program_log(compiler, "after transform loops");
-
+
if (compiler->Base.is_r500){
+ rc_transform_loops(&compiler->Base, &loop_state);
rc_emulate_loops(&loop_state, R500_VS_MAX_ALU);
} else {
+ rc_transform_loops(&compiler->Base, &loop_state);
rc_emulate_loops(&loop_state, R300_VS_MAX_ALU);
}
debug_program_log(compiler, "after emulate loops");
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
index e6b5522c5b..80a120497e 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c
@@ -30,7 +30,6 @@
#include <stdio.h>
#include "../r300_reg.h"
-#include "radeon_emulate_loops.h"
/**
* Rewrite IF instructions to use the ALU result special register.
@@ -60,31 +59,6 @@ int r500_transform_IF(
return 1;
}
-/**
- * Rewrite loops to make them easier to emit. This is not a local
- * transformation, because it modifies and reorders an entire block of code.
- */
-void r500_transform_unroll_loops(struct radeon_compiler * c,
- struct emulate_loop_state *s)
-{
- int i;
-
- rc_transform_unroll_loops(c, s);
-
- for( i = s->LoopCount - 1; i >= 0; i-- ){
- struct rc_instruction * inst_continue;
- if(!s->Loops[i].EndLoop){
- continue;
- }
- /* Insert a continue instruction at the end of the loop. This
- * is required in order to emit loops correctly. */
- inst_continue = rc_insert_new_instruction(c,
- s->Loops[i].EndIf->Prev);
- inst_continue->U.I.Opcode = RC_OPCODE_CONTINUE;
- }
-
-}
-
static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg)
{
unsigned int relevant;
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
index 0d005a794f..34173351f8 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h
@@ -49,6 +49,4 @@ extern int r500_transform_IF(
struct rc_instruction * inst,
void* data);
-void r500_transform_unroll_loops(struct radeon_compiler * c,
- struct emulate_loop_state * s);
#endif
diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
index 0bd8f0a239..c3f817ad4e 100644
--- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
+++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c
@@ -64,7 +64,12 @@ struct branch_info {
};
struct loop_info {
- int LoopStart;
+ int BgnLoop;
+
+ int BranchDepth;
+ int * Brks;
+ int BrkCount;
+ int BrkReserved;
};
struct emit_state {
@@ -368,6 +373,12 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
unsigned int newip = ++s->Code->inst_end;
+ /* Currently all loops use the same integer constant to intialize
+ * the loop variables. */
+ if(!s->Code->int_constants[0]) {
+ s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
+ s->Code->int_constant_count = 1;
+ }
s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
switch(inst->U.I.Opcode){
@@ -378,32 +389,69 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
loop = &s->Loops[s->CurrentLoopDepth++];
-
- /* We don't emit an instruction for BGNLOOP, so we need to
- * decrement the instruction counter, but first we need to
- * set LoopStart to the current value of inst_end, which
- * will end up being the first real instruction in the loop.*/
- loop->LoopStart = s->Code->inst_end--;
+ memset(loop, 0, sizeof(struct loop_info));
+ loop->BranchDepth = s->CurrentBranchDepth;
+ loop->BgnLoop = newip;
+
+ s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
+ | R500_FC_JUMP_FUNC(0x00)
+ | R500_FC_IGNORE_UNCOVERED
+ ;
break;
-
case RC_OPCODE_BRK:
- /* Don't emit an instruction for BRK */
- s->Code->inst_end--;
+ loop = &s->Loops[s->CurrentLoopDepth - 1];
+ memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
+ loop->BrkCount, loop->BrkReserved, 1);
+
+ loop->Brks[loop->BrkCount++] = newip;
+ s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_B_OP1_DECR
+ | R500_FC_B_POP_CNT(
+ s->CurrentBranchDepth - loop->BranchDepth)
+ | R500_FC_IGNORE_UNCOVERED
+ ;
break;
case RC_OPCODE_CONTINUE:
loop = &s->Loops[s->CurrentLoopDepth - 1];
- s->Code->inst[newip].inst2 = R500_FC_OP_JUMP |
- R500_FC_JUMP_FUNC(0xff);
- s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->LoopStart);
+ s->Code->inst[newip].inst2 = R500_FC_OP_JUMP
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_B_OP1_DECR
+ | R500_FC_B_POP_CNT(
+ s->CurrentBranchDepth - loop->BranchDepth)
+ ;
+ s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->BgnLoop);
break;
case RC_OPCODE_ENDLOOP:
- /* Don't emit an instruction for ENDLOOP */
- s->Code->inst_end--;
+ {
+ unsigned int i;
+ loop = &s->Loops[s->CurrentLoopDepth - 1];
+ /* Emit ENDLOOP */
+ s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
+ | R500_FC_JUMP_FUNC(0xff)
+ | R500_FC_JUMP_ANY
+ | R500_FC_IGNORE_UNCOVERED
+ ;
+ /* The constant integer at index 0 is used by all loops. */
+ s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
+ | R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
+ ;
+
+ /* Set jump address and int constant for BGNLOOP */
+ s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
+ | R500_FC_JUMP_ADDR(newip)
+ ;
+
+ /* Set jump address for the BRK instructions. */
+ while(loop->BrkCount--) {
+ s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
+ R500_FC_JUMP_ADDR(newip + 1);
+ }
s->CurrentLoopDepth--;
break;
-
+ }
case RC_OPCODE_IF:
if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) {
rc_error(s->C, "Branch depth exceeds hardware limit");
@@ -442,24 +490,16 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst
}
branch = &s->Branches[s->CurrentBranchDepth - 1];
-
- if(inst->Prev->U.I.Opcode == RC_OPCODE_BRK){
- branch->Endif = --s->Code->inst_end;
- s->Code->inst[branch->Endif].inst2 |=
- R500_FC_B_OP0_DECR;
- }
- else{
- branch->Endif = newip;
-
- s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
- | R500_FC_A_OP_NONE /* no address stack */
- | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
- | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
- | R500_FC_B_OP1_NONE /* no branch counter if stay */
- | R500_FC_B_POP_CNT(1)
+ branch->Endif = newip;
+
+ s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
+ | R500_FC_A_OP_NONE /* no address stack */
+ | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
+ | R500_FC_B_OP0_DECR /* decrement branch counter if stay */
+ | R500_FC_B_OP1_NONE /* no branch counter if stay */
+ | R500_FC_B_POP_CNT(1)
;
- s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
- }
+ s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
| R500_FC_A_OP_NONE /* no address stack */
| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
@@ -544,11 +584,9 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
}
- /* Use FULL flow control mode if branches are nested deep enough.
- * We don not need to enable FULL flow control mode for loops, becasue
- * we aren't using the hardware loop instructions.
- */
- if (s.MaxBranchDepth >= 4) {
+ /* Enable full flow control mode if we are using loops or have if
+ * statements nested at least four deep. */
+ if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
if (code->max_temp_idx < 1)
code->max_temp_idx = 1;
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
index d03689763b..e14a3520dd 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h
@@ -221,6 +221,9 @@ struct r500_fragment_program_code {
int max_temp_idx;
uint32_t us_fc_ctrl;
+
+ uint32_t int_constants[32];
+ uint32_t int_constant_count;
};
struct rX00_fragment_program_code {
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
index fbb4235c22..31566a937f 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c
@@ -43,6 +43,12 @@ struct instruction_state {
unsigned char SrcReg[3];
};
+struct loopinfo {
+ struct updatemask_state * Breaks;
+ unsigned int BreakCount;
+ unsigned int BreaksReserved;
+};
+
struct branchinfo {
unsigned int HaveElse:1;
@@ -59,6 +65,10 @@ struct deadcode_state {
struct branchinfo * BranchStack;
unsigned int BranchStackSize;
unsigned int BranchStackReserved;
+
+ struct loopinfo * LoopStack;
+ unsigned int LoopStackSize;
+ unsigned int LoopStackReserved;
};
@@ -78,6 +88,22 @@ static void or_updatemasks(
dst->Address = a->Address | b->Address;
}
+static void push_break(struct deadcode_state *s)
+{
+ struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
+ memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
+ loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
+
+ memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
+}
+
+static void push_loop(struct deadcode_state * s)
+{
+ memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
+ s->LoopStackSize, s->LoopStackReserved, 1);
+ memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
+}
+
static void push_branch(struct deadcode_state * s)
{
memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
@@ -233,11 +259,22 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f
}
}
}
+ push_loop(&s);
break;
}
- case RC_OPCODE_CONTINUE:
case RC_OPCODE_BRK:
+ push_break(&s);
+ break;
case RC_OPCODE_BGNLOOP:
+ {
+ unsigned int i;
+ struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
+ for(i = 0; i < loop->BreakCount; i++) {
+ or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
+ }
+ break;
+ }
+ case RC_OPCODE_CONTINUE:
break;
case RC_OPCODE_ENDIF:
push_branch(&s);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
index 131e9e7436..24c3ae57b6 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c
@@ -39,7 +39,6 @@
#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0)
struct const_value {
-
struct radeon_compiler * C;
struct rc_src_register * Src;
float Value;
@@ -78,17 +77,17 @@ static int src_reg_is_immediate(struct rc_src_register * src,
c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE;
}
-static unsigned int loop_calc_iterations(struct emulate_loop_state *s,
- struct loop_info * loop, unsigned int max_instructions)
+static unsigned int loop_max_possible_iterations(struct radeon_compiler *c,
+ struct loop_info * loop, unsigned int prog_inst_limit)
{
- unsigned int total_i = rc_recompute_ips(s->C);
+ unsigned int total_i = rc_recompute_ips(c);
unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1;
/* +1 because the program already has one iteration of the loop. */
- return 1 + ((max_instructions - total_i) / (s->LoopCount * loop_i));
+ return 1 + ((prog_inst_limit - total_i) / loop_i);
}
-static void loop_unroll(struct emulate_loop_state * s,
- struct loop_info *loop, unsigned int iterations)
+static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+ unsigned int iterations)
{
unsigned int i;
struct rc_instruction * ptr;
@@ -99,7 +98,7 @@ static void loop_unroll(struct emulate_loop_state * s,
rc_remove_instruction(loop->EndLoop);
for( i = 1; i < iterations; i++){
for(ptr = first; ptr != last->Next; ptr = ptr->Next){
- struct rc_instruction *new = rc_alloc_instruction(s->C);
+ struct rc_instruction *new = rc_alloc_instruction(c);
memcpy(new, ptr, sizeof(struct rc_instruction));
rc_insert_instruction(append_to, new);
append_to = new;
@@ -115,7 +114,7 @@ static void update_const_value(void * data, struct rc_instruction * inst,
if(value->Src->File != file ||
value->Src->Index != index ||
!(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){
- return;
+ return;
}
switch(inst->U.I.Opcode){
case RC_OPCODE_MOV:
@@ -140,7 +139,7 @@ static void get_incr_amount(void * data, struct rc_instruction * inst,
if(file != RC_FILE_TEMPORARY ||
count_inst->Index != index ||
(1 << GET_SWZ(count_inst->Swz,0) != mask)){
- return;
+ return;
}
/* Find the index of the counter register. */
opcode = rc_get_opcode_info(inst->U.I.Opcode);
@@ -185,13 +184,16 @@ static void get_incr_amount(void * data, struct rc_instruction * inst,
count_inst->Unknown = 1;
return;
}
-
}
-static int transform_const_loop(struct emulate_loop_state * s,
- struct loop_info * loop)
+/**
+ * If prog_inst_limit is -1, then all eligible loops will be unrolled regardless
+ * of how many iterations they have.
+ */
+static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop,
+ unsigned int prog_inst_limit)
{
- int end_loops = 1;
+ int end_loops;
int iterations;
struct count_inst count_inst;
float limit_value;
@@ -201,12 +203,12 @@ static int transform_const_loop(struct emulate_loop_state * s,
struct rc_instruction * inst;
/* Find the counter and the upper limit */
-
- if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){
+
+ if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){
limit = &loop->Cond->U.I.SrcReg[0];
counter = &loop->Cond->U.I.SrcReg[1];
}
- else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){
+ else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){
limit = &loop->Cond->U.I.SrcReg[1];
counter = &loop->Cond->U.I.SrcReg[0];
}
@@ -214,13 +216,13 @@ static int transform_const_loop(struct emulate_loop_state * s,
DBG("No constant limit.\n");
return 0;
}
-
+
/* Find the initial value of the counter */
counter_value.Src = counter;
counter_value.Value = 0.0f;
counter_value.HasValue = 0;
- counter_value.C = s->C;
- for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop;
+ counter_value.C = c;
+ for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop;
inst = inst->Next){
rc_for_all_writes_mask(inst, update_const_value, &counter_value);
}
@@ -230,11 +232,12 @@ static int transform_const_loop(struct emulate_loop_state * s,
}
DBG("Initial counter value is %f\n", counter_value.Value);
/* Determine how the counter is modified each loop */
- count_inst.C = s->C;
+ count_inst.C = c;
count_inst.Index = counter->Index;
count_inst.Swz = counter->Swizzle;
count_inst.Amount = 0.0f;
count_inst.Unknown = 0;
+ end_loops = 1;
for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){
switch(inst->U.I.Opcode){
/* XXX In the future we might want to try to unroll nested
@@ -246,6 +249,16 @@ static int transform_const_loop(struct emulate_loop_state * s,
loop->EndLoop = inst;
end_loops--;
break;
+ case RC_OPCODE_BRK:
+ /* Don't unroll loops if it has a BRK instruction
+ * other one used when testing the main conditional
+ * of the loop. */
+
+ /* Make sure we haven't entered a nested loops. */
+ if(inst != loop->Brk && end_loops == 1) {
+ return 0;
+ }
+ break;
/* XXX Check if the counter is modified within an if statement.
*/
case RC_OPCODE_IF:
@@ -266,17 +279,20 @@ static int transform_const_loop(struct emulate_loop_state * s,
/* Calculate the number of iterations of this loop. Keeping this
* simple, since we only support increment and decrement loops.
*/
- limit_value = get_constant_value(s->C, limit, 0);
+ limit_value = get_constant_value(c, limit, 0);
DBG("Limit is %f.\n", limit_value);
+ /* The iteration calculations are opposite of what you would expect.
+ * In a normal loop, if the condition is met, then loop continues, but
+ * with our loops, if the condition is met, the is exited. */
switch(loop->Cond->U.I.Opcode){
- case RC_OPCODE_SGT:
- case RC_OPCODE_SLT:
+ case RC_OPCODE_SGE:
+ case RC_OPCODE_SLE:
iterations = (int) ceilf((limit_value - counter_value.Value) /
count_inst.Amount);
break;
- case RC_OPCODE_SLE:
- case RC_OPCODE_SGE:
+ case RC_OPCODE_SGT:
+ case RC_OPCODE_SLT:
iterations = (int) floorf((limit_value - counter_value.Value) /
count_inst.Amount) + 1;
break;
@@ -284,77 +300,85 @@ static int transform_const_loop(struct emulate_loop_state * s,
return 0;
}
+ if (prog_inst_limit > 0
+ && iterations > loop_max_possible_iterations(c, loop,
+ prog_inst_limit)) {
+ return 0;
+ }
+
DBG("Loop will have %d iterations.\n", iterations);
-
+
/* Prepare loop for unrolling */
rc_remove_instruction(loop->Cond);
rc_remove_instruction(loop->If);
rc_remove_instruction(loop->Brk);
rc_remove_instruction(loop->EndIf);
-
- loop_unroll(s, loop, iterations);
+
+ unroll_loop(c, loop, iterations);
loop->EndLoop = NULL;
return 1;
}
-/**
- * This function prepares a loop to be unrolled by converting it into an if
- * statement. Here is an outline of the conversion process:
- * BGNLOOP; -> BGNLOOP;
- * <Additional conditional code> -> <Additional conditional code>
- * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
- * IF temp[0]; -> IF temp[0];
- * BRK; ->
- * ENDIF; -> <Loop Body>
- * <Loop Body> -> ENDIF;
- * ENDLOOP; -> ENDLOOP
- *
+/**
+ * @param c
+ * @param loop
* @param inst A pointer to a BGNLOOP instruction.
- * @return If the loop can be unrolled, a pointer to the first instruction of
- * the unrolled loop.
- * Otherwise, A pointer to the ENDLOOP instruction.
- * Null if there is an error.
+ * @return 1 if all of the members of loop where set.
+ * @return 0 if there was an error and some members of loop are still NULL.
*/
-static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop,
struct rc_instruction * inst)
{
- struct loop_info *loop;
struct rc_instruction * ptr;
- memory_pool_array_reserve(&s->C->Pool, struct loop_info,
- s->Loops, s->LoopCount, s->LoopReserved, 1);
-
- loop = &s->Loops[s->LoopCount++];
- memset(loop, 0, sizeof(struct loop_info));
if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){
- rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__);
- return NULL;
+ rc_error(c, "%s: expected BGNLOOP", __FUNCTION__);
+ return 0;
}
+
+ memset(loop, 0, sizeof(struct loop_info));
+
loop->BeginLoop = inst;
- for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){
+ for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) {
+
+ if (ptr == &c->Program.Instructions) {
+ rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n",
+ __FUNCTION__);
+ return 0;
+ }
+
switch(ptr->U.I.Opcode){
case RC_OPCODE_BGNLOOP:
- /* Nested loop */
- ptr = transform_loop(s, ptr);
- if(!ptr){
- return NULL;
+ {
+ /* Nested loop, skip ahead to the end. */
+ unsigned int loop_depth = 1;
+ for(ptr = ptr->Next; ptr != &c->Program.Instructions;
+ ptr = ptr->Next){
+ if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ loop_depth++;
+ } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) {
+ if (!--loop_depth) {
+ break;
+ }
+ }
+ }
+ if (ptr == &c->Program.Instructions) {
+ rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n",
+ __FUNCTION__);
+ return 0;
}
break;
+ }
case RC_OPCODE_BRK:
- loop->Brk = ptr;
- if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){
- rc_error(s->C,
- "%s: expected ENDIF\n",__FUNCTION__);
- return NULL;
- }
- loop->EndIf = ptr->Next;
- if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){
- rc_error(s->C,
- "%s: expected IF\n", __FUNCTION__);
- return NULL;
+ if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF
+ || ptr->Prev->U.I.Opcode != RC_OPCODE_IF
+ || loop->Brk){
+ continue;
}
+ loop->Brk = ptr;
loop->If = ptr->Prev;
+ loop->EndIf = ptr->Next;
switch(loop->If->Prev->U.I.Opcode){
case RC_OPCODE_SLT:
case RC_OPCODE_SGE:
@@ -364,18 +388,61 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
case RC_OPCODE_SNE:
break;
default:
- rc_error(s->C, "%s expected conditional\n",
+ rc_error(c, "%s: expected conditional",
__FUNCTION__);
- return NULL;
+ return 0;
}
loop->Cond = loop->If->Prev;
- ptr = loop->EndIf;
break;
+
case RC_OPCODE_ENDLOOP:
loop->EndLoop = ptr;
break;
}
}
+
+ if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf
+ && loop->Cond && loop->EndLoop) {
+ return 1;
+ }
+ return 0;
+}
+
+/**
+ * This function prepares a loop to be unrolled by converting it into an if
+ * statement. Here is an outline of the conversion process:
+ * BGNLOOP; -> BGNLOOP;
+ * <Additional conditional code> -> <Additional conditional code>
+ * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2];
+ * IF temp[0]; -> IF temp[0];
+ * BRK; ->
+ * ENDIF; -> <Loop Body>
+ * <Loop Body> -> ENDIF;
+ * ENDLOOP; -> ENDLOOP
+ *
+ * @param inst A pointer to a BGNLOOP instruction.
+ * @return If the loop can be unrolled, a pointer to the first instruction of
+ * the unrolled loop.
+ * Otherwise, A pointer to the ENDLOOP instruction.
+ * Null if there is an error.
+ */
+static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
+ struct rc_instruction * inst)
+{
+ struct loop_info * loop;
+
+ memory_pool_array_reserve(&s->C->Pool, struct loop_info,
+ s->Loops, s->LoopCount, s->LoopReserved, 1);
+
+ loop = &s->Loops[s->LoopCount++];
+
+ if (!build_loop_info(s->C, loop, inst))
+ return NULL;
+
+ if(try_unroll_loop(s->C, loop, -1)){
+ return loop->BeginLoop->Next;
+ }
+
/* Reverse the conditional instruction */
switch(loop->Cond->U.I.Opcode){
case RC_OPCODE_SGE:
@@ -400,24 +467,19 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s,
rc_error(s->C, "loop->Cond is not a conditional.\n");
return NULL;
}
-
- /* Check if the number of loops is known at compile time. */
- if(transform_const_loop(s, loop)){
- return loop->BeginLoop->Next;
- }
- /* Prepare the loop to be unrolled */
+ /* Prepare the loop to be emulated */
rc_remove_instruction(loop->Brk);
rc_remove_instruction(loop->EndIf);
rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf);
return loop->EndLoop;
}
-void rc_transform_unroll_loops(struct radeon_compiler *c,
- struct emulate_loop_state * s)
+void rc_transform_loops(struct radeon_compiler *c,
+ struct emulate_loop_state * s)
{
struct rc_instruction * ptr;
-
+
memset(s, 0, sizeof(struct emulate_loop_state));
s->C = c;
ptr = s->C->Program.Instructions.Next;
@@ -433,8 +495,23 @@ void rc_transform_unroll_loops(struct radeon_compiler *c,
}
}
-void rc_emulate_loops(struct emulate_loop_state *s,
- unsigned int max_instructions)
+void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit)
+{
+ struct rc_instruction * inst;
+ struct loop_info loop;
+
+ for(inst = c->Program.Instructions.Next;
+ inst != &c->Program.Instructions; inst = inst->Next) {
+
+ if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) {
+ if (build_loop_info(c, &loop, inst)) {
+ try_unroll_loop(c, &loop, prog_inst_limit);
+ }
+ }
+ }
+}
+
+void rc_emulate_loops(struct emulate_loop_state *s, int prog_inst_limit)
{
int i;
/* Iterate backwards of the list of loops so that loops that nested
@@ -444,8 +521,8 @@ void rc_emulate_loops(struct emulate_loop_state *s,
if(!s->Loops[i].EndLoop){
continue;
}
- unsigned int iterations = loop_calc_iterations(s, &s->Loops[i],
- max_instructions);
- loop_unroll(s, &s->Loops[i], iterations);
+ unsigned int iterations = loop_max_possible_iterations(
+ s->C, &s->Loops[i], prog_inst_limit);
+ unroll_loop(s->C, &s->Loops[i], iterations);
}
}
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
index 7748813c4e..86d91ef14b 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h
@@ -23,10 +23,11 @@ struct emulate_loop_state {
unsigned int LoopReserved;
};
-void rc_transform_unroll_loops(struct radeon_compiler *c,
- struct emulate_loop_state * s);
+void rc_transform_loops(struct radeon_compiler *c,
+ struct emulate_loop_state * s);
-void rc_emulate_loops(struct emulate_loop_state *s,
- unsigned int max_instructions);
+void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit);
+
+void rc_emulate_loops(struct emulate_loop_state * s, int prog_inst_limit);
#endif /* RADEON_EMULATE_LOOPS_H */
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
index eca0651536..7a3f35950a 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c
@@ -164,7 +164,8 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo
inst = inst->Next) {
/* XXX In the future we might be able to make the optimizer
* smart enough to handle loops. */
- if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP){
+ if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP
+ || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){
return;
}
rc_for_all_reads_mask(inst, peephole_scan_read, &s);
diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
index 3cc2897293..857aae5514 100644
--- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
+++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c
@@ -988,17 +988,22 @@ void radeonTransformKILP(struct radeon_compiler * c)
for (inst = c->Program.Instructions.Next;
inst != &c->Program.Instructions; inst = inst->Next) {
- if (inst->U.I.Opcode != RC_OPCODE_KILP
- || inst->Prev->U.I.Opcode != RC_OPCODE_IF
- || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+ if (inst->U.I.Opcode != RC_OPCODE_KILP)
continue;
- }
+
inst->U.I.Opcode = RC_OPCODE_KIL;
- inst->U.I.SrcReg[0] = negate(absolute(inst->Prev->U.I.SrcReg[0]));
- /* Remove IF */
- rc_remove_instruction(inst->Prev);
- /* Remove ENDIF */
- rc_remove_instruction(inst->Next);
+ if (inst->Prev->U.I.Opcode != RC_OPCODE_IF
+ || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
+ inst->U.I.SrcReg[0] = negate(builtin_one);
+ } else {
+
+ inst->U.I.SrcReg[0] =
+ negate(absolute(inst->Prev->U.I.SrcReg[0]));
+ /* Remove IF */
+ rc_remove_instruction(inst->Prev);
+ /* Remove ENDIF */
+ rc_remove_instruction(inst->Next);
+ }
}
}
diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c
index bb8f91491f..cf89ab7ec3 100644
--- a/src/mesa/drivers/dri/r300/r300_render.c
+++ b/src/mesa/drivers/dri/r300/r300_render.c
@@ -327,6 +327,8 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim)
BATCH_LOCALS(&rmesa->radeon);
int type, num_verts;
+ radeon_prepare_render(&rmesa->radeon);
+
type = r300PrimitiveType(rmesa, prim);
num_verts = r300NumVerts(rmesa, end - start, prim);
diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c
index 172f85eb26..619678214f 100644
--- a/src/mesa/drivers/dri/r600/r600_blit.c
+++ b/src/mesa/drivers/dri/r600/r600_blit.c
@@ -390,13 +390,20 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma
0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
END_BATCH();
- BEGIN_BATCH_NO_AUTOSTATE(12);
+ BEGIN_BATCH_NO_AUTOSTATE(9);
R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), cb_color0_size);
R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), cb_color0_view);
- R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), 0);
END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info);
+ R600_OUT_BATCH_RELOC(0,
+ bo,
+ 0,
+ 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0);
+ END_BATCH();
+
COMMIT_BATCH();
}
@@ -1447,7 +1454,7 @@ set_default_state(context_t *context)
SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit);
}
- BEGIN_BATCH_NO_AUTOSTATE(117);
+ BEGIN_BATCH_NO_AUTOSTATE(114);
R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6);
R600_OUT_BATCH(sq_config);
R600_OUT_BATCH(sq_gpr_resource_mgmt_1);
@@ -1477,7 +1484,6 @@ set_default_state(context_t *context)
(CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift));
R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0);
R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0);
- R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0);
R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0);
R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask));
R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask));
@@ -1607,7 +1613,7 @@ unsigned r600_blit(GLcontext *ctx,
/* Flush is needed to make sure that source buffer has correct data */
radeonFlush(ctx);
- rcommonEnsureCmdBufSpace(&context->radeon, 304, __FUNCTION__);
+ rcommonEnsureCmdBufSpace(&context->radeon, 305, __FUNCTION__);
/* load shaders */
load_shaders(context->radeon.glCtx);
@@ -1616,7 +1622,7 @@ unsigned r600_blit(GLcontext *ctx,
return GL_FALSE;
/* set clear state */
- /* 117 */
+ /* 114 */
set_default_state(context);
/* shaders */
@@ -1632,7 +1638,7 @@ unsigned r600_blit(GLcontext *ctx,
set_tex_sampler(context);
/* dst */
- /* 27 */
+ /* 31 */
set_render_target(context, dst_bo, dst_mesaformat,
dst_pitch, dst_width, dst_height, dst_offset);
/* scissors */
diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c
index 99a33df4fc..9c954cbf70 100644
--- a/src/mesa/drivers/dri/r600/r700_assembler.c
+++ b/src/mesa/drivers/dri/r600/r700_assembler.c
@@ -275,7 +275,10 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
case 2:
format = FMT_8_8; break;
case 3:
- format = FMT_8_8_8; break;
+ /* for some (small/unaligned) strides using 4 comps works
+ * better, probably same as GL_SHORT below
+ * test piglit/draw-vertices */
+ format = FMT_8_8_8_8; break;
case 4:
format = FMT_8_8_8_8; break;
default:
@@ -2872,25 +2875,92 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
{
+ /*
+ * r600 - trunc to -PI..PI range
+ * r700 - normalize by dividing by 2PI
+ * see fdo bug 27901
+ */
+
int tmp;
checkop1(pAsm);
tmp = gethelpr(pAsm);
- pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
assemble_src(pAsm, 0, -1);
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
pAsm->D2.dst2.literal_slots = 1;
pAsm->C[0].f = 1/(3.1415926535 * 2);
- pAsm->C[1].f = 0.0F;
- next_ins(pAsm);
+ pAsm->C[1].f = 0.5f;
+
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
+ pAsm->D2.dst2.literal_slots = 1;
+
+ if (pAsm->bR6xx)
+ {
+ pAsm->C[0].f = 3.1415926535897f * 2.0f;
+ pAsm->C[1].f = -3.1415926535897f;
+ }
+ else
+ {
+ pAsm->C[0].f = 1.0f;
+ pAsm->C[1].f = -0.5f;
+ }
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
pAsm->D.dst.opcode = opcode;
pAsm->D.dst.math = 1;
@@ -4030,22 +4100,79 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
checkop1(pAsm);
tmp = gethelpr(pAsm);
- /* tmp.x = src /2*PI */
- pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
+
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
pAsm->D.dst.rtype = DST_REG_TEMPORARY;
pAsm->D.dst.reg = tmp;
- pAsm->D.dst.writex = 1;
assemble_src(pAsm, 0, -1);
pAsm->S[1].src.rtype = SRC_REC_LITERAL;
setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
pAsm->D2.dst2.literal_slots = 1;
pAsm->C[0].f = 1/(3.1415926535 * 2);
- pAsm->C[1].f = 0.0F;
+ pAsm->C[1].f = 0.5F;
- next_ins(pAsm);
+ if ( GL_FALSE == next_ins(pAsm) )
+ {
+ return GL_FALSE;
+ }
+
+ pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+ pAsm->D.dst.writex = 1;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
+ pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
+ pAsm->D.dst.op3 = 1;
+
+ setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
+ pAsm->D.dst.rtype = DST_REG_TEMPORARY;
+ pAsm->D.dst.reg = tmp;
+
+ setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
+ pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
+ pAsm->S[0].src.reg = tmp;
+ setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
+
+ pAsm->S[1].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
+
+ pAsm->S[2].src.rtype = SRC_REC_LITERAL;
+ setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
+
+ pAsm->D2.dst2.literal_slots = 1;
+
+ if(pAsm->bR6xx) {
+ pAsm->C[0].f = 3.1415926535897f * 2.0f;
+ pAsm->C[1].f = -3.1415926535897f;
+ } else {
+ pAsm->C[0].f = 1.0f;
+ pAsm->C[1].f = -0.5f;
+ }
+
+ if(( GL_FALSE == next_ins(pAsm) ))
+ {
+ return GL_FALSE;
+ }
// COS dst.x, a.x
pAsm->D.dst.opcode = SQ_OP2_INST_COS;
@@ -6473,7 +6600,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
* results are undefined anyway */
if(export_count == 0)
{
- Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
+ Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->starting_export_register_number, GL_FALSE);
}
if(pR700AsmCode->cf_last_export_ptr != NULL)
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index cefda3ac4b..1e955b93b2 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -617,18 +617,25 @@ static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *a
r700SetDepthTarget(context);
- BEGIN_BATCH_NO_AUTOSTATE(8 + 2);
+ BEGIN_BATCH_NO_AUTOSTATE(7 + 2);
R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2);
R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All);
R600_OUT_BATCH(r700->DB_DEPTH_VIEW.u32All);
- R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 2);
+ R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 1);
R600_OUT_BATCH(r700->DB_DEPTH_BASE.u32All);
- R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All);
R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All,
rrb->bo,
r700->DB_DEPTH_BASE.u32All,
0, RADEON_GEM_DOMAIN_VRAM, 0);
END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGSEQ(DB_DEPTH_INFO, 1);
+ R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All);
+ R600_OUT_BATCH_RELOC(r700->DB_DEPTH_INFO.u32All,
+ rrb->bo,
+ r700->DB_DEPTH_INFO.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+ END_BATCH();
if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) &&
(context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) {
@@ -687,27 +694,35 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom *
BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1);
R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_TILE.u32All);
- R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+ R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_TILE.u32All,
rrb->bo,
- r700->render_target[id].CB_COLOR0_BASE.u32All,
+ r700->render_target[id].CB_COLOR0_TILE.u32All,
0, RADEON_GEM_DOMAIN_VRAM, 0);
END_BATCH();
BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1);
R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_FRAG.u32All);
- R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All,
+ R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_FRAG.u32All,
rrb->bo,
- r700->render_target[id].CB_COLOR0_BASE.u32All,
+ r700->render_target[id].CB_COLOR0_FRAG.u32All,
0, RADEON_GEM_DOMAIN_VRAM, 0);
END_BATCH();
- BEGIN_BATCH_NO_AUTOSTATE(12);
+ BEGIN_BATCH_NO_AUTOSTATE(9);
R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All);
R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All);
- R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All);
R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All);
END_BATCH();
+ BEGIN_BATCH_NO_AUTOSTATE(3 + 2);
+ R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All);
+ R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_INFO.u32All,
+ rrb->bo,
+ r700->render_target[id].CB_COLOR0_INFO.u32All,
+ 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+ END_BATCH();
+
COMMIT_BATCH();
}
@@ -1567,7 +1582,7 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(sq, always, 34, r700SendSQConfig);
ALLOC_STATE(db, always, 17, r700SendDBState);
ALLOC_STATE(stencil, always, 4, r700SendStencilState);
- ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState);
+ ALLOC_STATE(db_target, always, 16, r700SendDepthTargetState);
ALLOC_STATE(sc, always, 15, r700SendSCState);
ALLOC_STATE(scissor, always, 22, r700SendScissorState);
ALLOC_STATE(aa, always, 12, r700SendAAState);
@@ -1578,7 +1593,7 @@ void r600InitAtoms(context_t *context)
ALLOC_STATE(poly, always, 10, r700SendPolyState);
ALLOC_STATE(cb, cb, 18, r700SendCBState);
ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState);
- ALLOC_STATE(cb_target, always, 29, r700SendRenderTargetState);
+ ALLOC_STATE(cb_target, always, 31, r700SendRenderTargetState);
ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState);
ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState);
ALLOC_STATE(sx, always, 9, r700SendSXState);
diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c
index 09c48565b6..d1008f28b9 100644
--- a/src/mesa/drivers/dri/r600/r700_clear.c
+++ b/src/mesa/drivers/dri/r600/r700_clear.c
@@ -48,6 +48,7 @@ static GLboolean r700ClearFast(context_t *context, GLbitfield mask)
void r700Clear(GLcontext * ctx, GLbitfield mask)
{
context_t *context = R700_CONTEXT(ctx);
+ radeonContextPtr radeon = &context->radeon;
__DRIdrawable *dPriv = radeon_get_drawable(&context->radeon);
const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]);
GLbitfield swrast_mask = 0, tri_mask = 0;
@@ -60,6 +61,8 @@ void r700Clear(GLcontext * ctx, GLbitfield mask)
context->radeon.front_buffer_dirty = GL_TRUE;
}
+ radeon_prepare_render(radeon);
+
if( GL_TRUE == r700ClearFast(context, mask) )
{
return;
diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c
index 1929b7cc12..ba55f38e05 100644
--- a/src/mesa/drivers/dri/r600/r700_render.c
+++ b/src/mesa/drivers/dri/r600/r700_render.c
@@ -625,11 +625,11 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input
stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB;
- if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT ||
+ if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT
#if MESA_BIG_ENDIAN
- getTypeSize(input[i]->Type) != 4 ||
+ || getTypeSize(input[i]->Type) != 4
#endif
- stride < 4)
+ )
{
r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]);
}
@@ -637,19 +637,10 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input
{
if (input[i]->BufferObj->Name)
{
- if (stride % 4 != 0)
- {
- assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0);
- r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]);
- context->stream_desc[index].is_named_bo = GL_FALSE;
- }
- else
- {
- context->stream_desc[index].stride = input[i]->StrideB;
- context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
- context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
- context->stream_desc[index].is_named_bo = GL_TRUE;
- }
+ context->stream_desc[index].stride = input[i]->StrideB;
+ context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr;
+ context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo;
+ context->stream_desc[index].is_named_bo = GL_TRUE;
}
else
{
@@ -977,6 +968,10 @@ static void r700DrawPrims(GLcontext *ctx,
{
GLboolean retval = GL_FALSE;
+ context_t *context = R700_CONTEXT(ctx);
+ radeonContextPtr radeon = &context->radeon;
+ radeon_prepare_render(radeon);
+
/* This check should get folded into just the places that
* min/max index are really needed.
*/
diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c
index 137f3007ce..6a2a09eaf1 100644
--- a/src/mesa/drivers/dri/r600/r700_vertprog.c
+++ b/src/mesa/drivers/dri/r600/r700_vertprog.c
@@ -461,11 +461,11 @@ static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const s
stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size
: input->StrideB;
- if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT ||
+ if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT
#if MESA_BIG_ENDIAN
- getTypeSize(input->Type) != 4 ||
+ || getTypeSize(input->Type) != 4
#endif
- stride < 4)
+ )
{
pStreamDesc->type = GL_FLOAT;
diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c
index 13f1f0611b..c1a660af3d 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common.c
@@ -708,7 +708,6 @@ void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb)
if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) {
rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
radeon->front_cliprects = GL_TRUE;
- radeon->front_buffer_dirty = GL_TRUE;
} else {
rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer);
radeon->front_cliprects = GL_FALSE;
@@ -1132,17 +1131,13 @@ flush_front:
if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2)
&& (screen->dri2.loader->flushFrontBuffer != NULL)) {
__DRIdrawable * drawable = radeon_get_drawable(radeon);
- (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate);
- /* Only clear the dirty bit if front-buffer rendering is no longer
- * enabled. This is done so that the dirty bit can only be set in
- * glDrawBuffer. Otherwise the dirty bit would have to be set at
- * each of N places that do rendering. This has worse performances,
- * but it is much easier to get correct.
+ /* We set the dirty bit in radeon_prepare_render() if we're
+ * front buffer rendering once we get there.
*/
- if (!radeon->is_front_buffer_rendering) {
- radeon->front_buffer_dirty = GL_FALSE;
- }
+ radeon->front_buffer_dirty = GL_FALSE;
+
+ (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate);
}
}
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c
index 5a7d52c4d2..92663bf66d 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.c
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c
@@ -493,6 +493,50 @@ radeon_bits_per_pixel(const struct radeon_renderbuffer *rb)
return _mesa_get_format_bytes(rb->base.Format) * 8;
}
+/*
+ * Check if drawable has been invalidated by dri2InvalidateDrawable().
+ * Update renderbuffers if so. This prevents a client from accessing
+ * a backbuffer that has a swap pending but not yet completed.
+ *
+ * See intel_prepare_render for equivalent code in intel driver.
+ *
+ */
+void radeon_prepare_render(radeonContextPtr radeon)
+{
+ __DRIcontext *driContext = radeon->dri.context;
+ __DRIdrawable *drawable;
+ __DRIscreen *screen;
+
+ screen = driContext->driScreenPriv;
+ if (!screen->dri2.loader)
+ return;
+
+ drawable = driContext->driDrawablePriv;
+ if (drawable->dri2.stamp != driContext->dri2.draw_stamp) {
+ if (drawable->lastStamp != drawable->dri2.stamp)
+ radeon_update_renderbuffers(driContext, drawable, GL_FALSE);
+
+ /* Intel driver does the equivalent of this, no clue if it is needed:
+ * radeon_draw_buffer(radeon->glCtx, &(drawable->driverPrivate)->base);
+ */
+ driContext->dri2.draw_stamp = drawable->dri2.stamp;
+ }
+
+ drawable = driContext->driReadablePriv;
+ if (drawable->dri2.stamp != driContext->dri2.read_stamp) {
+ if (drawable->lastStamp != drawable->dri2.stamp)
+ radeon_update_renderbuffers(driContext, drawable, GL_FALSE);
+ driContext->dri2.read_stamp = drawable->dri2.stamp;
+ }
+
+ /* If we're currently rendering to the front buffer, the rendering
+ * that will happen next will probably dirty the front buffer. So
+ * mark it as dirty here.
+ */
+ if (radeon->is_front_buffer_rendering)
+ radeon->front_buffer_dirty = GL_TRUE;
+}
+
void
radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
GLboolean front_only)
@@ -514,6 +558,11 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
screen = context->driScreenPriv;
radeon = (radeonContextPtr) context->driverPrivate;
+ /* Set this up front, so that in case our buffers get invalidated
+ * while we're getting new buffers, we don't clobber the stamp and
+ * thus ignore the invalidate. */
+ drawable->lastStamp = drawable->dri2.stamp;
+
if (screen->dri2.loader
&& (screen->dri2.loader->base.version > 2)
&& (screen->dri2.loader->getBuffersWithFormat != NULL)) {
@@ -650,6 +699,13 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
rb->base.Height = drawable->h;
rb->has_surface = 0;
+ /* r6xx+ tiling */
+ rb->tile_config = radeon->radeonScreen->tile_config;
+ rb->group_bytes = radeon->radeonScreen->group_bytes;
+ rb->num_channels = radeon->radeonScreen->num_channels;
+ rb->num_banks = radeon->radeonScreen->num_banks;
+ rb->r7xx_bank_op = radeon->radeonScreen->r7xx_bank_op;
+
if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_bo) {
if (RADEON_DEBUG & RADEON_DRI)
fprintf(stderr, "(reusing depth buffer as stencil)\n");
@@ -678,7 +734,7 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable,
bo->flags |= RADEON_BO_FLAGS_MACRO_TILE;
if (tiling_flags & RADEON_TILING_MICRO)
bo->flags |= RADEON_BO_FLAGS_MICRO_TILE;
-
+
}
if (buffers[i].attachment == __DRI_BUFFER_DEPTH) {
diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h
index 5156c5d0d0..f06e5fdf24 100644
--- a/src/mesa/drivers/dri/radeon/radeon_common_context.h
+++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h
@@ -93,6 +93,13 @@ struct radeon_renderbuffer
GLuint pf_pending; /**< sequence number of pending flip */
GLuint vbl_pending; /**< vblank sequence number of pending flip */
__DRIdrawable *dPriv;
+
+ /* r6xx+ tiling */
+ GLuint tile_config;
+ GLint group_bytes;
+ GLint num_channels;
+ GLint num_banks;
+ GLint r7xx_bank_op;
};
struct radeon_framebuffer
@@ -614,5 +621,6 @@ GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv,
__DRIdrawable * driDrawPriv,
__DRIdrawable * driReadPriv);
extern void radeonDestroyContext(__DRIcontext * driContextPriv);
+void radeon_prepare_render(radeonContextPtr radeon);
#endif
diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
index dadb8002c7..fb741173ca 100644
--- a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
+++ b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c
@@ -179,6 +179,9 @@ radeonReadPixels(GLcontext * ctx,
GLenum format, GLenum type,
const struct gl_pixelstore_attrib *pack, GLvoid * pixels)
{
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ radeon_prepare_render(radeon);
+
if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack, pixels))
return;
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c
index 82107cc6ae..fa97a19302 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.c
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.c
@@ -213,6 +213,10 @@ static const GLuint __driNConfigOptions = 17;
static int getSwapInfo( __DRIdrawable *dPriv, __DRIswapInfo * sInfo );
+#ifndef RADEON_INFO_TILE_CONFIG
+#define RADEON_INFO_TILE_CONFIG 0x6
+#endif
+
static int
radeonGetParam(__DRIscreen *sPriv, int param, void *value)
{
@@ -232,6 +236,9 @@ radeonGetParam(__DRIscreen *sPriv, int param, void *value)
case RADEON_PARAM_NUM_Z_PIPES:
info.request = RADEON_INFO_NUM_Z_PIPES;
break;
+ case RADEON_INFO_TILE_CONFIG:
+ info.request = RADEON_INFO_TILE_CONFIG;
+ break;
default:
return -EINVAL;
}
@@ -376,6 +383,21 @@ static const __DRItexBufferExtension r600TexBufferExtension = {
};
#endif
+static void
+radeonDRI2Flush(__DRIdrawable *drawable)
+{
+ radeonContextPtr rmesa;
+
+ rmesa = (radeonContextPtr) drawable->driContextPriv->driverPrivate;
+ radeonFlush(rmesa->glCtx);
+}
+
+static const struct __DRI2flushExtensionRec radeonFlushExtension = {
+ { __DRI2_FLUSH, __DRI2_FLUSH_VERSION },
+ radeonDRI2Flush,
+ dri2InvalidateDrawable,
+};
+
static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id)
{
screen->device_id = device_id;
@@ -1305,6 +1327,56 @@ radeonCreateScreen2(__DRIscreen *sPriv)
else
screen->chip_flags |= RADEON_CLASS_R600;
+ /* r6xx+ tiling */
+ if (IS_R600_CLASS(screen) && (sPriv->drm_version.minor >= 6)) {
+ ret = radeonGetParam(sPriv, RADEON_INFO_TILE_CONFIG, &temp);
+ if (ret)
+ fprintf(stderr, "failed to get tiling info\n");
+ else {
+ screen->tile_config = temp;
+ screen->r7xx_bank_op = 0;
+ switch((screen->tile_config & 0xe) >> 1) {
+ case 0:
+ screen->num_channels = 1;
+ break;
+ case 1:
+ screen->num_channels = 2;
+ break;
+ case 2:
+ screen->num_channels = 4;
+ break;
+ case 3:
+ screen->num_channels = 8;
+ break;
+ default:
+ fprintf(stderr, "bad channels\n");
+ break;
+ }
+ switch((screen->tile_config & 0x30) >> 4) {
+ case 0:
+ screen->num_banks = 4;
+ break;
+ case 1:
+ screen->num_banks = 8;
+ break;
+ default:
+ fprintf(stderr, "bad banks\n");
+ break;
+ }
+ switch((screen->tile_config & 0xc0) >> 6) {
+ case 0:
+ screen->group_bytes = 256;
+ break;
+ case 1:
+ screen->group_bytes = 512;
+ break;
+ default:
+ fprintf(stderr, "bad group_bytes\n");
+ break;
+ }
+ }
+ }
+
if (IS_R300_CLASS(screen)) {
ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp);
if (ret) {
@@ -1379,6 +1451,8 @@ radeonCreateScreen2(__DRIscreen *sPriv)
screen->extensions[i++] = &r600TexBufferExtension.base;
#endif
+ screen->extensions[i++] = &radeonFlushExtension.base;
+
screen->extensions[i++] = NULL;
sPriv->extensions = screen->extensions;
diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h
index 0d7e335fa3..2b33201a53 100644
--- a/src/mesa/drivers/dri/radeon/radeon_screen.h
+++ b/src/mesa/drivers/dri/radeon/radeon_screen.h
@@ -112,6 +112,13 @@ typedef struct radeon_screen {
int kernel_mm;
drm_radeon_sarea_t *sarea; /* Private SAREA data */
struct radeon_bo_manager *bom;
+
+ /* r6xx+ tiling */
+ GLuint tile_config;
+ GLint group_bytes;
+ GLint num_channels;
+ GLint num_banks;
+ GLint r7xx_bank_op;
} radeonScreenRec, *radeonScreenPtr;
#define IS_R100_CLASS(screen) \
diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c
index 1adb609603..9dfe2dd243 100644
--- a/src/mesa/drivers/dri/radeon/radeon_span.c
+++ b/src/mesa/drivers/dri/radeon/radeon_span.c
@@ -111,7 +111,6 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
* two main types:
* - 1D (akin to macro-linear/micro-tiled on older asics)
* - 2D (akin to macro-tiled/micro-tiled on older asics)
- * only 1D tiling is implemented below
*/
#if defined(RADEON_R600)
static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
@@ -208,12 +207,190 @@ static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
return offset;
}
+static inline GLint r600_log2(GLint n)
+{
+ GLint log2 = 0;
+
+ while (n >>= 1)
+ ++log2;
+ return log2;
+}
+
+static inline GLint r600_2d_tile_helper(const struct radeon_renderbuffer * rrb,
+ GLint x, GLint y, GLint is_depth, GLint is_stencil)
+{
+ GLint group_bytes = rrb->group_bytes;
+ GLint num_channels = rrb->num_channels;
+ GLint num_banks = rrb->num_banks;
+ GLint r7xx_bank_op = rrb->r7xx_bank_op;
+ /* */
+ GLint group_bits = r600_log2(group_bytes);
+ GLint channel_bits = r600_log2(num_channels);
+ GLint bank_bits = r600_log2(num_banks);
+ GLint element_bytes = rrb->cpp;
+ GLint num_samples = 1;
+ GLint tile_width = 8;
+ GLint tile_height = 8;
+ GLint tile_thickness = 1;
+ GLint macro_tile_width = num_banks;
+ GLint macro_tile_height = num_channels;
+ GLint pitch_elements = (rrb->pitch / element_bytes) / tile_width;
+ GLint height = rrb->base.Height / tile_height;
+ GLint z = 0;
+ GLint sample_number = 0;
+ /* */
+ GLint tile_bytes;
+ GLint macro_tile_bytes;
+ GLint macro_tiles_per_row;
+ GLint macro_tiles_per_slice;
+ GLint slice_offset;
+ GLint macro_tile_row_index;
+ GLint macro_tile_column_index;
+ GLint macro_tile_offset;
+ GLint pixel_number = 0;
+ GLint element_offset;
+ GLint bank = 0;
+ GLint channel = 0;
+ GLint total_offset;
+ GLint group_mask = (1 << group_bits) - 1;
+ GLint offset_low;
+ GLint offset_high;
+ GLint offset = 0;
+
+ switch (num_channels) {
+ case 2:
+ default:
+ // channel[0] = x[3] ^ y[3]
+ channel |= (((x >> 3) ^ (y >> 3)) & 1) << 0;
+ break;
+ case 4:
+ // channel[0] = x[4] ^ y[3]
+ channel |= (((x >> 4) ^ (y >> 3)) & 1) << 0;
+ // channel[1] = x[3] ^ y[4]
+ channel |= (((x >> 3) ^ (y >> 4)) & 1) << 1;
+ break;
+ case 8:
+ // channel[0] = x[5] ^ y[3]
+ channel |= (((x >> 5) ^ (y >> 3)) & 1) << 0;
+ // channel[0] = x[4] ^ x[5] ^ y[4]
+ channel |= (((x >> 4) ^ (x >> 5) ^ (y >> 4)) & 1) << 1;
+ // channel[0] = x[3] ^ y[5]
+ channel |= (((x >> 3) ^ (y >> 5)) & 1) << 2;
+ break;
+ }
+
+ switch (num_banks) {
+ case 4:
+ // bank[0] = x[3] ^ y[4 + log2(num_channels)]
+ bank |= (((x >> 3) ^ (y >> (4 + channel_bits))) & 1) << 0;
+ if (r7xx_bank_op)
+ // bank[1] = x[3] ^ y[4 + log2(num_channels)] ^ x[5]
+ bank |= (((x >> 4) ^ (y >> (3 + channel_bits)) ^ (x >> 5)) & 1) << 1;
+ else
+ // bank[1] = x[4] ^ y[3 + log2(num_channels)]
+ bank |= (((x >> 4) ^ (y >> (3 + channel_bits))) & 1) << 1;
+ break;
+ case 8:
+ // bank[0] = x[3] ^ y[5 + log2(num_channels)]
+ bank |= (((x >> 3) ^ (y >> (5 + channel_bits))) & 1) << 0;
+ // bank[1] = x[4] ^ y[4 + log2(num_channels)] ^ y[5 + log2(num_channels)]
+ bank |= (((x >> 4) ^ (y >> (4 + channel_bits)) ^ (y >> (5 + channel_bits))) & 1) << 1;
+ if (r7xx_bank_op)
+ // bank[2] = x[5] ^ y[3 + log2(num_channels)] ^ x[6]
+ bank |= (((x >> 5) ^ (y >> (3 + channel_bits)) ^ (x >> 6)) & 1) << 2;
+ else
+ // bank[2] = x[5] ^ y[3 + log2(num_channels)]
+ bank |= (((x >> 5) ^ (y >> (3 + channel_bits))) & 1) << 2;
+ break;
+ }
+
+ tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
+ macro_tile_bytes = macro_tile_width * macro_tile_height * tile_bytes;
+ macro_tiles_per_row = pitch_elements / macro_tile_width;
+ macro_tiles_per_slice = macro_tiles_per_row * (height / macro_tile_height);
+ slice_offset = (z / tile_thickness) * macro_tiles_per_slice * macro_tile_bytes;
+ macro_tile_row_index = (y / tile_height) / macro_tile_height;
+ macro_tile_column_index = (x / tile_width) / macro_tile_width;
+ macro_tile_offset = ((macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index) * macro_tile_bytes;
+
+ if (is_depth) {
+ GLint pixel_offset = 0;
+
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
+ pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
+ pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+ pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ switch (element_bytes) {
+ case 2:
+ pixel_offset = pixel_number * element_bytes * num_samples;
+ break;
+ case 4:
+ /* stencil and depth data are stored separately within a tile.
+ * stencil is stored in a contiguous tile before the depth tile.
+ * stencil element is 1 byte, depth element is 3 bytes.
+ * stencil tile is 64 bytes.
+ */
+ if (is_stencil)
+ pixel_offset = pixel_number * 1 * num_samples;
+ else
+ pixel_offset = (pixel_number * 3 * num_samples) + 64;
+ break;
+ }
+ element_offset = pixel_offset + (sample_number * element_bytes);
+ } else {
+ GLint sample_offset;
+
+ switch (element_bytes) {
+ case 1:
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+ pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+ pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
+ pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ break;
+ case 2:
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+ pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
+ pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
+ pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ break;
+ case 4:
+ pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
+ pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
+ pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
+ pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
+ pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
+ pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
+ break;
+ }
+ sample_offset = sample_number * (tile_bytes / num_samples);
+ element_offset = sample_offset + (pixel_number * element_bytes);
+ }
+ total_offset = (slice_offset + macro_tile_offset) >> (channel_bits + bank_bits);
+ total_offset += element_offset;
+
+ offset_low = total_offset & group_mask;
+ offset_high = (total_offset & ~group_mask) << (channel_bits + bank_bits);
+ offset = (bank << (group_bits + channel_bits)) + (channel << group_bits) + offset_low + offset_high;
+
+ return offset;
+}
+
/* depth buffers */
static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
GLubyte *ptr = rrb->bo->ptr;
- GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
+ GLint offset;
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ offset = r600_2d_tile_helper(rrb, x, y, 1, 0);
+ else
+ offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
return &ptr[offset];
}
@@ -221,7 +398,11 @@ static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
GLint x, GLint y)
{
GLubyte *ptr = rrb->bo->ptr;
- GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
+ GLint offset;
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ offset = r600_2d_tile_helper(rrb, x, y, 1, 1);
+ else
+ offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
return &ptr[offset];
}
@@ -235,7 +416,10 @@ static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
if (rrb->has_surface || !(rrb->bo->flags & mask)) {
offset = x * rrb->cpp + y * rrb->pitch;
} else {
- offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
+ if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
+ offset = r600_2d_tile_helper(rrb, x, y, 0, 0);
+ else
+ offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
}
return &ptr[offset];
}
diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
index f2fcb46688..67be466c3f 100644
--- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c
@@ -408,6 +408,8 @@ static GLboolean radeon_run_render( GLcontext *ctx,
!radeon_dma_validate_render( ctx, VB ))
return GL_TRUE;
+ radeon_prepare_render(&rmesa->radeon);
+
tnl->Driver.Render.Start( ctx );
for (i = 0 ; i < VB->PrimitiveCount ; i++)
diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c
index ea796e1a45..5e1718f9df 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tcl.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c
@@ -252,6 +252,8 @@ void radeonTclPrimitive( GLcontext *ctx,
GLuint se_cntl;
GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE;
+ radeon_prepare_render(&rmesa->radeon);
+
if (newprim != rmesa->tcl.hw_primitive ||
!discrete_prim[hw_prim&0xf]) {
RADEON_NEWPRIM( rmesa );
diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
index 29fd31ac23..4cb0bb60c8 100644
--- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
+++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c
@@ -153,6 +153,9 @@ radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level,
_mesa_select_tex_image(ctx, texObj, target, level);
int srcx, srcy, dstx, dsty;
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ radeon_prepare_render(radeon);
+
if (border)
goto fail;
@@ -202,6 +205,9 @@ radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level,
struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target);
struct gl_texture_image *texImage = _mesa_select_tex_image(ctx, texObj, target, level);
+ radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+ radeon_prepare_render(radeon);
+
if (!do_copy_texsubimage(ctx, target, level,
radeon_tex_obj(texObj), (radeon_texture_image *)texImage,
xoffset, yoffset, x, y, width, height)) {