From 7d349f588af88f9c5cfe37a331bcef6292a9111e Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Tue, 27 Jul 2010 15:21:19 -0400 Subject: intel: Remove unused intel/server files --- src/mesa/drivers/dri/i915/Makefile | 2 +- src/mesa/drivers/dri/i965/Makefile | 2 +- src/mesa/drivers/dri/intel/intel_context.c | 2 - src/mesa/drivers/dri/intel/server/i830_dri.h | 62 ----- src/mesa/drivers/dri/intel/server/intel.h | 331 --------------------------- 5 files changed, 2 insertions(+), 397 deletions(-) delete mode 100644 src/mesa/drivers/dri/intel/server/i830_dri.h delete mode 100644 src/mesa/drivers/dri/intel/server/intel.h (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i915/Makefile b/src/mesa/drivers/dri/i915/Makefile index 71ee753748..65fd658c04 100644 --- a/src/mesa/drivers/dri/i915/Makefile +++ b/src/mesa/drivers/dri/i915/Makefile @@ -56,7 +56,7 @@ C_SOURCES = \ ASM_SOURCES = -DRIVER_DEFINES = -I../intel -I../intel/server -DI915 \ +DRIVER_DEFINES = -I../intel -DI915 \ $(shell pkg-config libdrm --atleast-version=2.3.1 \ && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile index 831981558d..e381a5c714 100644 --- a/src/mesa/drivers/dri/i965/Makefile +++ b/src/mesa/drivers/dri/i965/Makefile @@ -106,7 +106,7 @@ C_SOURCES = \ ASM_SOURCES = -DRIVER_DEFINES = -I../intel -I../intel/server +DRIVER_DEFINES = -I../intel INCLUDES += $(INTEL_CFLAGS) DRI_LIB_DEPS += $(INTEL_LIBS) diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 5f2035d79c..87513a2a59 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -39,8 +39,6 @@ #include "drivers/common/driverfuncs.h" #include "drivers/common/meta.h" -#include "i830_dri.h" - #include "intel_chipset.h" #include "intel_buffers.h" #include "intel_tex.h" diff --git a/src/mesa/drivers/dri/intel/server/i830_dri.h b/src/mesa/drivers/dri/intel/server/i830_dri.h deleted file mode 100644 index def049e7a6..0000000000 --- a/src/mesa/drivers/dri/intel/server/i830_dri.h +++ /dev/null @@ -1,62 +0,0 @@ -/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/i810/i830_dri.h,v 1.6 2003/09/28 20:15:59 alanh Exp $ */ - -#ifndef _I830_DRI_H -#define _I830_DRI_H - -#include "xf86drm.h" - -#define I830_MAX_DRAWABLES 256 - -#define I830_MAJOR_VERSION 1 -#define I830_MINOR_VERSION 9 -#define I830_PATCHLEVEL 0 - -#define I830_REG_SIZE 0x80000 - -typedef struct _I830DRIRec { - drm_handle_t regs; - drmSize regsSize; - - drmSize unused1; /* backbufferSize */ - drm_handle_t unused2; /* backbuffer */ - - drmSize unused3; /* depthbufferSize */ - drm_handle_t unused4; /* depthbuffer */ - - drmSize unused5; /* rotatedSize */ - drm_handle_t unused6; /* rotatedbuffer */ - - drm_handle_t unused7; /* textures */ - int unused8; /* textureSize */ - - drm_handle_t unused9; /* agp_buffers */ - drmSize unused10; /* agp_buf_size */ - - int deviceID; - int width; - int height; - int mem; - int cpp; - int bitsPerPixel; - - int unused11[8]; /* was front/back/depth/rotated offset/pitch */ - - int unused12; /* logTextureGranularity */ - int unused13; /* textureOffset */ - - int irq; - int sarea_priv_offset; -} I830DRIRec, *I830DRIPtr; - -typedef struct { - /* Nothing here yet */ - int dummy; -} I830ConfigPrivRec, *I830ConfigPrivPtr; - -typedef struct { - /* Nothing here yet */ - int dummy; -} I830DRIContextRec, *I830DRIContextPtr; - - -#endif diff --git a/src/mesa/drivers/dri/intel/server/intel.h b/src/mesa/drivers/dri/intel/server/intel.h deleted file mode 100644 index 6ea72499c1..0000000000 --- a/src/mesa/drivers/dri/intel/server/intel.h +++ /dev/null @@ -1,331 +0,0 @@ -#ifndef _INTEL_H_ -#define _INTEL_H_ - -#include "xf86drm.h" /* drm_handle_t, etc */ - -/* Intel */ -#ifndef PCI_CHIP_I810 -#define PCI_CHIP_I810 0x7121 -#define PCI_CHIP_I810_DC100 0x7123 -#define PCI_CHIP_I810_E 0x7125 -#define PCI_CHIP_I815 0x1132 -#define PCI_CHIP_I810_BRIDGE 0x7120 -#define PCI_CHIP_I810_DC100_BRIDGE 0x7122 -#define PCI_CHIP_I810_E_BRIDGE 0x7124 -#define PCI_CHIP_I815_BRIDGE 0x1130 -#endif - -#define PCI_CHIP_845_G 0x2562 -#define PCI_CHIP_I830_M 0x3577 - -#ifndef PCI_CHIP_I855_GM -#define PCI_CHIP_I855_GM 0x3582 -#define PCI_CHIP_I855_GM_BRIDGE 0x3580 -#endif - -#ifndef PCI_CHIP_I865_G -#define PCI_CHIP_I865_G 0x2572 -#define PCI_CHIP_I865_G_BRIDGE 0x2570 -#endif - -#ifndef PCI_CHIP_I915_G -#define PCI_CHIP_I915_G 0x2582 -#define PCI_CHIP_I915_G_BRIDGE 0x2580 -#endif - -#ifndef PCI_CHIP_I915_GM -#define PCI_CHIP_I915_GM 0x2592 -#define PCI_CHIP_I915_GM_BRIDGE 0x2590 -#endif - -#ifndef PCI_CHIP_E7221_G -#define PCI_CHIP_E7221_G 0x258A -/* Same as I915_G_BRIDGE */ -#define PCI_CHIP_E7221_G_BRIDGE 0x2580 -#endif - -#ifndef PCI_CHIP_I945_G -#define PCI_CHIP_I945_G 0x2772 -#define PCI_CHIP_I945_G_BRIDGE 0x2770 -#endif - -#ifndef PCI_CHIP_I945_GM -#define PCI_CHIP_I945_GM 0x27A2 -#define PCI_CHIP_I945_GM_BRIDGE 0x27A0 -#endif - -#define IS_I810(pI810) (pI810->Chipset == PCI_CHIP_I810 || \ - pI810->Chipset == PCI_CHIP_I810_DC100 || \ - pI810->Chipset == PCI_CHIP_I810_E) -#define IS_I815(pI810) (pI810->Chipset == PCI_CHIP_I815) -#define IS_I830(pI810) (pI810->Chipset == PCI_CHIP_I830_M) -#define IS_845G(pI810) (pI810->Chipset == PCI_CHIP_845_G) -#define IS_I85X(pI810) (pI810->Chipset == PCI_CHIP_I855_GM) -#define IS_I852(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I852_GM || pI810->variant == I852_GME)) -#define IS_I855(pI810) (pI810->Chipset == PCI_CHIP_I855_GM && (pI810->variant == I855_GM || pI810->variant == I855_GME)) -#define IS_I865G(pI810) (pI810->Chipset == PCI_CHIP_I865_G) - -#define IS_I915G(pI810) (pI810->Chipset == PCI_CHIP_I915_G || pI810->Chipset == PCI_CHIP_E7221_G) -#define IS_I915GM(pI810) (pI810->Chipset == PCI_CHIP_I915_GM) -#define IS_I945G(pI810) (pI810->Chipset == PCI_CHIP_I945_G) -#define IS_I945GM(pI810) (pI810->Chipset == PCI_CHIP_I945_GM) -#define IS_I9XX(pI810) (IS_I915G(pI810) || IS_I915GM(pI810) || IS_I945G(pI810) || IS_I945GM(pI810)) - -#define IS_MOBILE(pI810) (IS_I830(pI810) || IS_I85X(pI810) || IS_I915GM(pI810) || IS_I945GM(pI810)) - -#define I830_GMCH_CTRL 0x52 - -#define I830_GMCH_MEM_MASK 0x1 -#define I830_GMCH_MEM_64M 0x1 -#define I830_GMCH_MEM_128M 0 - -#define I830_GMCH_GMS_MASK 0x70 -#define I830_GMCH_GMS_DISABLED 0x00 -#define I830_GMCH_GMS_LOCAL 0x10 -#define I830_GMCH_GMS_STOLEN_512 0x20 -#define I830_GMCH_GMS_STOLEN_1024 0x30 -#define I830_GMCH_GMS_STOLEN_8192 0x40 - -#define I855_GMCH_GMS_MASK (0x7 << 4) -#define I855_GMCH_GMS_DISABLED 0x00 -#define I855_GMCH_GMS_STOLEN_1M (0x1 << 4) -#define I855_GMCH_GMS_STOLEN_4M (0x2 << 4) -#define I855_GMCH_GMS_STOLEN_8M (0x3 << 4) -#define I855_GMCH_GMS_STOLEN_16M (0x4 << 4) -#define I855_GMCH_GMS_STOLEN_32M (0x5 << 4) -#define I915G_GMCH_GMS_STOLEN_48M (0x6 << 4) -#define I915G_GMCH_GMS_STOLEN_64M (0x7 << 4) - -typedef unsigned char Bool; -#define TRUE 1 -#define FALSE 0 - -#define PIPE_NONE 0<<0 -#define PIPE_CRT 1<<0 -#define PIPE_TV 1<<1 -#define PIPE_DFP 1<<2 -#define PIPE_LFP 1<<3 -#define PIPE_CRT2 1<<4 -#define PIPE_TV2 1<<5 -#define PIPE_DFP2 1<<6 -#define PIPE_LFP2 1<<7 - -typedef struct _I830MemPool *I830MemPoolPtr; -typedef struct _I830MemRange *I830MemRangePtr; -typedef struct _I830MemRange { - long Start; - long End; - long Size; - unsigned long Physical; - unsigned long Offset; /* Offset of AGP-allocated portion */ - unsigned long Alignment; - drm_handle_t Key; - unsigned long Pitch; // add pitch - I830MemPoolPtr Pool; -} I830MemRange; - -typedef struct _I830MemPool { - I830MemRange Total; - I830MemRange Free; - I830MemRange Fixed; - I830MemRange Allocated; -} I830MemPool; - -typedef struct { - int tail_mask; - I830MemRange mem; - unsigned char *virtual_start; - int head; - int tail; - int space; -} I830RingBuffer; - -typedef struct _I830Rec { - unsigned char *MMIOBase; - unsigned char *FbBase; - int cpp; - uint32_t aper_size; - unsigned int bios_version; - - /* These are set in PreInit and never changed. */ - long FbMapSize; - long TotalVideoRam; - I830MemRange StolenMemory; /* pre-allocated memory */ - long BIOSMemorySize; /* min stolen pool size */ - int BIOSMemSizeLoc; - - /* These change according to what has been allocated. */ - long FreeMemory; - I830MemRange MemoryAperture; - I830MemPool StolenPool; - long allocatedMemory; - - /* Regions allocated either from the above pools, or from agpgart. */ - /* for single and dual head configurations */ - I830MemRange FrontBuffer; - I830MemRange FrontBuffer2; - I830MemRange Scratch; - I830MemRange Scratch2; - - I830RingBuffer *LpRing; - - I830MemRange BackBuffer; - I830MemRange DepthBuffer; - I830MemRange TexMem; - int TexGranularity; - I830MemRange ContextMem; - int drmMinor; - Bool have3DWindows; - - Bool NeedRingBufferLow; - Bool allowPageFlip; - Bool disableTiling; - - int Chipset; - unsigned long LinearAddr; - unsigned long MMIOAddr; - - drmSize registerSize; /**< \brief MMIO register map size */ - drm_handle_t registerHandle; /**< \brief MMIO register map handle */ - // IOADDRESS ioBase; - int irq; /**< \brief IRQ number */ - int GttBound; - - drm_handle_t ring_map; - unsigned int Fence[8]; - -} I830Rec; - -/* - * 12288 is set as the maximum, chosen because it is enough for - * 1920x1440@32bpp with a 2048 pixel line pitch with some to spare. - */ -#define I830_MAXIMUM_VBIOS_MEM 12288 -#define I830_DEFAULT_VIDEOMEM_2D (MB(32) / 1024) -#define I830_DEFAULT_VIDEOMEM_3D (MB(64) / 1024) - -/* Flags for memory allocation function */ -#define FROM_ANYWHERE 0x00000000 -#define FROM_POOL_ONLY 0x00000001 -#define FROM_NEW_ONLY 0x00000002 -#define FROM_MASK 0x0000000f - -#define ALLOCATE_AT_TOP 0x00000010 -#define ALLOCATE_AT_BOTTOM 0x00000020 -#define FORCE_GAPS 0x00000040 - -#define NEED_PHYSICAL_ADDR 0x00000100 -#define ALIGN_BOTH_ENDS 0x00000200 -#define FORCE_LOW 0x00000400 - -#define ALLOC_NO_TILING 0x00001000 -#define ALLOC_INITIAL 0x00002000 - -#define ALLOCATE_DRY_RUN 0x80000000 - -/* Chipset registers for VIDEO BIOS memory RW access */ -#define _855_DRAM_RW_CONTROL 0x58 -#define _845_DRAM_RW_CONTROL 0x90 -#define DRAM_WRITE 0x33330000 - -#define KB(x) ((x) * 1024) -#define MB(x) ((x) * KB(1024)) - -#define GTT_PAGE_SIZE KB(4) -#define ROUND_TO(x, y) (((x) + (y) - 1) / (y) * (y)) -#define ROUND_DOWN_TO(x, y) ((x) / (y) * (y)) -#define ROUND_TO_PAGE(x) ROUND_TO((x), GTT_PAGE_SIZE) -#define ROUND_TO_MB(x) ROUND_TO((x), MB(1)) -#define PRIMARY_RINGBUFFER_SIZE KB(128) - - -/* Ring buffer registers, p277, overview p19 - */ -#define LP_RING 0x2030 -#define HP_RING 0x2040 - -#define RING_TAIL 0x00 -#define TAIL_ADDR 0x000FFFF8 -#define I830_TAIL_MASK 0x001FFFF8 - -#define RING_HEAD 0x04 -#define HEAD_WRAP_COUNT 0xFFE00000 -#define HEAD_WRAP_ONE 0x00200000 -#define HEAD_ADDR 0x001FFFFC -#define I830_HEAD_MASK 0x001FFFFC - -#define RING_START 0x08 -#define START_ADDR 0x03FFFFF8 -#define I830_RING_START_MASK 0xFFFFF000 - -#define RING_LEN 0x0C -#define RING_NR_PAGES 0x001FF000 -#define I830_RING_NR_PAGES 0x001FF000 -#define RING_REPORT_MASK 0x00000006 -#define RING_REPORT_64K 0x00000002 -#define RING_REPORT_128K 0x00000004 -#define RING_NO_REPORT 0x00000000 -#define RING_VALID_MASK 0x00000001 -#define RING_VALID 0x00000001 -#define RING_INVALID 0x00000000 - - -/* Fence/Tiling ranges [0..7] - */ -#define FENCE 0x2000 -#define FENCE_NR 8 - -#define I915G_FENCE_START_MASK 0x0ff00000 - -#define I830_FENCE_START_MASK 0x07f80000 - -#define FENCE_START_MASK 0x03F80000 -#define FENCE_X_MAJOR 0x00000000 -#define FENCE_Y_MAJOR 0x00001000 -#define FENCE_SIZE_MASK 0x00000700 -#define FENCE_SIZE_512K 0x00000000 -#define FENCE_SIZE_1M 0x00000100 -#define FENCE_SIZE_2M 0x00000200 -#define FENCE_SIZE_4M 0x00000300 -#define FENCE_SIZE_8M 0x00000400 -#define FENCE_SIZE_16M 0x00000500 -#define FENCE_SIZE_32M 0x00000600 -#define FENCE_SIZE_64M 0x00000700 -#define I915G_FENCE_SIZE_1M 0x00000000 -#define I915G_FENCE_SIZE_2M 0x00000100 -#define I915G_FENCE_SIZE_4M 0x00000200 -#define I915G_FENCE_SIZE_8M 0x00000300 -#define I915G_FENCE_SIZE_16M 0x00000400 -#define I915G_FENCE_SIZE_32M 0x00000500 -#define I915G_FENCE_SIZE_64M 0x00000600 -#define I915G_FENCE_SIZE_128M 0x00000700 -#define FENCE_PITCH_1 0x00000000 -#define FENCE_PITCH_2 0x00000010 -#define FENCE_PITCH_4 0x00000020 -#define FENCE_PITCH_8 0x00000030 -#define FENCE_PITCH_16 0x00000040 -#define FENCE_PITCH_32 0x00000050 -#define FENCE_PITCH_64 0x00000060 -#define FENCE_VALID 0x00000001 - -#include - -# define MMIO_IN8(base, offset) \ - *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) -# define MMIO_IN32(base, offset) \ - read_MMIO_LE32(base, offset) -# define MMIO_OUT8(base, offset, val) \ - *(volatile unsigned char *)(((unsigned char*)(base)) + (offset)) = (val) -# define MMIO_OUT32(base, offset, val) \ - *(volatile unsigned int *)(void *)(((unsigned char*)(base)) + (offset)) = CPU_TO_LE32(val) - - - /* Memory mapped register access macros */ -#define INREG8(addr) MMIO_IN8(MMIO, addr) -#define INREG(addr) MMIO_IN32(MMIO, addr) -#define OUTREG8(addr, val) MMIO_OUT8(MMIO, addr, val) -#define OUTREG(addr, val) MMIO_OUT32(MMIO, addr, val) - -#define DSPABASE 0x70184 - -#endif -- cgit v1.2.3 From 9456e22c7a8803bed1146a89e7581badf0ae8064 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Fri, 4 Jun 2010 14:28:59 -0400 Subject: intel: Implement EGL_KHR_surfaceless extension --- src/mesa/drivers/dri/intel/intel_context.c | 27 ++++++++++++++++++++------- src/mesa/main/fbobject.c | 5 +++++ src/mesa/main/fbobject.h | 3 +++ src/mesa/main/framebuffer.c | 1 - 4 files changed, 28 insertions(+), 8 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 87513a2a59..531f62b880 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -418,7 +418,7 @@ intel_prepare_render(struct intel_context *intel) __DRIdrawable *drawable; drawable = driContext->driDrawablePriv; - if (drawable->dri2.stamp != driContext->dri2.draw_stamp) { + if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) { if (drawable->lastStamp != drawable->dri2.stamp) intel_update_renderbuffers(driContext, drawable); intel_draw_buffer(&intel->ctx, intel->ctx.DrawBuffer); @@ -426,7 +426,7 @@ intel_prepare_render(struct intel_context *intel) } drawable = driContext->driReadablePriv; - if (drawable->dri2.stamp != driContext->dri2.read_stamp) { + if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) { if (drawable->lastStamp != drawable->dri2.stamp) intel_update_renderbuffers(driContext, drawable); driContext->dri2.read_stamp = drawable->dri2.stamp; @@ -611,6 +611,7 @@ intelInitContext(struct intel_context *intel, __DRIscreen *sPriv = driContextPriv->driScreenPriv; struct intel_screen *intelScreen = sPriv->private; int bo_reuse_mode; + __GLcontextModes visual; /* we can't do anything without a connection to the device */ if (intelScreen->bufmgr == NULL) @@ -622,6 +623,11 @@ intelInitContext(struct intel_context *intel, functions->Viewport = intel_viewport; } + if (mesaVis == NULL) { + memset(&visual, 0, sizeof visual); + mesaVis = &visual; + } + if (!_mesa_initialize_context_for_api(&intel->ctx, api, mesaVis, shareCtx, functions, (void *) intel)) { printf("%s: failed to init mesa context\n", __FUNCTION__); @@ -888,14 +894,21 @@ intelMakeCurrent(__DRIcontext * driContextPriv, } if (driContextPriv) { - struct gl_framebuffer *fb = driDrawPriv->driverPrivate; - struct gl_framebuffer *readFb = driReadPriv->driverPrivate; + struct gl_framebuffer *fb, *readFb; + + if (driDrawPriv == NULL && driReadPriv == NULL) { + fb = _mesa_get_incomplete_framebuffer(); + readFb = _mesa_get_incomplete_framebuffer(); + } else { + fb = driDrawPriv->driverPrivate; + readFb = driReadPriv->driverPrivate; + driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1; + driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; + } - driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1; - driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1; intel_prepare_render(intel); _mesa_make_current(&intel->ctx, fb, readFb); - + /* We do this in intel_prepare_render() too, but intel->ctx.DrawBuffer * is NULL at that point. We can't call _mesa_makecurrent() * first, since we need the buffer size for the initial diff --git a/src/mesa/main/fbobject.c b/src/mesa/main/fbobject.c index 8c86b392c7..9a84e5a79c 100644 --- a/src/mesa/main/fbobject.c +++ b/src/mesa/main/fbobject.c @@ -97,6 +97,11 @@ _mesa_init_fbobjects(GLcontext *ctx) DummyRenderbuffer.Delete = delete_dummy_renderbuffer; } +struct gl_framebuffer * +_mesa_get_incomplete_framebuffer(void) +{ + return &DummyFramebuffer; +} /** * Helper routine for getting a gl_renderbuffer. diff --git a/src/mesa/main/fbobject.h b/src/mesa/main/fbobject.h index ff946033a4..5215e930f9 100644 --- a/src/mesa/main/fbobject.h +++ b/src/mesa/main/fbobject.h @@ -30,6 +30,9 @@ extern void _mesa_init_fbobjects(GLcontext *ctx); +extern struct gl_framebuffer * +_mesa_get_incomplete_framebuffer(void); + extern struct gl_renderbuffer * _mesa_lookup_renderbuffer(GLcontext *ctx, GLuint id); diff --git a/src/mesa/main/framebuffer.c b/src/mesa/main/framebuffer.c index 56558cfcc1..e0aac26f62 100644 --- a/src/mesa/main/framebuffer.c +++ b/src/mesa/main/framebuffer.c @@ -75,7 +75,6 @@ compute_depth_max(struct gl_framebuffer *fb) fb->_MRD = (GLfloat)1.0 / fb->_DepthMaxF; } - /** * Create and initialize a gl_framebuffer object. * This is intended for creating _window_system_ framebuffers, not generic -- cgit v1.2.3 From 9b3bf392e1af72d29afa0804260cac4d8ffe24e1 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Thu, 29 Jul 2010 15:18:19 +0300 Subject: r600: since 8744c36e added asserts - use another random register for shader with no output --- src/mesa/drivers/dri/r600/r700_assembler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 99a33df4fc..8f6cc1d875 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -6473,7 +6473,7 @@ GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode, * results are undefined anyway */ if(export_count == 0) { - Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE); + Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->starting_export_register_number, GL_FALSE); } if(pR700AsmCode->cf_last_export_ptr != NULL) -- cgit v1.2.3 From be9276d91299e36a8c4371a87d75c9cacaf995f2 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Thu, 29 Jul 2010 15:39:36 -0400 Subject: intel: Don't depend on context config values when picking texture formats --- src/mesa/drivers/dri/intel/intel_tex_format.c | 36 +++++++++------------------ 1 file changed, 12 insertions(+), 24 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/intel/intel_tex_format.c b/src/mesa/drivers/dri/intel/intel_tex_format.c index 5f813c0efa..e03b203fb4 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_format.c +++ b/src/mesa/drivers/dri/intel/intel_tex_format.c @@ -19,7 +19,6 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, GLenum format, GLenum type) { struct intel_context *intel = intel_context(ctx); - const GLboolean do32bpt = (intel->ctx.Visual.rgbBits >= 24); #if 0 printf("%s intFmt=0x%x format=0x%x type=0x%x\n", @@ -30,39 +29,28 @@ intelChooseTextureFormat(GLcontext * ctx, GLint internalFormat, case 4: case GL_RGBA: case GL_COMPRESSED_RGBA: - if (format == GL_BGRA) { - if (type == GL_UNSIGNED_BYTE || type == GL_UNSIGNED_INT_8_8_8_8_REV) { - return MESA_FORMAT_ARGB8888; - } - else if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) { - return MESA_FORMAT_ARGB4444; - } - else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) { - return MESA_FORMAT_ARGB1555; - } - } - return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444; + if (type == GL_UNSIGNED_SHORT_4_4_4_4_REV) + return MESA_FORMAT_ARGB4444; + else if (type == GL_UNSIGNED_SHORT_1_5_5_5_REV) + return MESA_FORMAT_ARGB1555; + else + return MESA_FORMAT_ARGB8888; case 3: case GL_RGB: case GL_COMPRESSED_RGB: - if (format == GL_RGB && type == GL_UNSIGNED_SHORT_5_6_5) { - return MESA_FORMAT_RGB565; - } - if (do32bpt) { - if (intel->has_xrgb_textures) - return MESA_FORMAT_XRGB8888; - else - return MESA_FORMAT_ARGB8888; - } else { + if (type == GL_UNSIGNED_SHORT_5_6_5) return MESA_FORMAT_RGB565; - } + else if (intel->has_xrgb_textures) + return MESA_FORMAT_XRGB8888; + else + return MESA_FORMAT_ARGB8888; case GL_RGBA8: case GL_RGB10_A2: case GL_RGBA12: case GL_RGBA16: - return do32bpt ? MESA_FORMAT_ARGB8888 : MESA_FORMAT_ARGB4444; + return MESA_FORMAT_ARGB8888; case GL_RGBA4: case GL_RGBA2: -- cgit v1.2.3 From 32ea5394173ecbfb766c5c02eccb21642aec0483 Mon Sep 17 00:00:00 2001 From: Kristian Høgsberg Date: Thu, 29 Jul 2010 15:45:20 -0400 Subject: intel: Declare the various tracked state variables using "extern" --- src/mesa/drivers/dri/i965/brw_state.h | 124 +++++++++++++++++----------------- 1 file changed, 62 insertions(+), 62 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 40eece276b..af08446f2d 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -46,68 +46,68 @@ brw_add_validated_bo(struct brw_context *brw, drm_intel_bo *bo) } }; -const struct brw_tracked_state brw_blend_constant_color; -const struct brw_tracked_state brw_cc_unit; -const struct brw_tracked_state brw_check_fallback; -const struct brw_tracked_state brw_clip_prog; -const struct brw_tracked_state brw_clip_unit; -const struct brw_tracked_state brw_vs_constants; -const struct brw_tracked_state brw_wm_constants; -const struct brw_tracked_state brw_constant_buffer; -const struct brw_tracked_state brw_curbe_offsets; -const struct brw_tracked_state brw_invarient_state; -const struct brw_tracked_state brw_gs_prog; -const struct brw_tracked_state brw_gs_unit; -const struct brw_tracked_state brw_line_stipple; -const struct brw_tracked_state brw_aa_line_parameters; -const struct brw_tracked_state brw_pipelined_state_pointers; -const struct brw_tracked_state brw_binding_table_pointers; -const struct brw_tracked_state brw_depthbuffer; -const struct brw_tracked_state brw_polygon_stipple_offset; -const struct brw_tracked_state brw_polygon_stipple; -const struct brw_tracked_state brw_program_parameters; -const struct brw_tracked_state brw_recalculate_urb_fence; -const struct brw_tracked_state brw_sf_prog; -const struct brw_tracked_state brw_sf_unit; -const struct brw_tracked_state brw_sf_vp; -const struct brw_tracked_state brw_state_base_address; -const struct brw_tracked_state brw_urb_fence; -const struct brw_tracked_state brw_vertex_state; -const struct brw_tracked_state brw_vs_surfaces; -const struct brw_tracked_state brw_vs_prog; -const struct brw_tracked_state brw_vs_unit; -const struct brw_tracked_state brw_wm_input_sizes; -const struct brw_tracked_state brw_wm_prog; -const struct brw_tracked_state brw_wm_samplers; -const struct brw_tracked_state brw_wm_constant_surface; -const struct brw_tracked_state brw_wm_surfaces; -const struct brw_tracked_state brw_wm_binding_table; -const struct brw_tracked_state brw_wm_unit; - -const struct brw_tracked_state brw_psp_urb_cbs; - -const struct brw_tracked_state brw_pipe_control; - -const struct brw_tracked_state brw_drawing_rect; -const struct brw_tracked_state brw_indices; -const struct brw_tracked_state brw_vertices; -const struct brw_tracked_state brw_index_buffer; -const struct brw_tracked_state gen6_binding_table_pointers; -const struct brw_tracked_state gen6_blend_state; -const struct brw_tracked_state gen6_cc_state_pointers; -const struct brw_tracked_state gen6_clip_state; -const struct brw_tracked_state gen6_clip_vp; -const struct brw_tracked_state gen6_color_calc_state; -const struct brw_tracked_state gen6_depth_stencil_state; -const struct brw_tracked_state gen6_gs_state; -const struct brw_tracked_state gen6_sampler_state; -const struct brw_tracked_state gen6_scissor_state; -const struct brw_tracked_state gen6_sf_state; -const struct brw_tracked_state gen6_sf_vp; -const struct brw_tracked_state gen6_urb; -const struct brw_tracked_state gen6_viewport_state; -const struct brw_tracked_state gen6_vs_state; -const struct brw_tracked_state gen6_wm_state; +extern const struct brw_tracked_state brw_blend_constant_color; +extern const struct brw_tracked_state brw_cc_unit; +extern const struct brw_tracked_state brw_check_fallback; +extern const struct brw_tracked_state brw_clip_prog; +extern const struct brw_tracked_state brw_clip_unit; +extern const struct brw_tracked_state brw_vs_constants; +extern const struct brw_tracked_state brw_wm_constants; +extern const struct brw_tracked_state brw_constant_buffer; +extern const struct brw_tracked_state brw_curbe_offsets; +extern const struct brw_tracked_state brw_invarient_state; +extern const struct brw_tracked_state brw_gs_prog; +extern const struct brw_tracked_state brw_gs_unit; +extern const struct brw_tracked_state brw_line_stipple; +extern const struct brw_tracked_state brw_aa_line_parameters; +extern const struct brw_tracked_state brw_pipelined_state_pointers; +extern const struct brw_tracked_state brw_binding_table_pointers; +extern const struct brw_tracked_state brw_depthbuffer; +extern const struct brw_tracked_state brw_polygon_stipple_offset; +extern const struct brw_tracked_state brw_polygon_stipple; +extern const struct brw_tracked_state brw_program_parameters; +extern const struct brw_tracked_state brw_recalculate_urb_fence; +extern const struct brw_tracked_state brw_sf_prog; +extern const struct brw_tracked_state brw_sf_unit; +extern const struct brw_tracked_state brw_sf_vp; +extern const struct brw_tracked_state brw_state_base_address; +extern const struct brw_tracked_state brw_urb_fence; +extern const struct brw_tracked_state brw_vertex_state; +extern const struct brw_tracked_state brw_vs_surfaces; +extern const struct brw_tracked_state brw_vs_prog; +extern const struct brw_tracked_state brw_vs_unit; +extern const struct brw_tracked_state brw_wm_input_sizes; +extern const struct brw_tracked_state brw_wm_prog; +extern const struct brw_tracked_state brw_wm_samplers; +extern const struct brw_tracked_state brw_wm_constant_surface; +extern const struct brw_tracked_state brw_wm_surfaces; +extern const struct brw_tracked_state brw_wm_binding_table; +extern const struct brw_tracked_state brw_wm_unit; + +extern const struct brw_tracked_state brw_psp_urb_cbs; + +extern const struct brw_tracked_state brw_pipe_control; + +extern const struct brw_tracked_state brw_drawing_rect; +extern const struct brw_tracked_state brw_indices; +extern const struct brw_tracked_state brw_vertices; +extern const struct brw_tracked_state brw_index_buffer; +extern const struct brw_tracked_state gen6_binding_table_pointers; +extern const struct brw_tracked_state gen6_blend_state; +extern const struct brw_tracked_state gen6_cc_state_pointers; +extern const struct brw_tracked_state gen6_clip_state; +extern const struct brw_tracked_state gen6_clip_vp; +extern const struct brw_tracked_state gen6_color_calc_state; +extern const struct brw_tracked_state gen6_depth_stencil_state; +extern const struct brw_tracked_state gen6_gs_state; +extern const struct brw_tracked_state gen6_sampler_state; +extern const struct brw_tracked_state gen6_scissor_state; +extern const struct brw_tracked_state gen6_sf_state; +extern const struct brw_tracked_state gen6_sf_vp; +extern const struct brw_tracked_state gen6_urb; +extern const struct brw_tracked_state gen6_viewport_state; +extern const struct brw_tracked_state gen6_vs_state; +extern const struct brw_tracked_state gen6_wm_state; /*********************************************************************** * brw_state.c -- cgit v1.2.3 From ebdc537ff7e272da6cb423a7b32a09618c44ba84 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Thu, 29 Jul 2010 19:04:43 -0700 Subject: dri: Add missing header to dri_metaops.c. Add context.h for FLUSH_VERTICES symbol. --- src/mesa/drivers/dri/common/dri_metaops.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/common/dri_metaops.c b/src/mesa/drivers/dri/common/dri_metaops.c index 86e59a8e51..a2f404b616 100644 --- a/src/mesa/drivers/dri/common/dri_metaops.c +++ b/src/mesa/drivers/dri/common/dri_metaops.c @@ -29,6 +29,7 @@ #include "main/arbprogram.h" #include "main/arrayobj.h" #include "main/bufferobj.h" +#include "main/context.h" #include "main/enable.h" #include "main/matrix.h" #include "main/texstate.h" -- cgit v1.2.3 From dfc7b7212f57080d18c4d1122435c4c4575694c7 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Thu, 29 Jul 2010 19:10:14 -0700 Subject: intel: Add missing header. Add context.h for NEED_SECONDARY_COLOR symbol. --- src/mesa/drivers/dri/intel/intel_pixel_bitmap.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c index 0e2fe893fe..02c0ffce31 100644 --- a/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c +++ b/src/mesa/drivers/dri/intel/intel_pixel_bitmap.c @@ -45,6 +45,7 @@ #include "main/attrib.h" #include "main/enable.h" #include "main/viewport.h" +#include "main/context.h" #include "swrast/swrast.h" #include "intel_screen.h" -- cgit v1.2.3 From 11fce3a821b64e1d53f893e82e5c92f549f3ab1d Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Fri, 30 Jul 2010 00:06:40 -0700 Subject: intel: Add missing header to intel_context.c. Fixes "implicit declaration of function _mesa_get_incomplete_framebuffer" warning. --- src/mesa/drivers/dri/intel/intel_context.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c index 531f62b880..e19f44035f 100644 --- a/src/mesa/drivers/dri/intel/intel_context.c +++ b/src/mesa/drivers/dri/intel/intel_context.c @@ -29,6 +29,7 @@ #include "main/glheader.h" #include "main/context.h" #include "main/extensions.h" +#include "main/fbobject.h" #include "main/framebuffer.h" #include "main/imports.h" #include "main/points.h" -- cgit v1.2.3 From 9846b0627149e221c9fbd7c3379e33fb68e68511 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Sat, 31 Jul 2010 23:04:41 -0700 Subject: mesa: Remove inclusion of compiler.h from mtypes.h. mtypes.h does not use any symbols from compiler.h. Also add the required headers for files that depended on symbols from compiler.h but were indirectly including compiler.h through mtypes.h. --- src/mesa/drivers/dri/i965/brw_util.c | 2 ++ src/mesa/drivers/dri/mach64/mach64_ioctl.h | 3 +++ src/mesa/main/mtypes.h | 1 - src/mesa/main/texstate.h | 1 + src/mesa/program/prog_parameter_layout.c | 1 + src/mesa/state_tracker/st_atom_depth.c | 2 ++ src/mesa/state_tracker/st_atom_stipple.c | 2 ++ src/mesa/state_tracker/st_mesa_to_tgsi.c | 4 ++-- src/mesa/state_tracker/st_texture.c | 4 ++-- src/mesa/vbo/vbo_exec_draw.c | 1 + 10 files changed, 16 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/i965/brw_util.c b/src/mesa/drivers/dri/i965/brw_util.c index 1db2a210d4..e878da3850 100644 --- a/src/mesa/drivers/dri/i965/brw_util.c +++ b/src/mesa/drivers/dri/i965/brw_util.c @@ -30,6 +30,8 @@ */ +#include + #include "main/mtypes.h" #include "program/prog_parameter.h" #include "brw_util.h" diff --git a/src/mesa/drivers/dri/mach64/mach64_ioctl.h b/src/mesa/drivers/dri/mach64/mach64_ioctl.h index 1ffda1932f..9145ee6e6c 100644 --- a/src/mesa/drivers/dri/mach64/mach64_ioctl.h +++ b/src/mesa/drivers/dri/mach64/mach64_ioctl.h @@ -32,6 +32,9 @@ #ifndef __MACH64_IOCTL_H__ #define __MACH64_IOCTL_H__ +#include +#include + #include "mach64_dri.h" #include "mach64_reg.h" #include "mach64_lock.h" diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 7bb554d519..3c2addb3a3 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -36,7 +36,6 @@ #include "main/glheader.h" #include "main/config.h" -#include "main/compiler.h" #include "main/mfeatures.h" #include "glapi/glapi.h" #include "math/m_matrix.h" /* GLmatrix */ diff --git a/src/mesa/main/texstate.h b/src/mesa/main/texstate.h index 17ac68000c..912cb67798 100644 --- a/src/mesa/main/texstate.h +++ b/src/mesa/main/texstate.h @@ -32,6 +32,7 @@ #define TEXSTATE_H +#include "compiler.h" #include "mtypes.h" diff --git a/src/mesa/program/prog_parameter_layout.c b/src/mesa/program/prog_parameter_layout.c index a888573832..d7dc97edbf 100644 --- a/src/mesa/program/prog_parameter_layout.c +++ b/src/mesa/program/prog_parameter_layout.c @@ -28,6 +28,7 @@ * \author Ian Romanick */ +#include "main/compiler.h" #include "main/mtypes.h" #include "prog_parameter.h" #include "prog_parameter_layout.h" diff --git a/src/mesa/state_tracker/st_atom_depth.c b/src/mesa/state_tracker/st_atom_depth.c index 3c07afba9a..1616e945fe 100644 --- a/src/mesa/state_tracker/st_atom_depth.c +++ b/src/mesa/state_tracker/st_atom_depth.c @@ -33,6 +33,8 @@ */ +#include + #include "st_context.h" #include "st_atom.h" #include "pipe/p_context.h" diff --git a/src/mesa/state_tracker/st_atom_stipple.c b/src/mesa/state_tracker/st_atom_stipple.c index 31e124b329..ecdd9f06f6 100644 --- a/src/mesa/state_tracker/st_atom_stipple.c +++ b/src/mesa/state_tracker/st_atom_stipple.c @@ -33,6 +33,8 @@ */ +#include + #include "st_context.h" #include "st_atom.h" #include "pipe/p_context.h" diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 1a499e66d0..a19dcc9253 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -214,7 +214,7 @@ src_register( struct st_translate *t, return ureg_src_undef(); case PROGRAM_TEMPORARY: - ASSERT(index >= 0); + assert(index >= 0); if (ureg_dst_is_undef(t->temps[index])) t->temps[index] = ureg_DECL_temporary( t->ureg ); assert(index < Elements(t->temps)); @@ -224,7 +224,7 @@ src_register( struct st_translate *t, case PROGRAM_ENV_PARAM: case PROGRAM_LOCAL_PARAM: case PROGRAM_UNIFORM: - ASSERT(index >= 0); + assert(index >= 0); return t->constants[index]; case PROGRAM_STATE_VAR: case PROGRAM_CONSTANT: /* ie, immediate */ diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index dbdf1ea1ad..add6e949df 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -25,14 +25,14 @@ * **************************************************************************/ +#include + #include "st_context.h" #include "st_format.h" #include "st_texture.h" #include "st_cb_fbo.h" #include "main/enums.h" -#undef Elements /* fix re-defined macro warning */ - #include "pipe/p_state.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" diff --git a/src/mesa/vbo/vbo_exec_draw.c b/src/mesa/vbo/vbo_exec_draw.c index be2b646ee3..84ae1b87f9 100644 --- a/src/mesa/vbo/vbo_exec_draw.c +++ b/src/mesa/vbo/vbo_exec_draw.c @@ -27,6 +27,7 @@ #include "main/glheader.h" #include "main/bufferobj.h" +#include "main/compiler.h" #include "main/enums.h" #include "main/state.h" -- cgit v1.2.3 From d6a5f94ea4d03b05c434fcad125d1f9c50c638e8 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Mon, 2 Aug 2010 15:11:22 +0300 Subject: r600: fix sin,cos functions on r600 r600 doesnt need the same normalization as r700 - instead it requires range to be truncated to -pi..pi I left the range trunc also effective on r700 althouch according the docs it has sufficent range (-512*PI, +512*PI). The instructions seem to be used not too often to cause perf loss because of this Based on patches and testing by Conn Clark and Alain Perrot --- src/mesa/drivers/dri/r600/r700_assembler.c | 142 +++++++++++++++++++++++++++-- 1 file changed, 133 insertions(+), 9 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index 8f6cc1d875..b555ea683c 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -2872,25 +2872,92 @@ GLboolean assemble_CMP(r700_AssemblerBase *pAsm) GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode) { + /* + * r600 - trunc to -PI..PI range + * r700 - normalize by dividing by 2PI + * see fdo bug 27901 + */ + int tmp; checkop1(pAsm); tmp = gethelpr(pAsm); - pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; assemble_src(pAsm, 0, -1); pAsm->S[1].src.rtype = SRC_REC_LITERAL; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + pAsm->D2.dst2.literal_slots = 1; pAsm->C[0].f = 1/(3.1415926535 * 2); - pAsm->C[1].f = 0.0F; - next_ins(pAsm); + pAsm->C[1].f = 0.5f; + + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + + pAsm->D2.dst2.literal_slots = 1; + + if (pAsm->bR6xx) + { + pAsm->C[0].f = 3.1415926535897f * 2.0f; + pAsm->C[1].f = -3.1415926535897f; + } + else + { + pAsm->C[0].f = 1.0f; + pAsm->C[1].f = -0.5f; + } + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } pAsm->D.dst.opcode = opcode; pAsm->D.dst.math = 1; @@ -4030,22 +4097,79 @@ GLboolean assemble_SCS(r700_AssemblerBase *pAsm) checkop1(pAsm); tmp = gethelpr(pAsm); - /* tmp.x = src /2*PI */ - pAsm->D.dst.opcode = SQ_OP2_INST_MUL; + + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); pAsm->D.dst.rtype = DST_REG_TEMPORARY; pAsm->D.dst.reg = tmp; - pAsm->D.dst.writex = 1; assemble_src(pAsm, 0, -1); pAsm->S[1].src.rtype = SRC_REC_LITERAL; setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + pAsm->D2.dst2.literal_slots = 1; pAsm->C[0].f = 1/(3.1415926535 * 2); - pAsm->C[1].f = 0.0F; + pAsm->C[1].f = 0.5F; - next_ins(pAsm); + if ( GL_FALSE == next_ins(pAsm) ) + { + return GL_FALSE; + } + + pAsm->D.dst.opcode = SQ_OP2_INST_FRACT; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + pAsm->D.dst.writex = 1; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } + pAsm->D.dst.opcode = SQ_OP3_INST_MULADD; + pAsm->D.dst.op3 = 1; + + setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE); + pAsm->D.dst.rtype = DST_REG_TEMPORARY; + pAsm->D.dst.reg = tmp; + + setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE); + pAsm->S[0].src.rtype = SRC_REG_TEMPORARY; + pAsm->S[0].src.reg = tmp; + setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X); + + pAsm->S[1].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X); + + pAsm->S[2].src.rtype = SRC_REC_LITERAL; + setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y); + + pAsm->D2.dst2.literal_slots = 1; + + if(pAsm->bR6xx) { + pAsm->C[0].f = 3.1415926535897f * 2.0f; + pAsm->C[1].f = -3.1415926535897f; + } else { + pAsm->C[0].f = 1.0f; + pAsm->C[1].f = -0.5f; + } + + if(( GL_FALSE == next_ins(pAsm) )) + { + return GL_FALSE; + } // COS dst.x, a.x pAsm->D.dst.opcode = SQ_OP2_INST_COS; -- cgit v1.2.3 From 8446f257b3e3ca4a3eb2c79bc357e46343e04e87 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Sun, 25 Jul 2010 16:29:24 +0200 Subject: radeon: Add DRI2 flush extension to so we synchronize properly. When DRI2 swap buffer is pending (copy buffer not pageflipping) we need to make sure we have the flush extension so radeon doesn't resume rendering on the not yet blitted front buffer. Modified version of Jerome's patch to add flush extension in the correct place. This prepares a possible fix for: https://bugs.freedesktop.org/show_bug.cgi?id=28341 https://bugs.freedesktop.org/show_bug.cgi?id=28410 Signed-off-by: Jerome Glisse Signed-off-by: Mario Kleiner --- src/mesa/drivers/dri/r200/r200_swtcl.c | 2 ++ src/mesa/drivers/dri/r200/r200_tcl.c | 2 ++ src/mesa/drivers/dri/r300/r300_render.c | 2 ++ src/mesa/drivers/dri/r600/r700_clear.c | 3 ++ src/mesa/drivers/dri/r600/r700_render.c | 4 +++ .../drivers/dri/radeon/radeon_common_context.c | 42 ++++++++++++++++++++++ .../drivers/dri/radeon/radeon_common_context.h | 1 + src/mesa/drivers/dri/radeon/radeon_screen.c | 17 +++++++++ src/mesa/drivers/dri/radeon/radeon_swtcl.c | 2 ++ src/mesa/drivers/dri/radeon/radeon_tcl.c | 2 ++ 10 files changed, 77 insertions(+) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index 262fe3cdde..dbf4ad477d 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -612,6 +612,8 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); + radeon_prepare_render(&rmesa->radeon); + if (rmesa->radeon.swtcl.hw_primitive != hwprim) { /* need to disable perspective-correct texturing for point sprites */ if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) { diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index d43e14581e..4ae0f30491 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -264,6 +264,8 @@ void r200TclPrimitive( GLcontext *ctx, r200ContextPtr rmesa = R200_CONTEXT(ctx); GLuint newprim = hw_prim | R200_VF_TCL_OUTPUT_VTX_ENABLE; + radeon_prepare_render(&rmesa->radeon); + if (newprim != rmesa->tcl.hw_primitive || !discrete_prim[hw_prim&0xf]) { /* need to disable perspective-correct texturing for point sprites */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index bb8f91491f..cf89ab7ec3 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -327,6 +327,8 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) BATCH_LOCALS(&rmesa->radeon); int type, num_verts; + radeon_prepare_render(&rmesa->radeon); + type = r300PrimitiveType(rmesa, prim); num_verts = r300NumVerts(rmesa, end - start, prim); diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c index 09c48565b6..d1008f28b9 100644 --- a/src/mesa/drivers/dri/r600/r700_clear.c +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -48,6 +48,7 @@ static GLboolean r700ClearFast(context_t *context, GLbitfield mask) void r700Clear(GLcontext * ctx, GLbitfield mask) { context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon); const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]); GLbitfield swrast_mask = 0, tri_mask = 0; @@ -60,6 +61,8 @@ void r700Clear(GLcontext * ctx, GLbitfield mask) context->radeon.front_buffer_dirty = GL_TRUE; } + radeon_prepare_render(radeon); + if( GL_TRUE == r700ClearFast(context, mask) ) { return; diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 1929b7cc12..316a0943f4 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -977,6 +977,10 @@ static void r700DrawPrims(GLcontext *ctx, { GLboolean retval = GL_FALSE; + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + radeon_prepare_render(radeon); + /* This check should get folded into just the places that * min/max index are really needed. */ diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 5a7d52c4d2..3665944c3e 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -493,6 +493,43 @@ radeon_bits_per_pixel(const struct radeon_renderbuffer *rb) return _mesa_get_format_bytes(rb->base.Format) * 8; } +/* + * Check if drawable has been invalidated by dri2InvalidateDrawable(). + * Update renderbuffers if so. This prevents a client from accessing + * a backbuffer that has a swap pending but not yet completed. + * + * See intel_prepare_render for equivalent code in intel driver. + * + */ +void radeon_prepare_render(radeonContextPtr radeon) +{ + __DRIcontext *driContext = radeon->dri.context; + __DRIdrawable *drawable; + __DRIscreen *screen; + + screen = driContext->driScreenPriv; + if (!screen->dri2.loader) + return; + + drawable = driContext->driDrawablePriv; + if (drawable->dri2.stamp != driContext->dri2.draw_stamp) { + if (drawable->lastStamp != drawable->dri2.stamp) + radeon_update_renderbuffers(driContext, drawable, GL_FALSE); + + /* Intel driver does the equivalent of this, no clue if it is needed: + * radeon_draw_buffer(radeon->glCtx, &(drawable->driverPrivate)->base); + */ + driContext->dri2.draw_stamp = drawable->dri2.stamp; + } + + drawable = driContext->driReadablePriv; + if (drawable->dri2.stamp != driContext->dri2.read_stamp) { + if (drawable->lastStamp != drawable->dri2.stamp) + radeon_update_renderbuffers(driContext, drawable, GL_FALSE); + driContext->dri2.read_stamp = drawable->dri2.stamp; + } +} + void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, GLboolean front_only) @@ -514,6 +551,11 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, screen = context->driScreenPriv; radeon = (radeonContextPtr) context->driverPrivate; + /* Set this up front, so that in case our buffers get invalidated + * while we're getting new buffers, we don't clobber the stamp and + * thus ignore the invalidate. */ + drawable->lastStamp = drawable->dri2.stamp; + if (screen->dri2.loader && (screen->dri2.loader->base.version > 2) && (screen->dri2.loader->getBuffersWithFormat != NULL)) { diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index 5156c5d0d0..ec773cfa52 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -614,5 +614,6 @@ GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv, __DRIdrawable * driDrawPriv, __DRIdrawable * driReadPriv); extern void radeonDestroyContext(__DRIcontext * driContextPriv); +void radeon_prepare_render(radeonContextPtr radeon); #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 82107cc6ae..d3d7b216ba 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -376,6 +376,21 @@ static const __DRItexBufferExtension r600TexBufferExtension = { }; #endif +static void +radeonDRI2Flush(__DRIdrawable *drawable) +{ + radeonContextPtr rmesa; + + rmesa = (radeonContextPtr) drawable->driContextPriv->driverPrivate; + radeonFlush(rmesa->glCtx); +} + +static const struct __DRI2flushExtensionRec radeonFlushExtension = { + { __DRI2_FLUSH, __DRI2_FLUSH_VERSION }, + radeonDRI2Flush, + dri2InvalidateDrawable, +}; + static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) { screen->device_id = device_id; @@ -1379,6 +1394,8 @@ radeonCreateScreen2(__DRIscreen *sPriv) screen->extensions[i++] = &r600TexBufferExtension.base; #endif + screen->extensions[i++] = &radeonFlushExtension.base; + screen->extensions[i++] = NULL; sPriv->extensions = screen->extensions; diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index f2fcb46688..67be466c3f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -408,6 +408,8 @@ static GLboolean radeon_run_render( GLcontext *ctx, !radeon_dma_validate_render( ctx, VB )) return GL_TRUE; + radeon_prepare_render(&rmesa->radeon); + tnl->Driver.Render.Start( ctx ); for (i = 0 ; i < VB->PrimitiveCount ; i++) diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index ea796e1a45..5e1718f9df 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -252,6 +252,8 @@ void radeonTclPrimitive( GLcontext *ctx, GLuint se_cntl; GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE; + radeon_prepare_render(&rmesa->radeon); + if (newprim != rmesa->tcl.hw_primitive || !discrete_prim[hw_prim&0xf]) { RADEON_NEWPRIM( rmesa ); -- cgit v1.2.3 From 3d39f56a8721ec6aa6d00965b7740fc8cb5edaae Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Mon, 2 Aug 2010 13:28:15 -0400 Subject: Revert "radeon: Add DRI2 flush extension to so we synchronize properly." This reverts commit 8446f257b3e3ca4a3eb2c79bc357e46343e04e87. --- src/mesa/drivers/dri/r200/r200_swtcl.c | 2 -- src/mesa/drivers/dri/r200/r200_tcl.c | 2 -- src/mesa/drivers/dri/r300/r300_render.c | 2 -- src/mesa/drivers/dri/r600/r700_clear.c | 3 -- src/mesa/drivers/dri/r600/r700_render.c | 4 --- .../drivers/dri/radeon/radeon_common_context.c | 42 ---------------------- .../drivers/dri/radeon/radeon_common_context.h | 1 - src/mesa/drivers/dri/radeon/radeon_screen.c | 17 --------- src/mesa/drivers/dri/radeon/radeon_swtcl.c | 2 -- src/mesa/drivers/dri/radeon/radeon_tcl.c | 2 -- 10 files changed, 77 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index dbf4ad477d..262fe3cdde 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -612,8 +612,6 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); - radeon_prepare_render(&rmesa->radeon); - if (rmesa->radeon.swtcl.hw_primitive != hwprim) { /* need to disable perspective-correct texturing for point sprites */ if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) { diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index 4ae0f30491..d43e14581e 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -264,8 +264,6 @@ void r200TclPrimitive( GLcontext *ctx, r200ContextPtr rmesa = R200_CONTEXT(ctx); GLuint newprim = hw_prim | R200_VF_TCL_OUTPUT_VTX_ENABLE; - radeon_prepare_render(&rmesa->radeon); - if (newprim != rmesa->tcl.hw_primitive || !discrete_prim[hw_prim&0xf]) { /* need to disable perspective-correct texturing for point sprites */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index cf89ab7ec3..bb8f91491f 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -327,8 +327,6 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) BATCH_LOCALS(&rmesa->radeon); int type, num_verts; - radeon_prepare_render(&rmesa->radeon); - type = r300PrimitiveType(rmesa, prim); num_verts = r300NumVerts(rmesa, end - start, prim); diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c index d1008f28b9..09c48565b6 100644 --- a/src/mesa/drivers/dri/r600/r700_clear.c +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -48,7 +48,6 @@ static GLboolean r700ClearFast(context_t *context, GLbitfield mask) void r700Clear(GLcontext * ctx, GLbitfield mask) { context_t *context = R700_CONTEXT(ctx); - radeonContextPtr radeon = &context->radeon; __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon); const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]); GLbitfield swrast_mask = 0, tri_mask = 0; @@ -61,8 +60,6 @@ void r700Clear(GLcontext * ctx, GLbitfield mask) context->radeon.front_buffer_dirty = GL_TRUE; } - radeon_prepare_render(radeon); - if( GL_TRUE == r700ClearFast(context, mask) ) { return; diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 316a0943f4..1929b7cc12 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -977,10 +977,6 @@ static void r700DrawPrims(GLcontext *ctx, { GLboolean retval = GL_FALSE; - context_t *context = R700_CONTEXT(ctx); - radeonContextPtr radeon = &context->radeon; - radeon_prepare_render(radeon); - /* This check should get folded into just the places that * min/max index are really needed. */ diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 3665944c3e..5a7d52c4d2 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -493,43 +493,6 @@ radeon_bits_per_pixel(const struct radeon_renderbuffer *rb) return _mesa_get_format_bytes(rb->base.Format) * 8; } -/* - * Check if drawable has been invalidated by dri2InvalidateDrawable(). - * Update renderbuffers if so. This prevents a client from accessing - * a backbuffer that has a swap pending but not yet completed. - * - * See intel_prepare_render for equivalent code in intel driver. - * - */ -void radeon_prepare_render(radeonContextPtr radeon) -{ - __DRIcontext *driContext = radeon->dri.context; - __DRIdrawable *drawable; - __DRIscreen *screen; - - screen = driContext->driScreenPriv; - if (!screen->dri2.loader) - return; - - drawable = driContext->driDrawablePriv; - if (drawable->dri2.stamp != driContext->dri2.draw_stamp) { - if (drawable->lastStamp != drawable->dri2.stamp) - radeon_update_renderbuffers(driContext, drawable, GL_FALSE); - - /* Intel driver does the equivalent of this, no clue if it is needed: - * radeon_draw_buffer(radeon->glCtx, &(drawable->driverPrivate)->base); - */ - driContext->dri2.draw_stamp = drawable->dri2.stamp; - } - - drawable = driContext->driReadablePriv; - if (drawable->dri2.stamp != driContext->dri2.read_stamp) { - if (drawable->lastStamp != drawable->dri2.stamp) - radeon_update_renderbuffers(driContext, drawable, GL_FALSE); - driContext->dri2.read_stamp = drawable->dri2.stamp; - } -} - void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, GLboolean front_only) @@ -551,11 +514,6 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, screen = context->driScreenPriv; radeon = (radeonContextPtr) context->driverPrivate; - /* Set this up front, so that in case our buffers get invalidated - * while we're getting new buffers, we don't clobber the stamp and - * thus ignore the invalidate. */ - drawable->lastStamp = drawable->dri2.stamp; - if (screen->dri2.loader && (screen->dri2.loader->base.version > 2) && (screen->dri2.loader->getBuffersWithFormat != NULL)) { diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index ec773cfa52..5156c5d0d0 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -614,6 +614,5 @@ GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv, __DRIdrawable * driDrawPriv, __DRIdrawable * driReadPriv); extern void radeonDestroyContext(__DRIcontext * driContextPriv); -void radeon_prepare_render(radeonContextPtr radeon); #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index d3d7b216ba..82107cc6ae 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -376,21 +376,6 @@ static const __DRItexBufferExtension r600TexBufferExtension = { }; #endif -static void -radeonDRI2Flush(__DRIdrawable *drawable) -{ - radeonContextPtr rmesa; - - rmesa = (radeonContextPtr) drawable->driContextPriv->driverPrivate; - radeonFlush(rmesa->glCtx); -} - -static const struct __DRI2flushExtensionRec radeonFlushExtension = { - { __DRI2_FLUSH, __DRI2_FLUSH_VERSION }, - radeonDRI2Flush, - dri2InvalidateDrawable, -}; - static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) { screen->device_id = device_id; @@ -1394,8 +1379,6 @@ radeonCreateScreen2(__DRIscreen *sPriv) screen->extensions[i++] = &r600TexBufferExtension.base; #endif - screen->extensions[i++] = &radeonFlushExtension.base; - screen->extensions[i++] = NULL; sPriv->extensions = screen->extensions; diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index 67be466c3f..f2fcb46688 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -408,8 +408,6 @@ static GLboolean radeon_run_render( GLcontext *ctx, !radeon_dma_validate_render( ctx, VB )) return GL_TRUE; - radeon_prepare_render(&rmesa->radeon); - tnl->Driver.Render.Start( ctx ); for (i = 0 ; i < VB->PrimitiveCount ; i++) diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index 5e1718f9df..ea796e1a45 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -252,8 +252,6 @@ void radeonTclPrimitive( GLcontext *ctx, GLuint se_cntl; GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE; - radeon_prepare_render(&rmesa->radeon); - if (newprim != rmesa->tcl.hw_primitive || !discrete_prim[hw_prim&0xf]) { RADEON_NEWPRIM( rmesa ); -- cgit v1.2.3 From 646d2e9fbc41bf49075013009e9583bec4a51168 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Mon, 2 Aug 2010 04:17:03 +0200 Subject: radeon: Add DRI2 flush extension support, so we synchronize properly. When a DRI2 swap buffer is pending we need to make sure we have the flush extension so radeon doesn't resume rendering to or reading from the not yet blitted front buffer. This fixes: https://bugs.freedesktop.org/show_bug.cgi?id=28341 https://bugs.freedesktop.org/show_bug.cgi?id=28410 Signed-off-by: Jerome Glisse Signed-off-by: Mario Kleiner --- src/mesa/drivers/dri/r200/r200_swtcl.c | 2 + src/mesa/drivers/dri/r200/r200_tcl.c | 2 + src/mesa/drivers/dri/r300/r300_render.c | 2 + src/mesa/drivers/dri/r600/r700_clear.c | 3 ++ src/mesa/drivers/dri/r600/r700_render.c | 4 ++ src/mesa/drivers/dri/radeon/radeon_common.c | 15 +++---- .../drivers/dri/radeon/radeon_common_context.c | 49 ++++++++++++++++++++++ .../drivers/dri/radeon/radeon_common_context.h | 1 + src/mesa/drivers/dri/radeon/radeon_pixel_read.c | 3 ++ src/mesa/drivers/dri/radeon/radeon_screen.c | 17 ++++++++ src/mesa/drivers/dri/radeon/radeon_swtcl.c | 2 + src/mesa/drivers/dri/radeon/radeon_tcl.c | 2 + src/mesa/drivers/dri/radeon/radeon_tex_copy.c | 6 +++ 13 files changed, 98 insertions(+), 10 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r200/r200_swtcl.c b/src/mesa/drivers/dri/r200/r200_swtcl.c index 262fe3cdde..dbf4ad477d 100644 --- a/src/mesa/drivers/dri/r200/r200_swtcl.c +++ b/src/mesa/drivers/dri/r200/r200_swtcl.c @@ -612,6 +612,8 @@ static void r200RasterPrimitive( GLcontext *ctx, GLuint hwprim ) { r200ContextPtr rmesa = R200_CONTEXT(ctx); + radeon_prepare_render(&rmesa->radeon); + if (rmesa->radeon.swtcl.hw_primitive != hwprim) { /* need to disable perspective-correct texturing for point sprites */ if ((hwprim & 0xf) == R200_VF_PRIM_POINT_SPRITES && ctx->Point.PointSprite) { diff --git a/src/mesa/drivers/dri/r200/r200_tcl.c b/src/mesa/drivers/dri/r200/r200_tcl.c index d43e14581e..4ae0f30491 100644 --- a/src/mesa/drivers/dri/r200/r200_tcl.c +++ b/src/mesa/drivers/dri/r200/r200_tcl.c @@ -264,6 +264,8 @@ void r200TclPrimitive( GLcontext *ctx, r200ContextPtr rmesa = R200_CONTEXT(ctx); GLuint newprim = hw_prim | R200_VF_TCL_OUTPUT_VTX_ENABLE; + radeon_prepare_render(&rmesa->radeon); + if (newprim != rmesa->tcl.hw_primitive || !discrete_prim[hw_prim&0xf]) { /* need to disable perspective-correct texturing for point sprites */ diff --git a/src/mesa/drivers/dri/r300/r300_render.c b/src/mesa/drivers/dri/r300/r300_render.c index bb8f91491f..cf89ab7ec3 100644 --- a/src/mesa/drivers/dri/r300/r300_render.c +++ b/src/mesa/drivers/dri/r300/r300_render.c @@ -327,6 +327,8 @@ void r300RunRenderPrimitive(GLcontext * ctx, int start, int end, int prim) BATCH_LOCALS(&rmesa->radeon); int type, num_verts; + radeon_prepare_render(&rmesa->radeon); + type = r300PrimitiveType(rmesa, prim); num_verts = r300NumVerts(rmesa, end - start, prim); diff --git a/src/mesa/drivers/dri/r600/r700_clear.c b/src/mesa/drivers/dri/r600/r700_clear.c index 09c48565b6..d1008f28b9 100644 --- a/src/mesa/drivers/dri/r600/r700_clear.c +++ b/src/mesa/drivers/dri/r600/r700_clear.c @@ -48,6 +48,7 @@ static GLboolean r700ClearFast(context_t *context, GLbitfield mask) void r700Clear(GLcontext * ctx, GLbitfield mask) { context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; __DRIdrawable *dPriv = radeon_get_drawable(&context->radeon); const GLuint colorMask = *((GLuint *) & ctx->Color.ColorMask[0]); GLbitfield swrast_mask = 0, tri_mask = 0; @@ -60,6 +61,8 @@ void r700Clear(GLcontext * ctx, GLbitfield mask) context->radeon.front_buffer_dirty = GL_TRUE; } + radeon_prepare_render(radeon); + if( GL_TRUE == r700ClearFast(context, mask) ) { return; diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 1929b7cc12..316a0943f4 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -977,6 +977,10 @@ static void r700DrawPrims(GLcontext *ctx, { GLboolean retval = GL_FALSE; + context_t *context = R700_CONTEXT(ctx); + radeonContextPtr radeon = &context->radeon; + radeon_prepare_render(radeon); + /* This check should get folded into just the places that * min/max index are really needed. */ diff --git a/src/mesa/drivers/dri/radeon/radeon_common.c b/src/mesa/drivers/dri/radeon/radeon_common.c index 13f1f0611b..c1a660af3d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common.c +++ b/src/mesa/drivers/dri/radeon/radeon_common.c @@ -708,7 +708,6 @@ void radeon_draw_buffer(GLcontext *ctx, struct gl_framebuffer *fb) if (fb->_ColorDrawBufferIndexes[0] == BUFFER_FRONT_LEFT) { rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); radeon->front_cliprects = GL_TRUE; - radeon->front_buffer_dirty = GL_TRUE; } else { rrbColor = radeon_renderbuffer(fb->Attachment[BUFFER_BACK_LEFT].Renderbuffer); radeon->front_cliprects = GL_FALSE; @@ -1132,17 +1131,13 @@ flush_front: if (screen->dri2.loader && (screen->dri2.loader->base.version >= 2) && (screen->dri2.loader->flushFrontBuffer != NULL)) { __DRIdrawable * drawable = radeon_get_drawable(radeon); - (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate); - /* Only clear the dirty bit if front-buffer rendering is no longer - * enabled. This is done so that the dirty bit can only be set in - * glDrawBuffer. Otherwise the dirty bit would have to be set at - * each of N places that do rendering. This has worse performances, - * but it is much easier to get correct. + /* We set the dirty bit in radeon_prepare_render() if we're + * front buffer rendering once we get there. */ - if (!radeon->is_front_buffer_rendering) { - radeon->front_buffer_dirty = GL_FALSE; - } + radeon->front_buffer_dirty = GL_FALSE; + + (*screen->dri2.loader->flushFrontBuffer)(drawable, drawable->loaderPrivate); } } } diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index 5a7d52c4d2..f76c49eada 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -493,6 +493,50 @@ radeon_bits_per_pixel(const struct radeon_renderbuffer *rb) return _mesa_get_format_bytes(rb->base.Format) * 8; } +/* + * Check if drawable has been invalidated by dri2InvalidateDrawable(). + * Update renderbuffers if so. This prevents a client from accessing + * a backbuffer that has a swap pending but not yet completed. + * + * See intel_prepare_render for equivalent code in intel driver. + * + */ +void radeon_prepare_render(radeonContextPtr radeon) +{ + __DRIcontext *driContext = radeon->dri.context; + __DRIdrawable *drawable; + __DRIscreen *screen; + + screen = driContext->driScreenPriv; + if (!screen->dri2.loader) + return; + + drawable = driContext->driDrawablePriv; + if (drawable->dri2.stamp != driContext->dri2.draw_stamp) { + if (drawable->lastStamp != drawable->dri2.stamp) + radeon_update_renderbuffers(driContext, drawable, GL_FALSE); + + /* Intel driver does the equivalent of this, no clue if it is needed: + * radeon_draw_buffer(radeon->glCtx, &(drawable->driverPrivate)->base); + */ + driContext->dri2.draw_stamp = drawable->dri2.stamp; + } + + drawable = driContext->driReadablePriv; + if (drawable->dri2.stamp != driContext->dri2.read_stamp) { + if (drawable->lastStamp != drawable->dri2.stamp) + radeon_update_renderbuffers(driContext, drawable, GL_FALSE); + driContext->dri2.read_stamp = drawable->dri2.stamp; + } + + /* If we're currently rendering to the front buffer, the rendering + * that will happen next will probably dirty the front buffer. So + * mark it as dirty here. + */ + if (radeon->is_front_buffer_rendering) + radeon->front_buffer_dirty = GL_TRUE; +} + void radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, GLboolean front_only) @@ -514,6 +558,11 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, screen = context->driScreenPriv; radeon = (radeonContextPtr) context->driverPrivate; + /* Set this up front, so that in case our buffers get invalidated + * while we're getting new buffers, we don't clobber the stamp and + * thus ignore the invalidate. */ + drawable->lastStamp = drawable->dri2.stamp; + if (screen->dri2.loader && (screen->dri2.loader->base.version > 2) && (screen->dri2.loader->getBuffersWithFormat != NULL)) { diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index 5156c5d0d0..ec773cfa52 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -614,5 +614,6 @@ GLboolean radeonMakeCurrent(__DRIcontext * driContextPriv, __DRIdrawable * driDrawPriv, __DRIdrawable * driReadPriv); extern void radeonDestroyContext(__DRIcontext * driContextPriv); +void radeon_prepare_render(radeonContextPtr radeon); #endif diff --git a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c index dadb8002c7..fb741173ca 100644 --- a/src/mesa/drivers/dri/radeon/radeon_pixel_read.c +++ b/src/mesa/drivers/dri/radeon/radeon_pixel_read.c @@ -179,6 +179,9 @@ radeonReadPixels(GLcontext * ctx, GLenum format, GLenum type, const struct gl_pixelstore_attrib *pack, GLvoid * pixels) { + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + radeon_prepare_render(radeon); + if (do_blit_readpixels(ctx, x, y, width, height, format, type, pack, pixels)) return; diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 82107cc6ae..d3d7b216ba 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -376,6 +376,21 @@ static const __DRItexBufferExtension r600TexBufferExtension = { }; #endif +static void +radeonDRI2Flush(__DRIdrawable *drawable) +{ + radeonContextPtr rmesa; + + rmesa = (radeonContextPtr) drawable->driContextPriv->driverPrivate; + radeonFlush(rmesa->glCtx); +} + +static const struct __DRI2flushExtensionRec radeonFlushExtension = { + { __DRI2_FLUSH, __DRI2_FLUSH_VERSION }, + radeonDRI2Flush, + dri2InvalidateDrawable, +}; + static int radeon_set_screen_flags(radeonScreenPtr screen, int device_id) { screen->device_id = device_id; @@ -1379,6 +1394,8 @@ radeonCreateScreen2(__DRIscreen *sPriv) screen->extensions[i++] = &r600TexBufferExtension.base; #endif + screen->extensions[i++] = &radeonFlushExtension.base; + screen->extensions[i++] = NULL; sPriv->extensions = screen->extensions; diff --git a/src/mesa/drivers/dri/radeon/radeon_swtcl.c b/src/mesa/drivers/dri/radeon/radeon_swtcl.c index f2fcb46688..67be466c3f 100644 --- a/src/mesa/drivers/dri/radeon/radeon_swtcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_swtcl.c @@ -408,6 +408,8 @@ static GLboolean radeon_run_render( GLcontext *ctx, !radeon_dma_validate_render( ctx, VB )) return GL_TRUE; + radeon_prepare_render(&rmesa->radeon); + tnl->Driver.Render.Start( ctx ); for (i = 0 ; i < VB->PrimitiveCount ; i++) diff --git a/src/mesa/drivers/dri/radeon/radeon_tcl.c b/src/mesa/drivers/dri/radeon/radeon_tcl.c index ea796e1a45..5e1718f9df 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tcl.c +++ b/src/mesa/drivers/dri/radeon/radeon_tcl.c @@ -252,6 +252,8 @@ void radeonTclPrimitive( GLcontext *ctx, GLuint se_cntl; GLuint newprim = hw_prim | RADEON_CP_VC_CNTL_TCL_ENABLE; + radeon_prepare_render(&rmesa->radeon); + if (newprim != rmesa->tcl.hw_primitive || !discrete_prim[hw_prim&0xf]) { RADEON_NEWPRIM( rmesa ); diff --git a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c index 29fd31ac23..4cb0bb60c8 100644 --- a/src/mesa/drivers/dri/radeon/radeon_tex_copy.c +++ b/src/mesa/drivers/dri/radeon/radeon_tex_copy.c @@ -153,6 +153,9 @@ radeonCopyTexImage2D(GLcontext *ctx, GLenum target, GLint level, _mesa_select_tex_image(ctx, texObj, target, level); int srcx, srcy, dstx, dsty; + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + radeon_prepare_render(radeon); + if (border) goto fail; @@ -202,6 +205,9 @@ radeonCopyTexSubImage2D(GLcontext *ctx, GLenum target, GLint level, struct gl_texture_object *texObj = _mesa_select_tex_object(ctx, texUnit, target); struct gl_texture_image *texImage = _mesa_select_tex_image(ctx, texObj, target, level); + radeonContextPtr radeon = RADEON_CONTEXT(ctx); + radeon_prepare_render(radeon); + if (!do_copy_texsubimage(ctx, target, level, radeon_tex_obj(texObj), (radeon_texture_image *)texImage, xoffset, yoffset, x, y, width, height)) { -- cgit v1.2.3 From 680f486ffd4aab8b9354f1b5a035b3881ac2310c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 12 Jul 2010 13:14:38 -0700 Subject: r300/compiler: Don't unroll loops with continue or break. --- src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c index 131e9e7436..fed4d8829a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -191,7 +191,7 @@ static void get_incr_amount(void * data, struct rc_instruction * inst, static int transform_const_loop(struct emulate_loop_state * s, struct loop_info * loop) { - int end_loops = 1; + int end_loops; int iterations; struct count_inst count_inst; float limit_value; @@ -235,6 +235,7 @@ static int transform_const_loop(struct emulate_loop_state * s, count_inst.Swz = counter->Swizzle; count_inst.Amount = 0.0f; count_inst.Unknown = 0; + end_loops = 1; for(inst = loop->BeginLoop->Next; end_loops > 0; inst = inst->Next){ switch(inst->U.I.Opcode){ /* XXX In the future we might want to try to unroll nested @@ -246,6 +247,16 @@ static int transform_const_loop(struct emulate_loop_state * s, loop->EndLoop = inst; end_loops--; break; + case RC_OPCODE_BRK: + /* Don't unroll loops if it has a BRK instruction + * other one used when testing the main conditional + * of the loop. */ + + /* Make sure we haven't entered a nested loops. */ + if(inst != loop->Brk && end_loops == 1) { + return 0; + } + break; /* XXX Check if the counter is modified within an if statement. */ case RC_OPCODE_IF: -- cgit v1.2.3 From 9dcc5006660037665fe98bf2d9fb966e620a038b Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Sun, 1 Aug 2010 20:06:53 -0700 Subject: r300/compiler: KILP may not always be inside an IF statement. --- .../drivers/dri/r300/compiler/radeon_program_alu.c | 23 +++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c index 3cc2897293..857aae5514 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_program_alu.c @@ -988,17 +988,22 @@ void radeonTransformKILP(struct radeon_compiler * c) for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { - if (inst->U.I.Opcode != RC_OPCODE_KILP - || inst->Prev->U.I.Opcode != RC_OPCODE_IF - || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { + if (inst->U.I.Opcode != RC_OPCODE_KILP) continue; - } + inst->U.I.Opcode = RC_OPCODE_KIL; - inst->U.I.SrcReg[0] = negate(absolute(inst->Prev->U.I.SrcReg[0])); - /* Remove IF */ - rc_remove_instruction(inst->Prev); - /* Remove ENDIF */ - rc_remove_instruction(inst->Next); + if (inst->Prev->U.I.Opcode != RC_OPCODE_IF + || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { + inst->U.I.SrcReg[0] = negate(builtin_one); + } else { + + inst->U.I.SrcReg[0] = + negate(absolute(inst->Prev->U.I.SrcReg[0])); + /* Remove IF */ + rc_remove_instruction(inst->Prev); + /* Remove ENDIF */ + rc_remove_instruction(inst->Next); + } } } -- cgit v1.2.3 From 2824d5687a19e42ba0da8fd08e80610c4469a3b3 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 3 Aug 2010 15:23:23 -0700 Subject: r300/compiler: r500 hw support for break and continue in loops. The BGNLOOP and ENDLOOP instructions are now being used correctly, which makes break and continue possible. The deadcode pass has been modified to handle breaks, and the compiler is more careful about which loops are unrolled. --- src/gallium/drivers/r300/r300_fs.c | 9 +- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 8 +- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 9 +- src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 25 --- src/mesa/drivers/dri/r300/compiler/r500_fragprog.h | 2 - .../drivers/dri/r300/compiler/r500_fragprog_emit.c | 114 ++++++---- src/mesa/drivers/dri/r300/compiler/radeon_code.h | 3 + .../dri/r300/compiler/radeon_dataflow_deadcode.c | 39 +++- .../dri/r300/compiler/radeon_emulate_loops.c | 237 +++++++++++++-------- .../dri/r300/compiler/radeon_emulate_loops.h | 9 +- .../drivers/dri/r300/compiler/radeon_optimize.c | 3 +- 11 files changed, 289 insertions(+), 169 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/gallium/drivers/r300/r300_fs.c b/src/gallium/drivers/r300/r300_fs.c index db5269912e..87ff49a90c 100644 --- a/src/gallium/drivers/r300/r300_fs.c +++ b/src/gallium/drivers/r300/r300_fs.c @@ -248,13 +248,18 @@ static void r300_emit_fs_code_to_buffer( shader->cb_code_size = 19 + ((code->inst_end + 1) * 6) + - imm_count * 7; + imm_count * 7 + + code->int_constant_count * 2; NEW_CB(shader->cb_code, shader->cb_code_size); OUT_CB_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO); OUT_CB_REG(R500_US_PIXSIZE, code->max_temp_idx); OUT_CB_REG(R500_US_FC_CTRL, code->us_fc_ctrl); - OUT_CB_REG(R500_US_CODE_RANGE, + for(i = 0; i < code->int_constant_count; i++){ + OUT_CB_REG(R500_US_FC_INT_CONST_0 + (i * 4), + code->int_constants[i]); + } + OUT_CB_REG(R500_US_CODE_RANGE, R500_US_CODE_RANGE_ADDR(0) | R500_US_CODE_RANGE_SIZE(code->inst_end)); OUT_CB_REG(R500_US_CODE_OFFSET, 0); OUT_CB_REG(R500_US_CODE_ADDR, diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index a326ee4c4f..070939497c 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -109,13 +109,13 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "before compilation"); if (c->Base.is_r500){ - r500_transform_unroll_loops(&c->Base, &loop_state); - debug_program_log(c, "after r500 transform loops"); + rc_unroll_loops(&c->Base, R500_PFS_MAX_INST); + debug_program_log(c, "after unroll loops"); } else{ - rc_transform_unroll_loops(&c->Base, &loop_state); + rc_transform_loops(&c->Base, &loop_state, R300_PFS_MAX_ALU_INST); debug_program_log(c, "after transform loops"); - + rc_emulate_branches(&c->Base); debug_program_log(c, "after emulate branches"); } diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index d347b4df9c..fe34ff67cd 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -633,7 +633,7 @@ static struct rc_swizzle_caps r300_vertprog_swizzle_caps = { void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) { struct emulate_loop_state loop_state; - + compiler->Base.SwizzleCaps = &r300_vertprog_swizzle_caps; addArtificialOutputs(compiler); @@ -643,14 +643,13 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) /* XXX Ideally this should be done only for r3xx, but since * we don't have branching support for r5xx, we use the emulation * on all chipsets. */ - rc_transform_unroll_loops(&compiler->Base, &loop_state); - - debug_program_log(compiler, "after transform loops"); - + if (compiler->Base.is_r500){ + rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU); rc_emulate_loops(&loop_state, R500_VS_MAX_ALU); } else { rc_emulate_loops(&loop_state, R300_VS_MAX_ALU); + rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU); } debug_program_log(compiler, "after emulate loops"); diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index e6b5522c5b..95be619d5d 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -60,31 +60,6 @@ int r500_transform_IF( return 1; } -/** - * Rewrite loops to make them easier to emit. This is not a local - * transformation, because it modifies and reorders an entire block of code. - */ -void r500_transform_unroll_loops(struct radeon_compiler * c, - struct emulate_loop_state *s) -{ - int i; - - rc_transform_unroll_loops(c, s); - - for( i = s->LoopCount - 1; i >= 0; i-- ){ - struct rc_instruction * inst_continue; - if(!s->Loops[i].EndLoop){ - continue; - } - /* Insert a continue instruction at the end of the loop. This - * is required in order to emit loops correctly. */ - inst_continue = rc_insert_new_instruction(c, - s->Loops[i].EndIf->Prev); - inst_continue->U.I.Opcode = RC_OPCODE_CONTINUE; - } - -} - static int r500_swizzle_is_native(rc_opcode opcode, struct rc_src_register reg) { unsigned int relevant; diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h index 0d005a794f..34173351f8 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.h @@ -49,6 +49,4 @@ extern int r500_transform_IF( struct rc_instruction * inst, void* data); -void r500_transform_unroll_loops(struct radeon_compiler * c, - struct emulate_loop_state * s); #endif diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c index 0bd8f0a239..c3f817ad4e 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog_emit.c @@ -64,7 +64,12 @@ struct branch_info { }; struct loop_info { - int LoopStart; + int BgnLoop; + + int BranchDepth; + int * Brks; + int BrkCount; + int BrkReserved; }; struct emit_state { @@ -368,6 +373,12 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst unsigned int newip = ++s->Code->inst_end; + /* Currently all loops use the same integer constant to intialize + * the loop variables. */ + if(!s->Code->int_constants[0]) { + s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff); + s->Code->int_constant_count = 1; + } s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT; switch(inst->U.I.Opcode){ @@ -378,32 +389,69 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1); loop = &s->Loops[s->CurrentLoopDepth++]; - - /* We don't emit an instruction for BGNLOOP, so we need to - * decrement the instruction counter, but first we need to - * set LoopStart to the current value of inst_end, which - * will end up being the first real instruction in the loop.*/ - loop->LoopStart = s->Code->inst_end--; + memset(loop, 0, sizeof(struct loop_info)); + loop->BranchDepth = s->CurrentBranchDepth; + loop->BgnLoop = newip; + + s->Code->inst[newip].inst2 = R500_FC_OP_LOOP + | R500_FC_JUMP_FUNC(0x00) + | R500_FC_IGNORE_UNCOVERED + ; break; - case RC_OPCODE_BRK: - /* Don't emit an instruction for BRK */ - s->Code->inst_end--; + loop = &s->Loops[s->CurrentLoopDepth - 1]; + memory_pool_array_reserve(&s->C->Pool, int, loop->Brks, + loop->BrkCount, loop->BrkReserved, 1); + + loop->Brks[loop->BrkCount++] = newip; + s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + | R500_FC_IGNORE_UNCOVERED + ; break; case RC_OPCODE_CONTINUE: loop = &s->Loops[s->CurrentLoopDepth - 1]; - s->Code->inst[newip].inst2 = R500_FC_OP_JUMP | - R500_FC_JUMP_FUNC(0xff); - s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->LoopStart); + s->Code->inst[newip].inst2 = R500_FC_OP_JUMP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_B_OP1_DECR + | R500_FC_B_POP_CNT( + s->CurrentBranchDepth - loop->BranchDepth) + ; + s->Code->inst[newip].inst3 = R500_FC_JUMP_ADDR(loop->BgnLoop); break; case RC_OPCODE_ENDLOOP: - /* Don't emit an instruction for ENDLOOP */ - s->Code->inst_end--; + { + unsigned int i; + loop = &s->Loops[s->CurrentLoopDepth - 1]; + /* Emit ENDLOOP */ + s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP + | R500_FC_JUMP_FUNC(0xff) + | R500_FC_JUMP_ANY + | R500_FC_IGNORE_UNCOVERED + ; + /* The constant integer at index 0 is used by all loops. */ + s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(loop->BgnLoop + 1) + ; + + /* Set jump address and int constant for BGNLOOP */ + s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0) + | R500_FC_JUMP_ADDR(newip) + ; + + /* Set jump address for the BRK instructions. */ + while(loop->BrkCount--) { + s->Code->inst[loop->Brks[loop->BrkCount]].inst3 = + R500_FC_JUMP_ADDR(newip + 1); + } s->CurrentLoopDepth--; break; - + } case RC_OPCODE_IF: if ( s->CurrentBranchDepth >= MAX_BRANCH_DEPTH_FULL) { rc_error(s->C, "Branch depth exceeds hardware limit"); @@ -442,24 +490,16 @@ static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst } branch = &s->Branches[s->CurrentBranchDepth - 1]; - - if(inst->Prev->U.I.Opcode == RC_OPCODE_BRK){ - branch->Endif = --s->Code->inst_end; - s->Code->inst[branch->Endif].inst2 |= - R500_FC_B_OP0_DECR; - } - else{ - branch->Endif = newip; - - s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP - | R500_FC_A_OP_NONE /* no address stack */ - | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ - | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ - | R500_FC_B_OP1_NONE /* no branch counter if stay */ - | R500_FC_B_POP_CNT(1) + branch->Endif = newip; + + s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP + | R500_FC_A_OP_NONE /* no address stack */ + | R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */ + | R500_FC_B_OP0_DECR /* decrement branch counter if stay */ + | R500_FC_B_OP1_NONE /* no branch counter if stay */ + | R500_FC_B_POP_CNT(1) ; - s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); - } + s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1); s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP | R500_FC_A_OP_NONE /* no address stack */ | R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */ @@ -544,11 +584,9 @@ void r500BuildFragmentProgramHwCode(struct r300_fragment_program_compiler *compi code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT; } - /* Use FULL flow control mode if branches are nested deep enough. - * We don not need to enable FULL flow control mode for loops, becasue - * we aren't using the hardware loop instructions. - */ - if (s.MaxBranchDepth >= 4) { + /* Enable full flow control mode if we are using loops or have if + * statements nested at least four deep. */ + if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) { if (code->max_temp_idx < 1) code->max_temp_idx = 1; diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_code.h b/src/mesa/drivers/dri/r300/compiler/radeon_code.h index d03689763b..e14a3520dd 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_code.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_code.h @@ -221,6 +221,9 @@ struct r500_fragment_program_code { int max_temp_idx; uint32_t us_fc_ctrl; + + uint32_t int_constants[32]; + uint32_t int_constant_count; }; struct rX00_fragment_program_code { diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c index fbb4235c22..31566a937f 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_dataflow_deadcode.c @@ -43,6 +43,12 @@ struct instruction_state { unsigned char SrcReg[3]; }; +struct loopinfo { + struct updatemask_state * Breaks; + unsigned int BreakCount; + unsigned int BreaksReserved; +}; + struct branchinfo { unsigned int HaveElse:1; @@ -59,6 +65,10 @@ struct deadcode_state { struct branchinfo * BranchStack; unsigned int BranchStackSize; unsigned int BranchStackReserved; + + struct loopinfo * LoopStack; + unsigned int LoopStackSize; + unsigned int LoopStackReserved; }; @@ -78,6 +88,22 @@ static void or_updatemasks( dst->Address = a->Address | b->Address; } +static void push_break(struct deadcode_state *s) +{ + struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1]; + memory_pool_array_reserve(&s->C->Pool, struct updatemask_state, + loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1); + + memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R)); +} + +static void push_loop(struct deadcode_state * s) +{ + memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack, + s->LoopStackSize, s->LoopStackReserved, 1); + memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo)); +} + static void push_branch(struct deadcode_state * s) { memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack, @@ -233,11 +259,22 @@ void rc_dataflow_deadcode(struct radeon_compiler * c, rc_dataflow_mark_outputs_f } } } + push_loop(&s); break; } - case RC_OPCODE_CONTINUE: case RC_OPCODE_BRK: + push_break(&s); + break; case RC_OPCODE_BGNLOOP: + { + unsigned int i; + struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1]; + for(i = 0; i < loop->BreakCount; i++) { + or_updatemasks(&s.R, &s.R, &loop->Breaks[i]); + } + break; + } + case RC_OPCODE_CONTINUE: break; case RC_OPCODE_ENDIF: push_branch(&s); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c index fed4d8829a..94e3e5f4f5 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -39,7 +39,6 @@ #define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) struct const_value { - struct radeon_compiler * C; struct rc_src_register * Src; float Value; @@ -78,17 +77,17 @@ static int src_reg_is_immediate(struct rc_src_register * src, c->Program.Constants.Constants[src->Index].Type==RC_CONSTANT_IMMEDIATE; } -static unsigned int loop_calc_iterations(struct emulate_loop_state *s, - struct loop_info * loop, unsigned int max_instructions) +static unsigned int loop_max_possible_iterations(struct radeon_compiler *c, + struct loop_info * loop, unsigned int prog_inst_limit) { - unsigned int total_i = rc_recompute_ips(s->C); + unsigned int total_i = rc_recompute_ips(c); unsigned int loop_i = (loop->EndLoop->IP - loop->BeginLoop->IP) - 1; /* +1 because the program already has one iteration of the loop. */ - return 1 + ((max_instructions - total_i) / (s->LoopCount * loop_i)); + return 1 + ((prog_inst_limit - total_i) / loop_i); } -static void loop_unroll(struct emulate_loop_state * s, - struct loop_info *loop, unsigned int iterations) +static void unroll_loop(struct radeon_compiler * c, struct loop_info * loop, + unsigned int iterations) { unsigned int i; struct rc_instruction * ptr; @@ -99,7 +98,7 @@ static void loop_unroll(struct emulate_loop_state * s, rc_remove_instruction(loop->EndLoop); for( i = 1; i < iterations; i++){ for(ptr = first; ptr != last->Next; ptr = ptr->Next){ - struct rc_instruction *new = rc_alloc_instruction(s->C); + struct rc_instruction *new = rc_alloc_instruction(c); memcpy(new, ptr, sizeof(struct rc_instruction)); rc_insert_instruction(append_to, new); append_to = new; @@ -115,7 +114,7 @@ static void update_const_value(void * data, struct rc_instruction * inst, if(value->Src->File != file || value->Src->Index != index || !(1 << GET_SWZ(value->Src->Swizzle, 0) & mask)){ - return; + return; } switch(inst->U.I.Opcode){ case RC_OPCODE_MOV: @@ -140,7 +139,7 @@ static void get_incr_amount(void * data, struct rc_instruction * inst, if(file != RC_FILE_TEMPORARY || count_inst->Index != index || (1 << GET_SWZ(count_inst->Swz,0) != mask)){ - return; + return; } /* Find the index of the counter register. */ opcode = rc_get_opcode_info(inst->U.I.Opcode); @@ -185,11 +184,10 @@ static void get_incr_amount(void * data, struct rc_instruction * inst, count_inst->Unknown = 1; return; } - } -static int transform_const_loop(struct emulate_loop_state * s, - struct loop_info * loop) +static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop, + unsigned int prog_inst_limit) { int end_loops; int iterations; @@ -201,12 +199,12 @@ static int transform_const_loop(struct emulate_loop_state * s, struct rc_instruction * inst; /* Find the counter and the upper limit */ - - if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], s->C)){ + + if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[0], c)){ limit = &loop->Cond->U.I.SrcReg[0]; counter = &loop->Cond->U.I.SrcReg[1]; } - else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], s->C)){ + else if(src_reg_is_immediate(&loop->Cond->U.I.SrcReg[1], c)){ limit = &loop->Cond->U.I.SrcReg[1]; counter = &loop->Cond->U.I.SrcReg[0]; } @@ -214,13 +212,13 @@ static int transform_const_loop(struct emulate_loop_state * s, DBG("No constant limit.\n"); return 0; } - + /* Find the initial value of the counter */ counter_value.Src = counter; counter_value.Value = 0.0f; counter_value.HasValue = 0; - counter_value.C = s->C; - for(inst = s->C->Program.Instructions.Next; inst != loop->BeginLoop; + counter_value.C = c; + for(inst = c->Program.Instructions.Next; inst != loop->BeginLoop; inst = inst->Next){ rc_for_all_writes_mask(inst, update_const_value, &counter_value); } @@ -230,7 +228,7 @@ static int transform_const_loop(struct emulate_loop_state * s, } DBG("Initial counter value is %f\n", counter_value.Value); /* Determine how the counter is modified each loop */ - count_inst.C = s->C; + count_inst.C = c; count_inst.Index = counter->Index; count_inst.Swz = counter->Swizzle; count_inst.Amount = 0.0f; @@ -277,17 +275,20 @@ static int transform_const_loop(struct emulate_loop_state * s, /* Calculate the number of iterations of this loop. Keeping this * simple, since we only support increment and decrement loops. */ - limit_value = get_constant_value(s->C, limit, 0); + limit_value = get_constant_value(c, limit, 0); DBG("Limit is %f.\n", limit_value); + /* The iteration calculations are opposite of what you would expect. + * In a normal loop, if the condition is met, then loop continues, but + * with our loops, if the condition is met, the is exited. */ switch(loop->Cond->U.I.Opcode){ - case RC_OPCODE_SGT: - case RC_OPCODE_SLT: + case RC_OPCODE_SGE: + case RC_OPCODE_SLE: iterations = (int) ceilf((limit_value - counter_value.Value) / count_inst.Amount); break; - case RC_OPCODE_SLE: - case RC_OPCODE_SGE: + case RC_OPCODE_SGT: + case RC_OPCODE_SLT: iterations = (int) floorf((limit_value - counter_value.Value) / count_inst.Amount) + 1; break; @@ -295,77 +296,84 @@ static int transform_const_loop(struct emulate_loop_state * s, return 0; } + if (iterations > loop_max_possible_iterations(c, loop, + prog_inst_limit)) { + return 0; + } + DBG("Loop will have %d iterations.\n", iterations); - + /* Prepare loop for unrolling */ rc_remove_instruction(loop->Cond); rc_remove_instruction(loop->If); rc_remove_instruction(loop->Brk); rc_remove_instruction(loop->EndIf); - - loop_unroll(s, loop, iterations); + + unroll_loop(c, loop, iterations); loop->EndLoop = NULL; return 1; } -/** - * This function prepares a loop to be unrolled by converting it into an if - * statement. Here is an outline of the conversion process: - * BGNLOOP; -> BGNLOOP; - * -> - * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; - * IF temp[0]; -> IF temp[0]; - * BRK; -> - * ENDIF; -> - * -> ENDIF; - * ENDLOOP; -> ENDLOOP - * +/** + * @param c + * @param loop * @param inst A pointer to a BGNLOOP instruction. - * @return If the loop can be unrolled, a pointer to the first instruction of - * the unrolled loop. - * Otherwise, A pointer to the ENDLOOP instruction. - * Null if there is an error. + * @return 1 if all of the members of loop where set. + * @return 0 if there was an error and some members of loop are still NULL. */ -static struct rc_instruction * transform_loop(struct emulate_loop_state * s, +static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, struct rc_instruction * inst) { - struct loop_info *loop; struct rc_instruction * ptr; - memory_pool_array_reserve(&s->C->Pool, struct loop_info, - s->Loops, s->LoopCount, s->LoopReserved, 1); - - loop = &s->Loops[s->LoopCount++]; - memset(loop, 0, sizeof(struct loop_info)); if(inst->U.I.Opcode != RC_OPCODE_BGNLOOP){ - rc_error(s->C, "expected BGNLOOP\n", __FUNCTION__); - return NULL; + rc_error(c, "%s: expected BGNLOOP", __FUNCTION__); + return 0; } + + memset(loop, 0, sizeof(struct loop_info)); + loop->BeginLoop = inst; - for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next){ + for(ptr = loop->BeginLoop->Next; !loop->EndLoop; ptr = ptr->Next) { + + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP.\n", + __FUNCTION__); + return 0; + } + switch(ptr->U.I.Opcode){ case RC_OPCODE_BGNLOOP: - /* Nested loop */ - ptr = transform_loop(s, ptr); - if(!ptr){ - return NULL; + { + /* Nested loop, skip ahead to the end. */ + unsigned int loop_depth = 1; + for(ptr = ptr->Next; ptr != &c->Program.Instructions; + ptr = ptr->Next){ + if (ptr->U.I.Opcode == RC_OPCODE_BGNLOOP) { + loop_depth++; + } else if (ptr->U.I.Opcode == RC_OPCODE_ENDLOOP) { + if (!--loop_depth) { + break; + } + } + } + if (ptr == &c->Program.Instructions) { + rc_error(c, "%s: BGNLOOP without an ENDLOOOP\n", + __FUNCTION__); + return 0; } break; + } case RC_OPCODE_BRK: - loop->Brk = ptr; - if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF){ - rc_error(s->C, - "%s: expected ENDIF\n",__FUNCTION__); - return NULL; - } - loop->EndIf = ptr->Next; - if(ptr->Prev->U.I.Opcode != RC_OPCODE_IF){ - rc_error(s->C, - "%s: expected IF\n", __FUNCTION__); - return NULL; + if(ptr->Next->U.I.Opcode != RC_OPCODE_ENDIF + || ptr->Prev->U.I.Opcode != RC_OPCODE_IF + || loop->Brk){ + continue; } + loop->Brk = ptr; loop->If = ptr->Prev; + loop->EndIf = ptr->Next; switch(loop->If->Prev->U.I.Opcode){ case RC_OPCODE_SLT: case RC_OPCODE_SGE: @@ -375,18 +383,62 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, case RC_OPCODE_SNE: break; default: - rc_error(s->C, "%s expected conditional\n", + rc_error(c, "%s: expected conditional", __FUNCTION__); - return NULL; + return 0; } loop->Cond = loop->If->Prev; - ptr = loop->EndIf; break; + case RC_OPCODE_ENDLOOP: loop->EndLoop = ptr; break; } } + + if (loop->BeginLoop && loop->Brk && loop->If && loop->EndIf + && loop->Cond && loop->EndLoop) { + return 1; + } + return 0; +} + +/** + * This function prepares a loop to be unrolled by converting it into an if + * statement. Here is an outline of the conversion process: + * BGNLOOP; -> BGNLOOP; + * -> + * SGE/SLT temp[0], temp[1], temp[2]; -> SLT/SGE temp[0], temp[1], temp[2]; + * IF temp[0]; -> IF temp[0]; + * BRK; -> + * ENDIF; -> + * -> ENDIF; + * ENDLOOP; -> ENDLOOP + * + * @param inst A pointer to a BGNLOOP instruction. + * @return If the loop can be unrolled, a pointer to the first instruction of + * the unrolled loop. + * Otherwise, A pointer to the ENDLOOP instruction. + * Null if there is an error. + */ +static struct rc_instruction * transform_loop(struct emulate_loop_state * s, + struct rc_instruction * inst, + int prog_inst_limit) +{ + struct loop_info * loop; + + memory_pool_array_reserve(&s->C->Pool, struct loop_info, + s->Loops, s->LoopCount, s->LoopReserved, 1); + + loop = &s->Loops[s->LoopCount++]; + + if (!build_loop_info(s->C, loop, inst)) + return NULL; + + if(try_unroll_loop(s->C, loop, prog_inst_limit)){ + return loop->BeginLoop->Next; + } + /* Reverse the conditional instruction */ switch(loop->Cond->U.I.Opcode){ case RC_OPCODE_SGE: @@ -411,31 +463,27 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, rc_error(s->C, "loop->Cond is not a conditional.\n"); return NULL; } - - /* Check if the number of loops is known at compile time. */ - if(transform_const_loop(s, loop)){ - return loop->BeginLoop->Next; - } - /* Prepare the loop to be unrolled */ + /* Prepare the loop to be emulated */ rc_remove_instruction(loop->Brk); rc_remove_instruction(loop->EndIf); rc_insert_instruction(loop->EndLoop->Prev, loop->EndIf); return loop->EndLoop; } -void rc_transform_unroll_loops(struct radeon_compiler *c, - struct emulate_loop_state * s) +void rc_transform_loops(struct radeon_compiler *c, + struct emulate_loop_state * s, + int prog_inst_limit) { struct rc_instruction * ptr; - + memset(s, 0, sizeof(struct emulate_loop_state)); s->C = c; ptr = s->C->Program.Instructions.Next; while(ptr != &s->C->Program.Instructions) { if(ptr->Type == RC_INSTRUCTION_NORMAL && ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ - ptr = transform_loop(s, ptr); + ptr = transform_loop(s, ptr, prog_inst_limit); if(!ptr){ return; } @@ -444,8 +492,23 @@ void rc_transform_unroll_loops(struct radeon_compiler *c, } } -void rc_emulate_loops(struct emulate_loop_state *s, - unsigned int max_instructions) +void rc_unroll_loops(struct radeon_compiler *c, int prog_inst_limit) +{ + struct rc_instruction * inst; + struct loop_info loop; + + for(inst = c->Program.Instructions.Next; + inst != &c->Program.Instructions; inst = inst->Next) { + + if (inst->U.I.Opcode == RC_OPCODE_BGNLOOP) { + if (build_loop_info(c, &loop, inst)) { + try_unroll_loop(c, &loop, prog_inst_limit); + } + } + } +} + +void rc_emulate_loops(struct emulate_loop_state *s, int prog_inst_limit) { int i; /* Iterate backwards of the list of loops so that loops that nested @@ -455,8 +518,8 @@ void rc_emulate_loops(struct emulate_loop_state *s, if(!s->Loops[i].EndLoop){ continue; } - unsigned int iterations = loop_calc_iterations(s, &s->Loops[i], - max_instructions); - loop_unroll(s, &s->Loops[i], iterations); + unsigned int iterations = loop_max_possible_iterations( + s->C, &s->Loops[i], prog_inst_limit); + unroll_loop(s->C, &s->Loops[i], iterations); } } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h index 7748813c4e..339527ba3b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -23,10 +23,11 @@ struct emulate_loop_state { unsigned int LoopReserved; }; -void rc_transform_unroll_loops(struct radeon_compiler *c, - struct emulate_loop_state * s); +void rc_transform_loops(struct radeon_compiler *c, + struct emulate_loop_state * s, int prog_inst_limit); -void rc_emulate_loops(struct emulate_loop_state *s, - unsigned int max_instructions); +void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit); + +void rc_emulate_loops(struct emulate_loop_state * s, int prog_inst_limit); #endif /* RADEON_EMULATE_LOOPS_H */ diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c index eca0651536..7a3f35950a 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_optimize.c @@ -164,7 +164,8 @@ static void peephole(struct radeon_compiler * c, struct rc_instruction * inst_mo inst = inst->Next) { /* XXX In the future we might be able to make the optimizer * smart enough to handle loops. */ - if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP){ + if(inst->U.I.Opcode == RC_OPCODE_BGNLOOP + || inst->U.I.Opcode == RC_OPCODE_ENDLOOP){ return; } rc_for_all_reads_mask(inst, peephole_scan_read, &s); -- cgit v1.2.3 From 09c8fa570e69272f65cb49840ec7c709820b1b2a Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Tue, 3 Aug 2010 20:17:00 -0700 Subject: r300/compiler: Always unroll loops when doing loop emulation. --- src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c | 2 +- src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c | 4 ++-- .../drivers/dri/r300/compiler/radeon_emulate_loops.c | 17 ++++++++++------- .../drivers/dri/r300/compiler/radeon_emulate_loops.h | 2 +- 4 files changed, 14 insertions(+), 11 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c index 070939497c..c6246a81a2 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_fragprog.c @@ -113,7 +113,7 @@ void r3xx_compile_fragment_program(struct r300_fragment_program_compiler* c) debug_program_log(c, "after unroll loops"); } else{ - rc_transform_loops(&c->Base, &loop_state, R300_PFS_MAX_ALU_INST); + rc_transform_loops(&c->Base, &loop_state); debug_program_log(c, "after transform loops"); rc_emulate_branches(&c->Base); diff --git a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c index fe34ff67cd..e940fedec2 100644 --- a/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r3xx_vertprog.c @@ -645,11 +645,11 @@ void r3xx_compile_vertex_program(struct r300_vertex_program_compiler* compiler) * on all chipsets. */ if (compiler->Base.is_r500){ - rc_transform_loops(&compiler->Base, &loop_state, R500_VS_MAX_ALU); + rc_transform_loops(&compiler->Base, &loop_state); rc_emulate_loops(&loop_state, R500_VS_MAX_ALU); } else { + rc_transform_loops(&compiler->Base, &loop_state); rc_emulate_loops(&loop_state, R300_VS_MAX_ALU); - rc_transform_loops(&compiler->Base, &loop_state, R300_VS_MAX_ALU); } debug_program_log(compiler, "after emulate loops"); diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c index 94e3e5f4f5..24c3ae57b6 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.c @@ -186,6 +186,10 @@ static void get_incr_amount(void * data, struct rc_instruction * inst, } } +/** + * If prog_inst_limit is -1, then all eligible loops will be unrolled regardless + * of how many iterations they have. + */ static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop, unsigned int prog_inst_limit) { @@ -296,7 +300,8 @@ static int try_unroll_loop(struct radeon_compiler * c, struct loop_info * loop, return 0; } - if (iterations > loop_max_possible_iterations(c, loop, + if (prog_inst_limit > 0 + && iterations > loop_max_possible_iterations(c, loop, prog_inst_limit)) { return 0; } @@ -422,8 +427,7 @@ static int build_loop_info(struct radeon_compiler * c, struct loop_info * loop, * Null if there is an error. */ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, - struct rc_instruction * inst, - int prog_inst_limit) + struct rc_instruction * inst) { struct loop_info * loop; @@ -435,7 +439,7 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, if (!build_loop_info(s->C, loop, inst)) return NULL; - if(try_unroll_loop(s->C, loop, prog_inst_limit)){ + if(try_unroll_loop(s->C, loop, -1)){ return loop->BeginLoop->Next; } @@ -472,8 +476,7 @@ static struct rc_instruction * transform_loop(struct emulate_loop_state * s, } void rc_transform_loops(struct radeon_compiler *c, - struct emulate_loop_state * s, - int prog_inst_limit) + struct emulate_loop_state * s) { struct rc_instruction * ptr; @@ -483,7 +486,7 @@ void rc_transform_loops(struct radeon_compiler *c, while(ptr != &s->C->Program.Instructions) { if(ptr->Type == RC_INSTRUCTION_NORMAL && ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){ - ptr = transform_loop(s, ptr, prog_inst_limit); + ptr = transform_loop(s, ptr); if(!ptr){ return; } diff --git a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h index 339527ba3b..86d91ef14b 100644 --- a/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h +++ b/src/mesa/drivers/dri/r300/compiler/radeon_emulate_loops.h @@ -24,7 +24,7 @@ struct emulate_loop_state { }; void rc_transform_loops(struct radeon_compiler *c, - struct emulate_loop_state * s, int prog_inst_limit); + struct emulate_loop_state * s); void rc_unroll_loops(struct radeon_compiler * c, int prog_inst_limit); -- cgit v1.2.3 From 8ad5b76d52f1c009f48ea90556633e497b40ba87 Mon Sep 17 00:00:00 2001 From: Andre Maasikas Date: Wed, 4 Aug 2010 12:49:39 +0300 Subject: r600: relax stride/alignment requirements for vertices seems hw can do unaligned accesses and unaligned strides removes extra conversion when using vbo's however I needed to switch 3 component byte format to 4 component formats for tests to pass. Somewhat sililar to GL_SHORT fix done earlier removes assert and gains +2 piglit especially draw-vertices --- src/mesa/drivers/dri/r600/r700_assembler.c | 5 ++++- src/mesa/drivers/dri/r600/r700_render.c | 23 +++++++---------------- src/mesa/drivers/dri/r600/r700_vertprog.c | 6 +++--- 3 files changed, 14 insertions(+), 20 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r600/r700_assembler.c b/src/mesa/drivers/dri/r600/r700_assembler.c index b555ea683c..9c954cbf70 100644 --- a/src/mesa/drivers/dri/r600/r700_assembler.c +++ b/src/mesa/drivers/dri/r600/r700_assembler.c @@ -275,7 +275,10 @@ GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size) case 2: format = FMT_8_8; break; case 3: - format = FMT_8_8_8; break; + /* for some (small/unaligned) strides using 4 comps works + * better, probably same as GL_SHORT below + * test piglit/draw-vertices */ + format = FMT_8_8_8_8; break; case 4: format = FMT_8_8_8_8; break; default: diff --git a/src/mesa/drivers/dri/r600/r700_render.c b/src/mesa/drivers/dri/r600/r700_render.c index 316a0943f4..ba55f38e05 100644 --- a/src/mesa/drivers/dri/r600/r700_render.c +++ b/src/mesa/drivers/dri/r600/r700_render.c @@ -625,11 +625,11 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input stride = (input[i]->StrideB == 0) ? getTypeSize(input[i]->Type) * input[i]->Size : input[i]->StrideB; - if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT || + if (input[i]->Type == GL_DOUBLE || input[i]->Type == GL_UNSIGNED_INT || input[i]->Type == GL_INT #if MESA_BIG_ENDIAN - getTypeSize(input[i]->Type) != 4 || + || getTypeSize(input[i]->Type) != 4 #endif - stride < 4) + ) { r700ConvertAttrib(ctx, count, input[i], &context->stream_desc[index]); } @@ -637,19 +637,10 @@ static void r700SetupStreams(GLcontext *ctx, const struct gl_client_array *input { if (input[i]->BufferObj->Name) { - if (stride % 4 != 0) - { - assert(((intptr_t) input[i]->Ptr) % input[i]->StrideB == 0); - r700AlignDataToDword(ctx, input[i], count, &context->stream_desc[index]); - context->stream_desc[index].is_named_bo = GL_FALSE; - } - else - { - context->stream_desc[index].stride = input[i]->StrideB; - context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; - context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; - context->stream_desc[index].is_named_bo = GL_TRUE; - } + context->stream_desc[index].stride = input[i]->StrideB; + context->stream_desc[index].bo_offset = (intptr_t) input[i]->Ptr; + context->stream_desc[index].bo = get_radeon_buffer_object(input[i]->BufferObj)->bo; + context->stream_desc[index].is_named_bo = GL_TRUE; } else { diff --git a/src/mesa/drivers/dri/r600/r700_vertprog.c b/src/mesa/drivers/dri/r600/r700_vertprog.c index 137f3007ce..6a2a09eaf1 100644 --- a/src/mesa/drivers/dri/r600/r700_vertprog.c +++ b/src/mesa/drivers/dri/r600/r700_vertprog.c @@ -461,11 +461,11 @@ static void r700TranslateAttrib(GLcontext *ctx, GLuint unLoc, int count, const s stride = (input->StrideB == 0) ? getTypeSize(input->Type) * input->Size : input->StrideB; - if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT || + if (input->Type == GL_DOUBLE || input->Type == GL_UNSIGNED_INT || input->Type == GL_INT #if MESA_BIG_ENDIAN - getTypeSize(input->Type) != 4 || + || getTypeSize(input->Type) != 4 #endif - stride < 4) + ) { pStreamDesc->type = GL_FLOAT; -- cgit v1.2.3 From 81bc4f4cbfdd4d8abaf97cb66baf1c7b0cbe05b3 Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 4 Aug 2010 15:38:24 -0700 Subject: intel: Remove unnecessary header. --- src/mesa/drivers/dri/intel/intel_extensions_es2.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/intel/intel_extensions_es2.c b/src/mesa/drivers/dri/intel/intel_extensions_es2.c index baf8e13001..de34bbb2ae 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions_es2.c +++ b/src/mesa/drivers/dri/intel/intel_extensions_es2.c @@ -28,7 +28,6 @@ #include "main/extensions.h" #include "intel_extensions.h" -#include "utils.h" static const char *es2_extensions[] = { /* Used by mesa internally (cf all_mesa_extensions in ../common/utils.c) */ -- cgit v1.2.3 From 7f7bbf0d5b78e0a4ace7ad6eddf5e73a826c75eb Mon Sep 17 00:00:00 2001 From: Vinson Lee Date: Wed, 4 Aug 2010 15:45:41 -0700 Subject: r300/compiler: Remove unnecessary header. --- src/mesa/drivers/dri/r300/compiler/r500_fragprog.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c index 95be619d5d..80a120497e 100644 --- a/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c +++ b/src/mesa/drivers/dri/r300/compiler/r500_fragprog.c @@ -30,7 +30,6 @@ #include #include "../r300_reg.h" -#include "radeon_emulate_loops.h" /** * Rewrite IF instructions to use the ALU result special register. -- cgit v1.2.3 From e96a52e9933eea7264a42983db1428368bcb4962 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 28 Jul 2010 18:37:24 +0200 Subject: dri/nouveau: Fix up software mipmap generation. --- src/mesa/drivers/dri/nouveau/nouveau_fbo.c | 7 ++-- src/mesa/drivers/dri/nouveau/nouveau_texture.c | 49 ++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c index 8be7edb150..bd1273beea 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_fbo.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_fbo.c @@ -220,7 +220,7 @@ get_tex_format(struct gl_texture_image *ti) case MESA_FORMAT_RGB565: return GL_RGB5; default: - assert(0); + return GL_NONE; } } @@ -231,7 +231,6 @@ nouveau_render_texture(GLcontext *ctx, struct gl_framebuffer *fb, struct gl_renderbuffer *rb = att->Renderbuffer; struct gl_texture_image *ti = att->Texture->Image[att->CubeMapFace][att->TextureLevel]; - int ret; /* Allocate a renderbuffer object for the texture if we * haven't already done so. */ @@ -244,9 +243,7 @@ nouveau_render_texture(GLcontext *ctx, struct gl_framebuffer *fb, } /* Update the renderbuffer fields from the texture. */ - ret = set_renderbuffer_format(rb, get_tex_format(ti)); - assert(ret); - + set_renderbuffer_format(rb, get_tex_format(ti)); rb->Width = ti->Width; rb->Height = ti->Height; nouveau_surface_ref(&to_nouveau_teximage(ti)->surface, diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c index dbf9a5cc61..79b6757586 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c @@ -38,6 +38,7 @@ #include "main/mipmap.h" #include "main/texfetch.h" #include "main/teximage.h" +#include "drivers/common/meta.h" static struct gl_texture_object * nouveau_texture_new(GLcontext *ctx, GLuint name, GLenum target) @@ -589,6 +590,53 @@ nouveau_texture_unmap(GLcontext *ctx, struct gl_texture_object *t) } } +static void +store_mipmap(GLcontext *ctx, GLenum target, int first, int last, + struct gl_texture_object *t) +{ + struct gl_pixelstore_attrib packing = { + .BufferObj = ctx->Shared->NullBufferObj, + .Alignment = 1 + }; + GLenum format = t->Image[0][first]->TexFormat; + unsigned base_format, type, comps; + int i; + + base_format = _mesa_get_format_base_format(format); + _mesa_format_to_type_and_comps(format, &type, &comps); + + for (i = first; i <= last; i++) { + struct gl_texture_image *ti = t->Image[0][i]; + void *data = ti->Data; + + nouveau_teximage(ctx, 3, target, i, ti->InternalFormat, + ti->Width, ti->Height, ti->Depth, + ti->Border, base_format, type, data, + &packing, t, ti); + + _mesa_free_texmemory(data); + } +} + +static void +nouveau_generate_mipmap(GLcontext *ctx, GLenum target, + struct gl_texture_object *t) +{ + if (_mesa_meta_check_generate_mipmap_fallback(ctx, target, t)) { + struct gl_texture_image *base = t->Image[0][t->BaseLevel]; + + nouveau_teximage_map(ctx, base); + _mesa_generate_mipmap(ctx, target, t); + nouveau_teximage_unmap(ctx, base); + + store_mipmap(ctx, target, t->BaseLevel + 1, + get_last_level(t), t); + + } else { + _mesa_meta_GenerateMipmap(ctx, target, t); + } +} + void nouveau_texture_functions_init(struct dd_function_table *functions) { @@ -607,4 +655,5 @@ nouveau_texture_functions_init(struct dd_function_table *functions) functions->BindTexture = nouveau_bind_texture; functions->MapTexture = nouveau_texture_map; functions->UnmapTexture = nouveau_texture_unmap; + functions->GenerateMipmap = nouveau_generate_mipmap; } -- cgit v1.2.3 From d03f04bfb57cb7b5537cb31f1dc798a6ba500f36 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 28 Jul 2010 22:32:49 +0200 Subject: dri/nv20: Fix some PGRAPH_ERRORs seen with DATA_CHECK enabled. --- src/mesa/drivers/dri/nouveau/nv20_state_fb.c | 5 ++++- src/mesa/drivers/dri/nouveau/nv20_state_tex.c | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c index 21da4f7af1..95691cad04 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_state_fb.c +++ b/src/mesa/drivers/dri/nouveau/nv20_state_fb.c @@ -72,7 +72,7 @@ nv20_emit_framebuffer(GLcontext *ctx, int emit) fb->_ColorDrawBuffers[0])->surface; rt_format |= get_rt_format(s->format); - zeta_pitch = rt_pitch = s->pitch; + rt_pitch = s->pitch; nouveau_bo_markl(bctx, kelvin, NV20TCL_COLOR_OFFSET, s->bo, 0, bo_flags); @@ -88,6 +88,9 @@ nv20_emit_framebuffer(GLcontext *ctx, int emit) nouveau_bo_markl(bctx, kelvin, NV20TCL_ZETA_OFFSET, s->bo, 0, bo_flags); + } else { + rt_format |= get_rt_format(MESA_FORMAT_Z24_S8); + zeta_pitch = rt_pitch; } BEGIN_RING(chan, kelvin, NV20TCL_RT_FORMAT, 2); diff --git a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c index e46118e4fc..2d45513bb4 100644 --- a/src/mesa/drivers/dri/nouveau/nv20_state_tex.c +++ b/src/mesa/drivers/dri/nouveau/nv20_state_tex.c @@ -194,7 +194,8 @@ nv20_emit_tex_obj(GLcontext *ctx, int emit) | nvgl_wrap_mode(t->WrapS) << 0; tx_filter = nvgl_filter_mode(t->MagFilter) << 24 - | nvgl_filter_mode(t->MinFilter) << 16; + | nvgl_filter_mode(t->MinFilter) << 16 + | 2 << 12; tx_enable = NV20TCL_TX_ENABLE_ENABLE | log2i(t->MaxAnisotropy) << 4; -- cgit v1.2.3 From bc578caefb29cb9d1720d51698e2cd23ee490c44 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 4 Aug 2010 16:38:57 +0200 Subject: dri/nouveau: Don't try to validate uninitialized teximages. --- src/mesa/drivers/dri/nouveau/nouveau_texture.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/nouveau/nouveau_texture.c b/src/mesa/drivers/dri/nouveau/nouveau_texture.c index 79b6757586..442f4e899e 100644 --- a/src/mesa/drivers/dri/nouveau/nouveau_texture.c +++ b/src/mesa/drivers/dri/nouveau/nouveau_texture.c @@ -183,10 +183,10 @@ teximage_fits(struct gl_texture_object *t, int level) struct nouveau_surface *s = &to_nouveau_texture(t)->surfaces[level]; struct gl_texture_image *ti = t->Image[0][level]; - return ti && (t->Target == GL_TEXTURE_RECTANGLE || - (s->bo && s->width == ti->Width && - s->height == ti->Height && - s->format == ti->TexFormat)); + return ti && to_nouveau_teximage(ti)->surface.bo && + (t->Target == GL_TEXTURE_RECTANGLE || + (s->bo && s->format == ti->TexFormat && + s->width == ti->Width && s->height == ti->Height)); } static GLboolean -- cgit v1.2.3 From 85cfe321805264686ef8989e45a911a999ed928a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 5 Aug 2010 08:34:09 +0100 Subject: intel: Check for region allocation failure. Signed-off-by: Chris Wilson --- src/mesa/drivers/dri/intel/intel_regions.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c index fe4de18960..680d18ba29 100644 --- a/src/mesa/drivers/dri/intel/intel_regions.c +++ b/src/mesa/drivers/dri/intel/intel_regions.c @@ -155,6 +155,9 @@ intel_region_alloc_internal(struct intel_context *intel, } region = calloc(sizeof(*region), 1); + if (region == NULL) + return region; + region->cpp = cpp; region->width = width; region->height = height; @@ -189,6 +192,9 @@ intel_region_alloc(struct intel_context *intel, region = intel_region_alloc_internal(intel, cpp, width, height, aligned_pitch / cpp, buffer); + if (region == NULL) + return region; + region->tiling = tiling; return region; -- cgit v1.2.3 From 66708fd8a98cc28dab756b9e29d026194ccdfcee Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 5 Aug 2010 08:37:31 +0100 Subject: intel: Check for a NULL src buffer prior to blt This can only happen along a malloc failure path, but check anyway. Signed-off-by: Chris Wilson --- src/mesa/drivers/dri/intel/intel_tex_copy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c index 224b506c05..6efb2ddc55 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_copy.c +++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c @@ -102,7 +102,7 @@ do_copy_texsubimage(struct intel_context *intel, GLcontext *ctx = &intel->ctx; const struct intel_region *src = get_teximage_source(intel, internalFormat); - if (!intelImage->mt || !src) { + if (!intelImage->mt || !src || !src->buffer) { if (INTEL_DEBUG & DEBUG_FALLBACKS) fprintf(stderr, "%s fail %p %p (0x%08x)\n", __FUNCTION__, intelImage->mt, src, internalFormat); -- cgit v1.2.3 From 063c70d7f72a043037fb4c9b534c53208f86611d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 May 2010 17:50:54 -0400 Subject: r600: add span support for 2D tiling Requires tiling config ioctl support from the drm to use. kms only. Signed-off-by: Alex Deucher --- .../drivers/dri/radeon/radeon_common_context.c | 9 +- .../drivers/dri/radeon/radeon_common_context.h | 7 + src/mesa/drivers/dri/radeon/radeon_screen.h | 7 + src/mesa/drivers/dri/radeon/radeon_span.c | 192 ++++++++++++++++++++- 4 files changed, 210 insertions(+), 5 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.c b/src/mesa/drivers/dri/radeon/radeon_common_context.c index f76c49eada..92663bf66d 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.c +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.c @@ -699,6 +699,13 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, rb->base.Height = drawable->h; rb->has_surface = 0; + /* r6xx+ tiling */ + rb->tile_config = radeon->radeonScreen->tile_config; + rb->group_bytes = radeon->radeonScreen->group_bytes; + rb->num_channels = radeon->radeonScreen->num_channels; + rb->num_banks = radeon->radeonScreen->num_banks; + rb->r7xx_bank_op = radeon->radeonScreen->r7xx_bank_op; + if (buffers[i].attachment == __DRI_BUFFER_STENCIL && depth_bo) { if (RADEON_DEBUG & RADEON_DRI) fprintf(stderr, "(reusing depth buffer as stencil)\n"); @@ -727,7 +734,7 @@ radeon_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable, bo->flags |= RADEON_BO_FLAGS_MACRO_TILE; if (tiling_flags & RADEON_TILING_MICRO) bo->flags |= RADEON_BO_FLAGS_MICRO_TILE; - + } if (buffers[i].attachment == __DRI_BUFFER_DEPTH) { diff --git a/src/mesa/drivers/dri/radeon/radeon_common_context.h b/src/mesa/drivers/dri/radeon/radeon_common_context.h index ec773cfa52..f06e5fdf24 100644 --- a/src/mesa/drivers/dri/radeon/radeon_common_context.h +++ b/src/mesa/drivers/dri/radeon/radeon_common_context.h @@ -93,6 +93,13 @@ struct radeon_renderbuffer GLuint pf_pending; /**< sequence number of pending flip */ GLuint vbl_pending; /**< vblank sequence number of pending flip */ __DRIdrawable *dPriv; + + /* r6xx+ tiling */ + GLuint tile_config; + GLint group_bytes; + GLint num_channels; + GLint num_banks; + GLint r7xx_bank_op; }; struct radeon_framebuffer diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.h b/src/mesa/drivers/dri/radeon/radeon_screen.h index 0d7e335fa3..2b33201a53 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.h +++ b/src/mesa/drivers/dri/radeon/radeon_screen.h @@ -112,6 +112,13 @@ typedef struct radeon_screen { int kernel_mm; drm_radeon_sarea_t *sarea; /* Private SAREA data */ struct radeon_bo_manager *bom; + + /* r6xx+ tiling */ + GLuint tile_config; + GLint group_bytes; + GLint num_channels; + GLint num_banks; + GLint r7xx_bank_op; } radeonScreenRec, *radeonScreenPtr; #define IS_R100_CLASS(screen) \ diff --git a/src/mesa/drivers/dri/radeon/radeon_span.c b/src/mesa/drivers/dri/radeon/radeon_span.c index 1adb609603..9dfe2dd243 100644 --- a/src/mesa/drivers/dri/radeon/radeon_span.c +++ b/src/mesa/drivers/dri/radeon/radeon_span.c @@ -111,7 +111,6 @@ static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, * two main types: * - 1D (akin to macro-linear/micro-tiled on older asics) * - 2D (akin to macro-tiled/micro-tiled on older asics) - * only 1D tiling is implemented below */ #if defined(RADEON_R600) static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, @@ -208,12 +207,190 @@ static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, return offset; } +static inline GLint r600_log2(GLint n) +{ + GLint log2 = 0; + + while (n >>= 1) + ++log2; + return log2; +} + +static inline GLint r600_2d_tile_helper(const struct radeon_renderbuffer * rrb, + GLint x, GLint y, GLint is_depth, GLint is_stencil) +{ + GLint group_bytes = rrb->group_bytes; + GLint num_channels = rrb->num_channels; + GLint num_banks = rrb->num_banks; + GLint r7xx_bank_op = rrb->r7xx_bank_op; + /* */ + GLint group_bits = r600_log2(group_bytes); + GLint channel_bits = r600_log2(num_channels); + GLint bank_bits = r600_log2(num_banks); + GLint element_bytes = rrb->cpp; + GLint num_samples = 1; + GLint tile_width = 8; + GLint tile_height = 8; + GLint tile_thickness = 1; + GLint macro_tile_width = num_banks; + GLint macro_tile_height = num_channels; + GLint pitch_elements = (rrb->pitch / element_bytes) / tile_width; + GLint height = rrb->base.Height / tile_height; + GLint z = 0; + GLint sample_number = 0; + /* */ + GLint tile_bytes; + GLint macro_tile_bytes; + GLint macro_tiles_per_row; + GLint macro_tiles_per_slice; + GLint slice_offset; + GLint macro_tile_row_index; + GLint macro_tile_column_index; + GLint macro_tile_offset; + GLint pixel_number = 0; + GLint element_offset; + GLint bank = 0; + GLint channel = 0; + GLint total_offset; + GLint group_mask = (1 << group_bits) - 1; + GLint offset_low; + GLint offset_high; + GLint offset = 0; + + switch (num_channels) { + case 2: + default: + // channel[0] = x[3] ^ y[3] + channel |= (((x >> 3) ^ (y >> 3)) & 1) << 0; + break; + case 4: + // channel[0] = x[4] ^ y[3] + channel |= (((x >> 4) ^ (y >> 3)) & 1) << 0; + // channel[1] = x[3] ^ y[4] + channel |= (((x >> 3) ^ (y >> 4)) & 1) << 1; + break; + case 8: + // channel[0] = x[5] ^ y[3] + channel |= (((x >> 5) ^ (y >> 3)) & 1) << 0; + // channel[0] = x[4] ^ x[5] ^ y[4] + channel |= (((x >> 4) ^ (x >> 5) ^ (y >> 4)) & 1) << 1; + // channel[0] = x[3] ^ y[5] + channel |= (((x >> 3) ^ (y >> 5)) & 1) << 2; + break; + } + + switch (num_banks) { + case 4: + // bank[0] = x[3] ^ y[4 + log2(num_channels)] + bank |= (((x >> 3) ^ (y >> (4 + channel_bits))) & 1) << 0; + if (r7xx_bank_op) + // bank[1] = x[3] ^ y[4 + log2(num_channels)] ^ x[5] + bank |= (((x >> 4) ^ (y >> (3 + channel_bits)) ^ (x >> 5)) & 1) << 1; + else + // bank[1] = x[4] ^ y[3 + log2(num_channels)] + bank |= (((x >> 4) ^ (y >> (3 + channel_bits))) & 1) << 1; + break; + case 8: + // bank[0] = x[3] ^ y[5 + log2(num_channels)] + bank |= (((x >> 3) ^ (y >> (5 + channel_bits))) & 1) << 0; + // bank[1] = x[4] ^ y[4 + log2(num_channels)] ^ y[5 + log2(num_channels)] + bank |= (((x >> 4) ^ (y >> (4 + channel_bits)) ^ (y >> (5 + channel_bits))) & 1) << 1; + if (r7xx_bank_op) + // bank[2] = x[5] ^ y[3 + log2(num_channels)] ^ x[6] + bank |= (((x >> 5) ^ (y >> (3 + channel_bits)) ^ (x >> 6)) & 1) << 2; + else + // bank[2] = x[5] ^ y[3 + log2(num_channels)] + bank |= (((x >> 5) ^ (y >> (3 + channel_bits))) & 1) << 2; + break; + } + + tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples; + macro_tile_bytes = macro_tile_width * macro_tile_height * tile_bytes; + macro_tiles_per_row = pitch_elements / macro_tile_width; + macro_tiles_per_slice = macro_tiles_per_row * (height / macro_tile_height); + slice_offset = (z / tile_thickness) * macro_tiles_per_slice * macro_tile_bytes; + macro_tile_row_index = (y / tile_height) / macro_tile_height; + macro_tile_column_index = (x / tile_width) / macro_tile_width; + macro_tile_offset = ((macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index) * macro_tile_bytes; + + if (is_depth) { + GLint pixel_offset = 0; + + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0] + pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1] + pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] + pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + switch (element_bytes) { + case 2: + pixel_offset = pixel_number * element_bytes * num_samples; + break; + case 4: + /* stencil and depth data are stored separately within a tile. + * stencil is stored in a contiguous tile before the depth tile. + * stencil element is 1 byte, depth element is 3 bytes. + * stencil tile is 64 bytes. + */ + if (is_stencil) + pixel_offset = pixel_number * 1 * num_samples; + else + pixel_offset = (pixel_number * 3 * num_samples) + 64; + break; + } + element_offset = pixel_offset + (sample_number * element_bytes); + } else { + GLint sample_offset; + + switch (element_bytes) { + case 1: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] + pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] + pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + case 2: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] + pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0] + pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + case 4: + pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] + pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] + pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0] + pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2] + pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] + pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] + break; + } + sample_offset = sample_number * (tile_bytes / num_samples); + element_offset = sample_offset + (pixel_number * element_bytes); + } + total_offset = (slice_offset + macro_tile_offset) >> (channel_bits + bank_bits); + total_offset += element_offset; + + offset_low = total_offset & group_mask; + offset_high = (total_offset & ~group_mask) << (channel_bits + bank_bits); + offset = (bank << (group_bits + channel_bits)) + (channel << group_bits) + offset_low + offset_high; + + return offset; +} + /* depth buffers */ static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { GLubyte *ptr = rrb->bo->ptr; - GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0); + GLint offset; + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + offset = r600_2d_tile_helper(rrb, x, y, 1, 0); + else + offset = r600_1d_tile_helper(rrb, x, y, 1, 0); return &ptr[offset]; } @@ -221,7 +398,11 @@ static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb, GLint x, GLint y) { GLubyte *ptr = rrb->bo->ptr; - GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1); + GLint offset; + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + offset = r600_2d_tile_helper(rrb, x, y, 1, 1); + else + offset = r600_1d_tile_helper(rrb, x, y, 1, 1); return &ptr[offset]; } @@ -235,7 +416,10 @@ static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb, if (rrb->has_surface || !(rrb->bo->flags & mask)) { offset = x * rrb->cpp + y * rrb->pitch; } else { - offset = r600_1d_tile_helper(rrb, x, y, 0, 0); + if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) + offset = r600_2d_tile_helper(rrb, x, y, 0, 0); + else + offset = r600_1d_tile_helper(rrb, x, y, 0, 0); } return &ptr[offset]; } -- cgit v1.2.3 From 3095bc679da7241488219e1778db2083c662bbef Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 20 May 2010 17:59:05 -0400 Subject: r600: add new relocs for tiling support Signed-off-by: Alex Deucher --- src/mesa/drivers/dri/r600/r600_blit.c | 20 ++++++++++++------- src/mesa/drivers/dri/r600/r700_chip.c | 37 ++++++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 18 deletions(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/r600/r600_blit.c b/src/mesa/drivers/dri/r600/r600_blit.c index 172f85eb26..619678214f 100644 --- a/src/mesa/drivers/dri/r600/r600_blit.c +++ b/src/mesa/drivers/dri/r600/r600_blit.c @@ -390,13 +390,20 @@ set_render_target(context_t *context, struct radeon_bo *bo, gl_format mesa_forma 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(12); + BEGIN_BATCH_NO_AUTOSTATE(9); R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), cb_color0_size); R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), cb_color0_view); - R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info); R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), 0); END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), cb_color0_info); + R600_OUT_BATCH_RELOC(0, + bo, + 0, + 0, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); + END_BATCH(); + COMMIT_BATCH(); } @@ -1447,7 +1454,7 @@ set_default_state(context_t *context) SETbit(sq_dyn_gpr_cntl_ps_flush_req, VS_PC_LIMIT_ENABLE_bit); } - BEGIN_BATCH_NO_AUTOSTATE(117); + BEGIN_BATCH_NO_AUTOSTATE(114); R600_OUT_BATCH_REGSEQ(SQ_CONFIG, 6); R600_OUT_BATCH(sq_config); R600_OUT_BATCH(sq_gpr_resource_mgmt_1); @@ -1477,7 +1484,6 @@ set_default_state(context_t *context) (CLRCMP_SEL_SRC << CLRCMP_FCN_SEL_shift)); R600_OUT_BATCH_REGVAL(SQ_VTX_BASE_VTX_LOC, 0); R600_OUT_BATCH_REGVAL(SQ_VTX_START_INST_LOC, 0); - R600_OUT_BATCH_REGVAL(DB_DEPTH_INFO, 0); R600_OUT_BATCH_REGVAL(DB_DEPTH_CONTROL, 0); R600_OUT_BATCH_REGVAL(CB_SHADER_MASK, (OUTPUT0_ENABLE_mask)); R600_OUT_BATCH_REGVAL(CB_TARGET_MASK, (TARGET0_ENABLE_mask)); @@ -1607,7 +1613,7 @@ unsigned r600_blit(GLcontext *ctx, /* Flush is needed to make sure that source buffer has correct data */ radeonFlush(ctx); - rcommonEnsureCmdBufSpace(&context->radeon, 304, __FUNCTION__); + rcommonEnsureCmdBufSpace(&context->radeon, 305, __FUNCTION__); /* load shaders */ load_shaders(context->radeon.glCtx); @@ -1616,7 +1622,7 @@ unsigned r600_blit(GLcontext *ctx, return GL_FALSE; /* set clear state */ - /* 117 */ + /* 114 */ set_default_state(context); /* shaders */ @@ -1632,7 +1638,7 @@ unsigned r600_blit(GLcontext *ctx, set_tex_sampler(context); /* dst */ - /* 27 */ + /* 31 */ set_render_target(context, dst_bo, dst_mesaformat, dst_pitch, dst_width, dst_height, dst_offset); /* scissors */ diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c index cefda3ac4b..1e955b93b2 100644 --- a/src/mesa/drivers/dri/r600/r700_chip.c +++ b/src/mesa/drivers/dri/r600/r700_chip.c @@ -617,18 +617,25 @@ static void r700SendDepthTargetState(GLcontext *ctx, struct radeon_state_atom *a r700SetDepthTarget(context); - BEGIN_BATCH_NO_AUTOSTATE(8 + 2); + BEGIN_BATCH_NO_AUTOSTATE(7 + 2); R600_OUT_BATCH_REGSEQ(DB_DEPTH_SIZE, 2); R600_OUT_BATCH(r700->DB_DEPTH_SIZE.u32All); R600_OUT_BATCH(r700->DB_DEPTH_VIEW.u32All); - R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 2); + R600_OUT_BATCH_REGSEQ(DB_DEPTH_BASE, 1); R600_OUT_BATCH(r700->DB_DEPTH_BASE.u32All); - R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All); R600_OUT_BATCH_RELOC(r700->DB_DEPTH_BASE.u32All, rrb->bo, r700->DB_DEPTH_BASE.u32All, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGSEQ(DB_DEPTH_INFO, 1); + R600_OUT_BATCH(r700->DB_DEPTH_INFO.u32All); + R600_OUT_BATCH_RELOC(r700->DB_DEPTH_INFO.u32All, + rrb->bo, + r700->DB_DEPTH_INFO.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); if ((context->radeon.radeonScreen->chip_family > CHIP_FAMILY_R600) && (context->radeon.radeonScreen->chip_family < CHIP_FAMILY_RV770)) { @@ -687,27 +694,35 @@ static void r700SendRenderTargetState(GLcontext *ctx, struct radeon_state_atom * BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(CB_COLOR0_TILE + (4 * id), 1); R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_TILE.u32All); - R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_TILE.u32All, rrb->bo, - r700->render_target[id].CB_COLOR0_BASE.u32All, + r700->render_target[id].CB_COLOR0_TILE.u32All, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); BEGIN_BATCH_NO_AUTOSTATE(3 + 2); R600_OUT_BATCH_REGSEQ(CB_COLOR0_FRAG + (4 * id), 1); R600_OUT_BATCH(r700->render_target[id].CB_COLOR0_FRAG.u32All); - R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_BASE.u32All, + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_FRAG.u32All, rrb->bo, - r700->render_target[id].CB_COLOR0_BASE.u32All, + r700->render_target[id].CB_COLOR0_FRAG.u32All, 0, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); - BEGIN_BATCH_NO_AUTOSTATE(12); + BEGIN_BATCH_NO_AUTOSTATE(9); R600_OUT_BATCH_REGVAL(CB_COLOR0_SIZE + (4 * id), r700->render_target[id].CB_COLOR0_SIZE.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_VIEW + (4 * id), r700->render_target[id].CB_COLOR0_VIEW.u32All); - R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All); R600_OUT_BATCH_REGVAL(CB_COLOR0_MASK + (4 * id), r700->render_target[id].CB_COLOR0_MASK.u32All); END_BATCH(); + BEGIN_BATCH_NO_AUTOSTATE(3 + 2); + R600_OUT_BATCH_REGVAL(CB_COLOR0_INFO + (4 * id), r700->render_target[id].CB_COLOR0_INFO.u32All); + R600_OUT_BATCH_RELOC(r700->render_target[id].CB_COLOR0_INFO.u32All, + rrb->bo, + r700->render_target[id].CB_COLOR0_INFO.u32All, + 0, RADEON_GEM_DOMAIN_VRAM, 0); + + END_BATCH(); + COMMIT_BATCH(); } @@ -1567,7 +1582,7 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(sq, always, 34, r700SendSQConfig); ALLOC_STATE(db, always, 17, r700SendDBState); ALLOC_STATE(stencil, always, 4, r700SendStencilState); - ALLOC_STATE(db_target, always, 12, r700SendDepthTargetState); + ALLOC_STATE(db_target, always, 16, r700SendDepthTargetState); ALLOC_STATE(sc, always, 15, r700SendSCState); ALLOC_STATE(scissor, always, 22, r700SendScissorState); ALLOC_STATE(aa, always, 12, r700SendAAState); @@ -1578,7 +1593,7 @@ void r600InitAtoms(context_t *context) ALLOC_STATE(poly, always, 10, r700SendPolyState); ALLOC_STATE(cb, cb, 18, r700SendCBState); ALLOC_STATE(clrcmp, always, 6, r700SendCBCLRCMPState); - ALLOC_STATE(cb_target, always, 29, r700SendRenderTargetState); + ALLOC_STATE(cb_target, always, 31, r700SendRenderTargetState); ALLOC_STATE(blnd, blnd, (6 + (R700_MAX_RENDER_TARGETS * 3)), r700SendCBBlendState); ALLOC_STATE(blnd_clr, always, 6, r700SendCBBlendColorState); ALLOC_STATE(sx, always, 9, r700SendSXState); -- cgit v1.2.3 From e93d413a0d81f591318f362f770083e9ecc7e0c0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 21 Jul 2010 17:46:45 -0400 Subject: r600: add support for getting the tiling config via drm ioctl (v2) Needed for the the 2D tiling span functions. v2: rebase on new kernel, mesa changes Signed-off-by: Alex Deucher --- src/mesa/drivers/dri/radeon/radeon_screen.c | 57 +++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index d3d7b216ba..895c2e1f62 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -213,6 +213,10 @@ static const GLuint __driNConfigOptions = 17; static int getSwapInfo( __DRIdrawable *dPriv, __DRIswapInfo * sInfo ); +#ifndef RADEON_INFO_TILE_CONFIG +#define RADEON_INFO_TILE_CONFIG 0x6 +#endif + static int radeonGetParam(__DRIscreen *sPriv, int param, void *value) { @@ -232,6 +236,9 @@ radeonGetParam(__DRIscreen *sPriv, int param, void *value) case RADEON_PARAM_NUM_Z_PIPES: info.request = RADEON_INFO_NUM_Z_PIPES; break; + case RADEON_INFO_TILE_CONFIG: + info.request = RADEON_INFO_TILE_CONFIG; + break; default: return -EINVAL; } @@ -1320,6 +1327,56 @@ radeonCreateScreen2(__DRIscreen *sPriv) else screen->chip_flags |= RADEON_CLASS_R600; + /* r6xx+ tiling */ + if (IS_R600_CLASS(screen) && (sPriv->drm_version.minor >= 5)) { + ret = radeonGetParam(sPriv, RADEON_INFO_TILE_CONFIG, &temp); + if (ret) + fprintf(stderr, "failed to get tiling info\n"); + else { + screen->tile_config = temp; + screen->r7xx_bank_op = 0; + switch((screen->tile_config & 0xe) >> 1) { + case 0: + screen->num_channels = 1; + break; + case 1: + screen->num_channels = 2; + break; + case 2: + screen->num_channels = 4; + break; + case 3: + screen->num_channels = 8; + break; + default: + fprintf(stderr, "bad channels\n"); + break; + } + switch((screen->tile_config & 0x30) >> 4) { + case 0: + screen->num_banks = 4; + break; + case 1: + screen->num_banks = 8; + break; + default: + fprintf(stderr, "bad banks\n"); + break; + } + switch((screen->tile_config & 0xc0) >> 6) { + case 0: + screen->group_bytes = 256; + break; + case 1: + screen->group_bytes = 512; + break; + default: + fprintf(stderr, "bad group_bytes\n"); + break; + } + } + } + if (IS_R300_CLASS(screen)) { ret = radeonGetParam(sPriv, RADEON_PARAM_NUM_GB_PIPES, &temp); if (ret) { -- cgit v1.2.3 From 918ef7ff18dc407d8ab1d6dc9cfb1267618c6534 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 5 Aug 2010 17:42:29 -0400 Subject: r600c: tiling require drm 2.6.0, not 2.5.0 --- src/mesa/drivers/dri/radeon/radeon_screen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/mesa/drivers') diff --git a/src/mesa/drivers/dri/radeon/radeon_screen.c b/src/mesa/drivers/dri/radeon/radeon_screen.c index 895c2e1f62..fa97a19302 100644 --- a/src/mesa/drivers/dri/radeon/radeon_screen.c +++ b/src/mesa/drivers/dri/radeon/radeon_screen.c @@ -1328,7 +1328,7 @@ radeonCreateScreen2(__DRIscreen *sPriv) screen->chip_flags |= RADEON_CLASS_R600; /* r6xx+ tiling */ - if (IS_R600_CLASS(screen) && (sPriv->drm_version.minor >= 5)) { + if (IS_R600_CLASS(screen) && (sPriv->drm_version.minor >= 6)) { ret = radeonGetParam(sPriv, RADEON_INFO_TILE_CONFIG, &temp); if (ret) fprintf(stderr, "failed to get tiling info\n"); -- cgit v1.2.3