diff options
Diffstat (limited to 'src')
48 files changed, 1407 insertions, 332 deletions
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt index b83abd4093..5b21a2be0b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt +++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt @@ -382,7 +382,9 @@ TGSI Instruction Specification 1.5.7 KILP - Predicated Discard - TBD + if (cc.x || cc.y || cc.z || cc.w) + discard + endif 1.5.8 LG2 - Logarithm Base 2 @@ -599,7 +601,9 @@ TGSI Instruction Specification 1.8.2 KIL - Conditional Discard - TBD + if (src.x < 0.0 || src.y < 0.0 || src.z < 0.0 || src.w < 0.0) + discard + endif 1.8.3 SCS - Sine Cosine diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 160df8dfa7..d68bdeadcc 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -24,6 +24,7 @@ C_SOURCES = \ u_tile.c \ u_time.c \ u_timed_winsys.c \ + u_upload_mgr.c \ u_simple_screen.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 9d5dd006f0..0f15c632c3 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -26,6 +26,7 @@ util = env.ConvenienceLibrary( 'u_tile.c', 'u_time.c', 'u_timed_winsys.c', + 'u_upload_mgr.c', 'u_simple_screen.c', ]) diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c new file mode 100644 index 0000000000..d9c0d7afa8 --- /dev/null +++ b/src/gallium/auxiliary/util/u_upload_mgr.c @@ -0,0 +1,220 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Helper utility for uploading user buffers & other data, and + * coalescing small buffers into larger ones. + */ + +#include "pipe/p_error.h" +#include "pipe/p_inlines.h" +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "u_upload_mgr.h" + + +struct u_upload_mgr { + struct pipe_screen *screen; + + unsigned default_size; + unsigned alignment; + unsigned usage; + + /* The active buffer: + */ + struct pipe_buffer *buffer; + unsigned size; + unsigned offset; +}; + + +struct u_upload_mgr *u_upload_create( struct pipe_screen *screen, + unsigned default_size, + unsigned alignment, + unsigned usage ) +{ + struct u_upload_mgr *upload = CALLOC_STRUCT( u_upload_mgr ); + + upload->default_size = default_size; + upload->screen = screen; + upload->alignment = alignment; + upload->usage = usage; + upload->buffer = NULL; + + return upload; +} + + +static INLINE void +my_buffer_write(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned offset, unsigned size, unsigned dirty_size, + const void *data) +{ + uint8_t *map; + + assert(offset < buf->size); + assert(offset + size <= buf->size); + assert(dirty_size >= size); + assert(size); + + map = pipe_buffer_map_range(screen, buf, offset, size, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(map); + if(map) { + memcpy(map + offset, data, size); + pipe_buffer_flush_mapped_range(screen, buf, offset, dirty_size); + pipe_buffer_unmap(screen, buf); + } +} + +/* Release old buffer. + * + * This must usually be called prior to firing the command stream + * which references the upload buffer, as many memory managers will + * cause subsequent maps of a fired buffer to wait. + * + * Can improve this with a change to pipe_buffer_write to use the + * DONT_WAIT bit, but for now, it's easiest just to grab a new buffer. + */ +void u_upload_flush( struct u_upload_mgr *upload ) +{ + pipe_buffer_reference( &upload->buffer, NULL ); + upload->size = 0; +} + + +void u_upload_destroy( struct u_upload_mgr *upload ) +{ + u_upload_flush( upload ); + FREE( upload ); +} + + +static enum pipe_error +u_upload_alloc_buffer( struct u_upload_mgr *upload, + unsigned min_size ) +{ + /* Release old buffer, if present: + */ + u_upload_flush( upload ); + + /* Allocate a new one: + */ + upload->size = align(MAX2(upload->default_size, min_size), 4096); + + upload->buffer = pipe_buffer_create( upload->screen, + upload->alignment, + upload->usage | PIPE_BUFFER_USAGE_CPU_WRITE, + upload->size ); + if (upload->buffer == NULL) + goto fail; + + upload->offset = 0; + return 0; + +fail: + if (upload->buffer) + pipe_buffer_reference( &upload->buffer, NULL ); + + return PIPE_ERROR_OUT_OF_MEMORY; +} + + +enum pipe_error u_upload_data( struct u_upload_mgr *upload, + unsigned size, + const void *data, + unsigned *out_offset, + struct pipe_buffer **outbuf ) +{ + unsigned alloc_size = align( size, upload->alignment ); + enum pipe_error ret = PIPE_OK; + + if (upload->offset + alloc_size > upload->size) { + ret = u_upload_alloc_buffer( upload, alloc_size ); + if (ret) + return ret; + } + + /* Copy the data, using map_range if available: + */ + my_buffer_write( upload->screen, + upload->buffer, + upload->offset, + size, + alloc_size, + data ); + + /* Emit the return values: + */ + pipe_buffer_reference( outbuf, upload->buffer ); + *out_offset = upload->offset; + upload->offset += alloc_size; + return PIPE_OK; +} + + +/* As above, but upload the full contents of a buffer. Useful for + * uploading user buffers, avoids generating an explosion of GPU + * buffers if you have an app that does lots of small vertex buffer + * renders or DrawElements calls. + */ +enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, + unsigned offset, + unsigned size, + struct pipe_buffer *inbuf, + unsigned *out_offset, + struct pipe_buffer **outbuf ) +{ + enum pipe_error ret = PIPE_OK; + const char *map = NULL; + + map = (const char *)pipe_buffer_map( + upload->screen, inbuf, PIPE_BUFFER_USAGE_CPU_READ ); + + if (map == NULL) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto done; + } + + if (0) + debug_printf("upload ptr %p ofs %d sz %d\n", map, offset, size); + + ret = u_upload_data( upload, + size, + map + offset, + out_offset, + outbuf ); + if (ret) + goto done; + +done: + if (map) + pipe_buffer_unmap( upload->screen, inbuf ); + + return ret; +} diff --git a/src/gallium/auxiliary/util/u_upload_mgr.h b/src/gallium/auxiliary/util/u_upload_mgr.h new file mode 100644 index 0000000000..745b5834af --- /dev/null +++ b/src/gallium/auxiliary/util/u_upload_mgr.h @@ -0,0 +1,75 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Helper utility for uploading user buffers & other data, and + * coalescing small buffers into larger ones. + */ + +#ifndef U_UPLOAD_MGR_H +#define U_UPLOAD_MGR_H + +struct pipe_screen; +struct pipe_buffer; +struct u_upload_mgr; + + +struct u_upload_mgr *u_upload_create( struct pipe_screen *screen, + unsigned default_size, + unsigned alignment, + unsigned usage ); + +void u_upload_destroy( struct u_upload_mgr *upload ); + +/* Unmap and release old buffer. + * + * This must usually be called prior to firing the command stream + * which references the upload buffer, as many memory managers either + * don't like firing a mapped buffer or cause subsequent maps of a + * fired buffer to wait. For now, it's easiest just to grab a new + * buffer. + */ +void u_upload_flush( struct u_upload_mgr *upload ); + + +enum pipe_error u_upload_data( struct u_upload_mgr *upload, + unsigned size, + const void *data, + unsigned *out_offset, + struct pipe_buffer **outbuf ); + + +enum pipe_error u_upload_buffer( struct u_upload_mgr *upload, + unsigned offset, + unsigned size, + struct pipe_buffer *inbuf, + unsigned *out_offset, + struct pipe_buffer **outbuf ); + + + +#endif + diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h index d8038ff1e1..9913678d27 100644 --- a/src/gallium/drivers/r300/r300_cs.h +++ b/src/gallium/drivers/r300/r300_cs.h @@ -47,9 +47,6 @@ #define CP_PACKET0(register, count) \ (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2)) -#define CP_PACKET3(op, count) \ - (RADEON_CP_PACKET3 | (op) | ((count) << 16)) - #define CS_LOCALS(context) \ struct r300_winsys* cs_winsys = context->winsys; \ struct radeon_cs* cs = cs_winsys->cs; \ @@ -118,6 +115,21 @@ cs_winsys->flush_cs(cs); \ } while (0) -#include "r300_cs_inlines.h" +#define RADEON_ONE_REG_WR (1 << 15) + +#define OUT_CS_ONE_REG(register, count) do { \ + if (VERY_VERBOSE_REGISTERS) \ + debug_printf("r300: writing data sequence of %d to 0x%04X\n", \ + count, register); \ + assert(register); \ + OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ +} while (0) + +#define CP_PACKET3(op, count) \ + (RADEON_CP_PACKET3 | (op) | ((count) << 16)) + +#define OUT_CS_PKT3(op, count) do { \ + OUT_CS(CP_PACKET3(op, count)); \ +} while (0) #endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_cs_inlines.h b/src/gallium/drivers/r300/r300_cs_inlines.h deleted file mode 100644 index 03bb608eb9..0000000000 --- a/src/gallium/drivers/r300/r300_cs_inlines.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -/* r300_cs_inlines: This is just a handful of useful inlines for sending - * (very) common instructions to the CS buffer. Should only be included from - * r300_cs.h, probably. */ - -#ifdef R300_CS_H - -#define RADEON_ONE_REG_WR (1 << 15) - -#define OUT_CS_ONE_REG(register, count) do { \ - if (VERY_VERBOSE_REGISTERS) \ - debug_printf("r300: writing data sequence of %d to 0x%04X\n", \ - count, register); \ - assert(register); \ - OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \ -} while (0) - -#define R300_PACIFY do { \ - OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 14) | (1 << 15) | (1 << 16) | (1 << 17) | \ - (1 << 18)); \ -} while (0) - -#define R300_SCREENDOOR do { \ - OUT_CS_REG(R300_SC_SCREENDOOR, 0x0); \ - R300_PACIFY; \ - OUT_CS_REG(R300_SC_SCREENDOOR, 0xffffff); \ -} while (0) - -#endif /* R300_CS_H */ diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c index a2e771bd1b..9bfb89626c 100644 --- a/src/gallium/drivers/r300/r300_emit.c +++ b/src/gallium/drivers/r300/r300_emit.c @@ -152,21 +152,6 @@ void r500_emit_fragment_shader(struct r300_context* r300, END_CS; } -/* Translate pipe_format into US_OUT_FMT. Note that formats are stored from - * C3 to C0. */ -uint32_t translate_out_fmt(enum pipe_format format) -{ - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - return R300_US_OUT_FMT_C4_8 | - R300_C0_SEL_B | R300_C1_SEL_G | - R300_C2_SEL_R | R300_C3_SEL_A; - default: - return R300_US_OUT_FMT_UNUSED; - } - return 0; -} - /* XXX add pitch, stride, clean up */ void r300_emit_fb_state(struct r300_context* r300, struct pipe_framebuffer_state* fb) @@ -182,7 +167,7 @@ void r300_emit_fb_state(struct r300_context* r300, OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i), - translate_out_fmt(fb->cbufs[i]->format)); + r300_translate_out_fmt(fb->cbufs[i]->format)); } if (fb->zsbuf) { diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h index 4aba1ee08c..0bc1f90e6a 100644 --- a/src/gallium/drivers/r300/r300_emit.h +++ b/src/gallium/drivers/r300/r300_emit.h @@ -28,6 +28,7 @@ #include "r300_context.h" #include "r300_cs.h" #include "r300_screen.h" +#include "r300_state_inlines.h" void r300_emit_blend_state(struct r300_context* r300, struct r300_blend_state* blend); @@ -52,11 +53,20 @@ void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs); void r300_emit_rs_block_state(struct r300_context* r300, struct r300_rs_block* rs); +void r300_emit_sampler(struct r300_context* r300, + struct r300_sampler_state* sampler, unsigned offset); + void r300_emit_scissor_state(struct r300_context* r300, struct r300_scissor_state* scissor); +void r300_emit_texture(struct r300_context* r300, + struct r300_texture* tex, unsigned offset); + void r300_emit_vertex_format_state(struct r300_context* r300); +void r300_emit_viewport_state(struct r300_context* r300, + struct r300_viewport_state* viewport); + /* Emit all dirty state. */ void r300_emit_dirty_state(struct r300_context* r300); diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h index 6f3ad970ab..3fe45e1393 100644 --- a/src/gallium/drivers/r300/r300_reg.h +++ b/src/gallium/drivers/r300/r300_reg.h @@ -293,10 +293,19 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. # define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */ /* Programmable Stream Control Signed Normalize Control */ -#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc -# define SGN_NORM_ZERO 0 -# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1 -# define SGN_NORM_NO_ZERO 2 +#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc +# define SGN_NORM_ZERO 0 +# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1 +# define SGN_NORM_NO_ZERO 2 +# define R300_SGN_NORM_NO_ZERO (SGN_NORM_NO_ZERO | \ + (SGN_NORM_NO_ZERO << 2) | (SGN_NORM_NO_ZERO << 4) | \ + (SGN_NORM_NO_ZERO << 6) | (SGN_NORM_NO_ZERO << 8) | \ + (SGN_NORM_NO_ZERO << 10) | (SGN_NORM_NO_ZERO << 12) | \ + (SGN_NORM_NO_ZERO << 14) | (SGN_NORM_NO_ZERO << 16) | \ + (SGN_NORM_NO_ZERO << 18) | (SGN_NORM_NO_ZERO << 20) | \ + (SGN_NORM_NO_ZERO << 22) | (SGN_NORM_NO_ZERO << 24) | \ + (SGN_NORM_NO_ZERO << 26) | (SGN_NORM_NO_ZERO << 28) | \ + (SGN_NORM_NO_ZERO << 30)) /* gap */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c index 58bce22fc8..2a026e7fca 100644 --- a/src/gallium/drivers/r300/r300_state.c +++ b/src/gallium/drivers/r300/r300_state.c @@ -515,12 +515,22 @@ static void r300_set_scissor_state(struct pipe_context* pipe, struct r300_context* r300 = r300_context(pipe); draw_flush(r300->draw); - r300->scissor_state->scissor_top_left = - (state->minx << R300_SCISSORS_X_SHIFT) | - (state->miny << R300_SCISSORS_Y_SHIFT); - r300->scissor_state->scissor_bottom_right = - (state->maxx << R300_SCISSORS_X_SHIFT) | - (state->maxy << R300_SCISSORS_Y_SHIFT); + if (r300_screen(r300->context.screen)->caps->is_r500) { + r300->scissor_state->scissor_top_left = + (state->minx << R300_SCISSORS_X_SHIFT) | + (state->miny << R300_SCISSORS_Y_SHIFT); + r300->scissor_state->scissor_bottom_right = + (state->maxx << R300_SCISSORS_X_SHIFT) | + (state->maxy << R300_SCISSORS_Y_SHIFT); + } else { + /* Offset of 1440 in non-R500 chipsets. */ + r300->scissor_state->scissor_top_left = + ((state->minx + 1440) << R300_SCISSORS_X_SHIFT) | + ((state->miny + 1440) << R300_SCISSORS_Y_SHIFT); + r300->scissor_state->scissor_bottom_right = + ((state->maxx + 1440) << R300_SCISSORS_X_SHIFT) | + ((state->maxy + 1440) << R300_SCISSORS_Y_SHIFT); + } r300->dirty_state |= R300_NEW_SCISSOR; } diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h index fd92c71756..b80ff1c1ab 100644 --- a/src/gallium/drivers/r300/r300_state_inlines.h +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -297,8 +297,7 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_A32R32G32B32: return R300_COLOR_FORMAT_ARGB32323232; case PIPE_FORMAT_A16R16G16B16: - return R300_COLOR_FORMAT_ARGB16161616; */ - /* XXX Not in pipe_format + return R300_COLOR_FORMAT_ARGB16161616; case PIPE_FORMAT_A10R10G10B10_UNORM: return R500_COLOR_FORMAT_ARGB10101010; case PIPE_FORMAT_A2R10G10B10_UNORM: @@ -306,7 +305,7 @@ static INLINE uint32_t r300_translate_colorformat(enum pipe_format format) case PIPE_FORMAT_I10_UNORM: return R500_COLOR_FORMAT_I10; */ default: - debug_printf("r300: Implementation error: " \ + debug_printf("r300: Implementation error: " "Got unsupported color format %s in %s\n", pf_name(format), __FUNCTION__); break; @@ -324,7 +323,7 @@ static INLINE uint32_t r300_translate_zsformat(enum pipe_format format) case PIPE_FORMAT_Z24S8_UNORM: return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL; default: - debug_printf("r300: Implementation error: " \ + debug_printf("r300: Implementation error: " "Got unsupported ZS format %s in %s\n", pf_name(format), __FUNCTION__); break; @@ -332,6 +331,24 @@ static INLINE uint32_t r300_translate_zsformat(enum pipe_format format) return 0; } +/* Translate pipe_format into US_OUT_FMT. + * Note that formats are stored from C3 to C0. */ +static INLINE uint32_t r300_translate_out_fmt(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + return R300_US_OUT_FMT_C4_8 | + R300_C0_SEL_B | R300_C1_SEL_G | + R300_C2_SEL_R | R300_C3_SEL_A; + default: + debug_printf("r300: Implementation error: " + "Got unsupported output format %s in %s\n", + pf_name(format), __FUNCTION__); + return R300_US_OUT_FMT_UNUSED; + } + return 0; +} + /* Non-CSO state. (For now.) */ static INLINE uint32_t r300_translate_gb_pipes(int pipe_count) diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c index 3d51a8e65d..e1837b6380 100644 --- a/src/gallium/drivers/r300/r300_state_invariant.c +++ b/src/gallium/drivers/r300/r300_state_invariant.c @@ -34,11 +34,11 @@ void r300_emit_invariant_state(struct r300_context* r300) struct r300_capabilities* caps = r300_screen(r300->context.screen)->caps; CS_LOCALS(r300); - BEGIN_CS(24 + (caps->has_tcl ? 2: 0)); + BEGIN_CS(30 + (caps->has_tcl ? 2: 0)); + /*** Graphics Backend (GB) ***/ /* Various GB enables */ - OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE | - R300_GB_LINE_STUFF_ENABLE | R300_GB_TRIANGLE_STUFF_ENABLE); + OUT_CS_REG(R300_GB_ENABLE, 0x0); /* Subpixel multisampling for AA */ OUT_CS_REG(R300_GB_MSPOS0, 0x66666666); OUT_CS_REG(R300_GB_MSPOS1, 0x66666666); @@ -49,6 +49,8 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W); /* AA enable */ OUT_CS_REG(R300_GB_AA_CONFIG, 0x0); + + /*** Geometry Assembly (GA) ***/ /* GA errata fixes. */ if (caps->is_r500) { OUT_CS_REG(R300_GA_ENHANCE, @@ -62,13 +64,19 @@ void r300_emit_invariant_state(struct r300_context* r300) R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE); } - /* Fog block. */ - OUT_CS_REG(R300_FG_FOG_BLEND, 0x00000000); - OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x00000000); - OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x00000000); - OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x00000000); - OUT_CS_REG(R300_FG_DEPTH_SRC, 0x00000000); + /*** Fog (FG) ***/ + OUT_CS_REG(R300_FG_FOG_BLEND, 0x0); + OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x0); + OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x0); + OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x0); + OUT_CS_REG(R300_FG_DEPTH_SRC, 0x0); + /*** VAP ***/ + /* Max and min vertex index clamp. */ + OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0); + OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xffffff); + /* Sign/normalize control */ + OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, R300_SGN_NORM_NO_ZERO); /* TCL-only stuff */ if (caps->has_tcl) { /* Amount of time to wait for vertex fetches in PVS */ @@ -78,7 +86,7 @@ void r300_emit_invariant_state(struct r300_context* r300) END_CS; /* XXX unsorted stuff from surface_fill */ - BEGIN_CS(99 + (caps->has_tcl ? 26 : 0)); + BEGIN_CS(91 + (caps->has_tcl ? 26 : 0)); /* Flush PVS. */ OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0); @@ -86,9 +94,6 @@ void r300_emit_invariant_state(struct r300_context* r300) R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA | R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA | R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT); - /* Max and min vertex index clamp. */ - OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xFFFFFF); - OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0); /* XXX endian */ if (caps->has_tcl) { OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP); @@ -103,8 +108,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP | R300_VAP_TCL_BYPASS); } - /* XXX magic number not in r300_reg */ - OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA); /* XXX point tex stuffing */ OUT_CS_REG_SEQ(R300_GA_POINT_S0, 1); OUT_CS_32F(0.0); @@ -157,7 +160,6 @@ void r300_emit_invariant_state(struct r300_context* r300) OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F); /* Vertex size. */ OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8); - OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA); OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0, 0x00000003); OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x00000000); OUT_CS_REG(R300_TX_ENABLE, 0x0); diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c index 2cc0677e52..db18975a10 100644 --- a/src/gallium/drivers/r300/r300_surface.c +++ b/src/gallium/drivers/r300/r300_surface.c @@ -23,6 +23,55 @@ #include "r300_surface.h" +static void r300_surface_setup(struct pipe_context* pipe, + struct pipe_surface* dest, + unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct r300_context* r300 = r300_context(pipe); + CS_LOCALS(r300); + struct r300_capabilities* caps = r300_screen(pipe->screen)->caps; + struct r300_texture* tex = (struct r300_texture*)dest->texture; + unsigned pixpitch = tex->stride / tex->tex.block.size; + + r300_emit_blend_state(r300, &blend_clear_state); + r300_emit_blend_color_state(r300, &blend_color_clear_state); + r300_emit_dsa_state(r300, &dsa_clear_state); + r300_emit_rs_state(r300, &rs_clear_state); + + BEGIN_CS(15); + + /* Pixel scissors. */ + OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); + if (caps->is_r500) { + OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT)); + OUT_CS((w << R300_SCISSORS_X_SHIFT) | (h << R300_SCISSORS_Y_SHIFT)); + } else { + /* Non-R500 chipsets have an offset of 1440 in their scissors. */ + OUT_CS(((x + 1440) << R300_SCISSORS_X_SHIFT) | + ((y + 1440) << R300_SCISSORS_Y_SHIFT)); + OUT_CS(((w + 1440) << R300_SCISSORS_X_SHIFT) | + ((h + 1440) << R300_SCISSORS_Y_SHIFT)); + } + + /* Flush colorbuffer and blend caches. */ + OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, + R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D | + R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL); + OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, + R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | + R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); + + /* Setup colorbuffer. */ + OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); + OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); + OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch | + r300_translate_colorformat(tex->tex.format)); + OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0xf); + + END_CS; +} + /* Provides pipe_context's "surface_fill". Commonly used for clearing * buffers. */ static void r300_surface_fill(struct pipe_context* pipe, @@ -53,10 +102,7 @@ static void r300_surface_fill(struct pipe_context* pipe, return; } - r300_emit_blend_state(r300, &blend_clear_state); - r300_emit_blend_color_state(r300, &blend_color_clear_state); - r300_emit_dsa_state(r300, &dsa_clear_state); - r300_emit_rs_state(r300, &rs_clear_state); + r300_surface_setup(r300, dest, x, y, w, h); /* Fragment shader setup */ if (caps->is_r500) { @@ -67,7 +113,7 @@ static void r300_surface_fill(struct pipe_context* pipe, r300_emit_rs_block_state(r300, &r300_rs_block_clear_state); } - BEGIN_CS(36); + BEGIN_CS(21); /* Viewport setup */ OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6); @@ -78,31 +124,13 @@ static void r300_surface_fill(struct pipe_context* pipe, OUT_CS_32F(1.0); OUT_CS_32F(0.0); - /* Pixel scissors */ - OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2); - OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT)); - OUT_CS((w << R300_SCISSORS_X_SHIFT) | (h << R300_SCISSORS_Y_SHIFT)); - /* The size of the point we're about to draw, in sixths of pixels */ OUT_CS_REG(R300_GA_POINT_SIZE, ((h * 6) & R300_POINTSIZE_Y_MASK) | ((w * 6) << R300_POINTSIZE_X_SHIFT)); - /* Flush colorbuffer and blend caches. */ - OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, - R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D | - R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL); - OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, - R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE | - R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); - - OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1); - OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch | - r300_translate_colorformat(tex->tex.format)); - OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F); - /* XXX Packet3 */ - OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8)); + /* Packet3 with our point vertex */ + OUT_CS_PKT3(R200_3D_DRAW_IMMD_2, 8); OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING | (1 << R300_PRIM_NUM_VERTICES_SHIFT)); OUT_CS_32F(w / 2.0); @@ -143,47 +171,14 @@ static void r300_surface_copy(struct pipe_context* pipe, " dimensions %dx%d (pixel pitch %d)\n", src, srcx, srcy, dest, destx, desty, w, h, pixpitch); + /* if ((srctex == desttex) && + ((destx < srcx + w) || (srcx < destx + w)) && + ((desty < srcy + h) || (srcy < destx + h))) { */ if (TRUE) { debug_printf("r300: Falling back on surface_copy\n"); return util_surface_copy(pipe, FALSE, dest, destx, desty, src, srcx, srcy, w, h); } -#if 0 - BEGIN_CS(); - OUT_CS_REG(RADEON_DEFAULT_SC_BOTTOM_RIGHT,(RADEON_DEFAULT_SC_RIGHT_MAX | - RADEON_DEFAULT_SC_BOTTOM_MAX)); - OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, (RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_SRC_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_NONE | - (datatype << 8) | - RADEON_GMC_SRC_DATATYPE_COLOR | - RADEON_ROP[rop].rop | - RADEON_DP_SRC_SOURCE_MEMORY | - RADEON_GMC_CLR_CMP_CNTL_DIS)); - OUT_CS_REG(RADEON_DP_BRUSH_FRGD_CLR, 0xffffffff); - OUT_CS_REG(RADEON_DP_BRUSH_BKGD_CLR, 0x0); - OUT_CS_REG(RADEON_DP_SRC_FRGD_CLR, 0xffffffff); - OUT_CS_REG(RADEON_DP_SRC_BKGD_CLR, 0x0); - OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, planemask); - OUT_ACCEL_REG(RADEON_DP_CNTL, ((info->accel_state->xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) | - (info->accel_state->ydir >= 0 ? RADEON_DST_Y_TOP_TO_BOTTOM : 0)); -); - - OUT_CS_REG_SEQ(RADEON_DST_PITCH_OFFSET, 1); - OUT_CS_RELOC(desttex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0); - - OUT_CS_REG_SEQ(RADEON_SRC_PITCH_OFFSET, 1); - OUT_CS_RELOC(srctex->buffer, 0, - RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0, 0); - - OUT_CS_REG(RADEON_SRC_Y_X, (srcy << 16) | srcx); - OUT_CS_REG(RADEON_DST_Y_X, (desty << 16) | destx); - OUT_CS_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w); - OUT_CS_REG(RADEON_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH_ALL); - OUT_CS_REG(RADEON_WAIT_UNTIL, - RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_DMA_GUI_IDLE); - END_CS; -#endif } void r300_init_surface_functions(struct r300_context* r300) diff --git a/src/gallium/drivers/r300/r300_swtcl_emit.c b/src/gallium/drivers/r300/r300_swtcl_emit.c index 3db09514c6..83c25f496b 100644 --- a/src/gallium/drivers/r300/r300_swtcl_emit.c +++ b/src/gallium/drivers/r300/r300_swtcl_emit.c @@ -66,7 +66,7 @@ r300_swtcl_render_get_vertex_info(struct vbuf_render* render) r300_update_derived_state(r300); - return &r300->vertex_info; + return &r300->vertex_info.vinfo; } static boolean r300_swtcl_render_allocate_vertices(struct vbuf_render* render, @@ -177,7 +177,6 @@ static boolean r300_swtcl_render_set_primitive(struct vbuf_render* render, static void prepare_render(struct r300_swtcl_render* render, unsigned count) { struct r300_context* r300 = render->r300; - int i; CS_LOCALS(r300); @@ -195,7 +194,7 @@ static void prepare_render(struct r300_swtcl_render* render, unsigned count) * VBPNTR [relocated BO] */ BEGIN_CS(7); - OUT_CS(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 3)); + OUT_CS_PKT3(R300_PACKET3_3D_LOAD_VBPNTR, 3); OUT_CS(1); OUT_CS(r300->vertex_info.vinfo.size | (r300->vertex_info.vinfo.size << 8)); @@ -210,7 +209,6 @@ static void r300_swtcl_render_draw_arrays(struct vbuf_render* render, { struct r300_swtcl_render* r300render = r300_swtcl_render(render); struct r300_context* r300 = r300render->r300; - struct pipe_screen* screen = r300->context.screen; CS_LOCALS(r300); @@ -221,7 +219,7 @@ static void r300_swtcl_render_draw_arrays(struct vbuf_render* render, debug_printf("r300: Doing vbuf render, count %d\n", count); BEGIN_CS(2); - OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0)); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_VBUF_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | r300render->hwprim); END_CS; @@ -239,34 +237,31 @@ static void r300_swtcl_render_draw(struct vbuf_render* render, CS_LOCALS(r300); - count /= 4; - prepare_render(r300render, count); /* Send our indices into an index buffer. */ index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX, - count * 4); + count); if (!index_buffer) { return; } index_map = pipe_buffer_map(screen, index_buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(index_map, indices, count * 4); + memcpy(index_map, indices, count); pipe_buffer_unmap(screen, index_buffer); debug_printf("r300: Doing indexbuf render, count %d\n", count); -#if 0 + BEGIN_CS(5); - OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0)); + OUT_CS_PKT3(R300_PACKET3_3D_DRAW_INDX_2, 0); OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) | r300render->hwprim | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); - OUT_CS(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2)); + OUT_CS_PKT3(R300_PACKET3_INDX_BUFFER, 2); OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2)); OUT_CS_RELOC(index_buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); END_CS; -#endif } static void r300_swtcl_render_destroy(struct vbuf_render* render) @@ -277,7 +272,6 @@ static void r300_swtcl_render_destroy(struct vbuf_render* render) static struct vbuf_render* r300_swtcl_render_create(struct r300_context* r300) { struct r300_swtcl_render* r300render = CALLOC_STRUCT(r300_swtcl_render); - struct pipe_screen* screen = r300->context.screen; r300render->r300 = r300; @@ -295,19 +289,6 @@ static struct vbuf_render* r300_swtcl_render_create(struct r300_context* r300) r300render->base.release_vertices = r300_swtcl_render_release_vertices; r300render->base.destroy = r300_swtcl_render_destroy; - /* XXX bonghits ahead - r300render->vbo_alloc_size = 128 * 4096; - r300render->vbo_size = r300render->vbo_alloc_size; - r300render->vbo_offset = 0; - r300render->vbo = pipe_buffer_create(screen, - 64, - PIPE_BUFFER_USAGE_VERTEX, - r300render->vbo_size); - r300render->vbo_map = pipe_buffer_map(screen, - r300render->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); - pipe_buffer_unmap(screen, r300render->vbo); */ - return &r300render->base; } diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index 05e862f097..07162db7b6 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -22,21 +22,11 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) if (quad->input.prim == QUAD_PRIM_TRI) { struct softpipe_context *softpipe = qs->softpipe; /* need to invert Y to index into OpenGL's stipple pattern */ - int y0, y1; - uint stipple0, stipple1; const int col0 = quad->input.x0 % 32; - - if (softpipe->rasterizer->origin_lower_left) { - y0 = softpipe->framebuffer.height - 1 - quad->input.y0; - y1 = y0 - 1; - } - else { - y0 = quad->input.y0; - y1 = y0 + 1; - } - - stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; - stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; + const int y0 = quad->input.y0; + const int y1 = y0 + 1; + const uint stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; + const uint stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; /* turn off quad mask bits that fail the stipple test */ if ((stipple0 & (bit31 >> col0)) == 0) diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 0925653b5d..96cb09b905 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -732,18 +732,9 @@ setup_fragcoord_coeff(struct setup_context *setup, uint slot) setup->coef[slot].dadx[0] = 1.0; setup->coef[slot].dady[0] = 0.0; /*Y*/ - if (setup->softpipe->rasterizer->origin_lower_left) { - /* y=0=bottom */ - const int winHeight = setup->softpipe->framebuffer.height; - setup->coef[slot].a0[1] = (float) (winHeight - 1); - setup->coef[slot].dady[1] = -1.0; - } - else { - /* y=0=top */ - setup->coef[slot].a0[1] = 0.0; - setup->coef[slot].dady[1] = 1.0; - } + setup->coef[slot].a0[1] = 0.0; setup->coef[slot].dadx[1] = 0.0; + setup->coef[slot].dady[1] = 1.0; /*Z*/ setup->coef[slot].a0[2] = setup->posCoef.a0[2]; setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index b6a1ce0d62..f9fbe9aee7 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -123,7 +123,6 @@ void trace_dump_rasterizer_state(const struct pipe_rasterizer_state *state) trace_dump_member(uint, state, line_stipple_pattern); trace_dump_member(bool, state, line_last_pixel); trace_dump_member(bool, state, bypass_vs_clip_and_viewport); - trace_dump_member(bool, state, origin_lower_left); trace_dump_member(bool, state, flatshade_first); trace_dump_member(bool, state, gl_rasterization_rules); diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index ed3a026023..ceac755e71 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -223,6 +223,13 @@ struct pipe_screen { * specified to buffer_map_range. This is different from the * ARB_map_buffer_range semantics because we don't forbid multiple mappings * of the same buffer (yet). + * + * If the buffer was mapped for writing and no buffer_flush_mapped_range + * call was done until the buffer_unmap is called then the pipe driver will + * assumed that the whole buffer was written. This is for backward + * compatibility purposes and may affect performance -- the state tracker + * should always specify exactly what got written while the buffer was + * mapped. */ void (*buffer_flush_mapped_range)( struct pipe_screen *screen, struct pipe_buffer *buf, diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index aad41fab11..9c7baa3d92 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -117,7 +117,6 @@ struct pipe_rasterizer_state */ unsigned bypass_vs_clip_and_viewport:1; - unsigned origin_lower_left:1; /**< Is (0,0) the lower-left corner? */ unsigned flatshade_first:1; /**< take color attribute from the first vertex of a primitive */ unsigned gl_rasterization_rules:1; /**< enable tweaks for GL rasterization? */ diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c index 1d8ad0b046..5cfd233c4c 100644 --- a/src/gallium/state_trackers/g3dvl/vl_context.c +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -42,7 +42,6 @@ static int vlInitCommon(struct vlContext *context) rast.line_stipple_pattern = 0; rast.line_last_pixel = 0; rast.bypass_vs_clip_and_viewport = 0; - rast.origin_lower_left = 0; rast.line_width = 1; rast.point_smooth = 0; rast.point_size = 1; diff --git a/src/gallium/state_trackers/wgl/SConscript b/src/gallium/state_trackers/wgl/SConscript index 2141b02d68..038a7a31b3 100644 --- a/src/gallium/state_trackers/wgl/SConscript +++ b/src/gallium/state_trackers/wgl/SConscript @@ -30,6 +30,7 @@ if env['platform'] in ['windows']: 'shared/stw_arbextensionsstring.c', 'shared/stw_getprocaddress.c', 'shared/stw_arbpixelformat.c', + 'shared/stw_tls.c', ] wgl = env.ConvenienceLibrary( diff --git a/src/gallium/state_trackers/wgl/shared/stw_context.c b/src/gallium/state_trackers/wgl/shared/stw_context.c index d77daac39c..89df8b0a2a 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_context.c +++ b/src/gallium/state_trackers/wgl/shared/stw_context.c @@ -39,9 +39,7 @@ #include "shared/stw_pixelformat.h" #include "stw_public.h" #include "stw_context.h" - -static HDC current_hdc = NULL; -static UINT_PTR current_hglrc = 0; +#include "stw_tls.h" BOOL stw_copy_context( @@ -137,17 +135,7 @@ stw_create_layer_context( pipe_mutex_lock( stw_dev->mutex ); { - UINT_PTR i; - - for (i = 0; i < STW_CONTEXT_MAX; i++) { - if (stw_dev->ctx_array[i].ctx == NULL) { - /* success: - */ - stw_dev->ctx_array[i].ctx = ctx; - hglrc = i + 1; - break; - } - } + hglrc = handle_table_add(stw_dev->ctx_table, ctx); } pipe_mutex_unlock( stw_dev->mutex ); @@ -197,12 +185,14 @@ stw_delete_context( if (WindowFromDC( ctx->hdc ) != NULL) ReleaseDC( WindowFromDC( ctx->hdc ), ctx->hdc ); - st_destroy_context( ctx->st ); + pipe_mutex_lock(stw_dev->mutex); + { + st_destroy_context(ctx->st); + FREE(ctx); + handle_table_remove(stw_dev->ctx_table, hglrc); + } + pipe_mutex_unlock(stw_dev->mutex); - FREE( ctx ); - - stw_dev->ctx_array[hglrc - 1].ctx = NULL; - ret = TRUE; } @@ -264,13 +254,13 @@ get_window_size( HDC hdc, GLuint *width, GLuint *height ) UINT_PTR stw_get_current_context( void ) { - return current_hglrc; + return stw_tls_get_data()->currentGLRC; } HDC stw_get_current_dc( void ) { - return current_hdc; + return stw_tls_get_data()->currentDC; } BOOL @@ -291,12 +281,9 @@ stw_make_current( pipe_mutex_lock( stw_dev->mutex ); ctx = stw_lookup_context( hglrc ); pipe_mutex_unlock( stw_dev->mutex ); - - if (ctx == NULL) - return FALSE; - current_hdc = hdc; - current_hglrc = hglrc; + stw_tls_get_data()->currentDC = hdc; + stw_tls_get_data()->currentGLRC = hglrc; if (glcurctx != NULL) { curctx = (struct stw_context *) glcurctx->DriverCtx; diff --git a/src/gallium/state_trackers/wgl/shared/stw_device.c b/src/gallium/state_trackers/wgl/shared/stw_device.c index 0dca856d73..3c1eb1ad39 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_device.c +++ b/src/gallium/state_trackers/wgl/shared/stw_device.c @@ -35,6 +35,7 @@ #include "shared/stw_winsys.h" #include "shared/stw_pixelformat.h" #include "shared/stw_public.h" +#include "shared/stw_tls.h" #ifdef WIN32_THREADS extern _glthread_Mutex OneTimeLock; @@ -70,6 +71,8 @@ st_init(const struct stw_winsys *stw_winsys) assert(!stw_dev); + stw_tls_init(); + stw_dev = &stw_dev_storage; memset(stw_dev, 0, sizeof(*stw_dev)); @@ -91,6 +94,11 @@ st_init(const struct stw_winsys *stw_winsys) pipe_mutex_init( stw_dev->mutex ); + stw_dev->ctx_table = handle_table_create(); + if (!stw_dev->ctx_table) { + goto error1; + } + pixelformat_init(); return TRUE; @@ -101,6 +109,24 @@ error1: } +boolean +st_init_thread(void) +{ + if (!stw_tls_init_thread()) { + return FALSE; + } + + return TRUE; +} + + +void +st_cleanup_thread(void) +{ + stw_tls_cleanup_thread(); +} + + void st_cleanup(void) { @@ -114,9 +140,12 @@ st_cleanup(void) pipe_mutex_lock( stw_dev->mutex ); { /* Ensure all contexts are destroyed */ - for (i = 0; i < STW_CONTEXT_MAX; i++) - if (stw_dev->ctx_array[i].ctx) - stw_delete_context( i + 1 ); + i = handle_table_get_first_handle(stw_dev->ctx_table); + while (i) { + stw_delete_context(i); + i = handle_table_get_next_handle(stw_dev->ctx_table, i); + } + handle_table_destroy(stw_dev->ctx_table); } pipe_mutex_unlock( stw_dev->mutex ); @@ -133,6 +162,8 @@ st_cleanup(void) debug_memory_end(stw_dev->memdbg_no); #endif + stw_tls_cleanup(); + stw_dev = NULL; } @@ -140,13 +171,12 @@ st_cleanup(void) struct stw_context * stw_lookup_context( UINT_PTR dhglrc ) { - if (dhglrc == 0 || - dhglrc >= STW_CONTEXT_MAX) + if (dhglrc == 0) return NULL; if (stw_dev == NULL) return NULL; - return stw_dev->ctx_array[dhglrc - 1].ctx; + return (struct stw_context *) handle_table_get(stw_dev->ctx_table, dhglrc); } diff --git a/src/gallium/state_trackers/wgl/shared/stw_device.h b/src/gallium/state_trackers/wgl/shared/stw_device.h index 80da14b84f..6a9cee0d02 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_device.h +++ b/src/gallium/state_trackers/wgl/shared/stw_device.h @@ -31,9 +31,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_thread.h" - - -#define STW_CONTEXT_MAX 32 +#include "util/u_handle_table.h" struct pipe_screen; @@ -45,9 +43,7 @@ struct stw_device pipe_mutex mutex; - struct { - struct stw_context *ctx; - } ctx_array[STW_CONTEXT_MAX]; + struct handle_table *ctx_table; #ifdef DEBUG unsigned long memdbg_no; diff --git a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c index 2992a1ac0a..b216ca5c82 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c +++ b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c @@ -28,6 +28,7 @@ #include "util/u_debug.h" #include "stw_pixelformat.h" #include "stw_public.h" +#include "stw_tls.h" #define MAX_PIXELFORMATS 16 @@ -35,8 +36,6 @@ static struct pixelformat_info pixelformats[MAX_PIXELFORMATS]; static uint pixelformat_count = 0; static uint pixelformat_extended_count = 0; -static uint currentpixelformat = 0; - static void add_standard_pixelformats( @@ -248,7 +247,7 @@ int stw_pixelformat_get( HDC hdc ) { - return currentpixelformat; + return stw_tls_get_data()->currentPixelFormat; } @@ -267,8 +266,8 @@ stw_pixelformat_set( if (index >= count) return FALSE; - currentpixelformat = iPixelFormat; - + stw_tls_get_data()->currentPixelFormat = iPixelFormat; + /* Some applications mistakenly use the undocumented wglSetPixelFormat * function instead of SetPixelFormat, so we call SetPixelFormat here to * avoid opengl32.dll's wglCreateContext to fail */ diff --git a/src/gallium/state_trackers/wgl/shared/stw_tls.c b/src/gallium/state_trackers/wgl/shared/stw_tls.c new file mode 100644 index 0000000000..e72bafb880 --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_tls.c @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <windows.h> + +#include "pipe/p_compiler.h" +#include "util/u_memory.h" +#include "stw_tls.h" + +static DWORD tlsIndex = TLS_OUT_OF_INDEXES; + +boolean +stw_tls_init(void) +{ + tlsIndex = TlsAlloc(); + if (tlsIndex == TLS_OUT_OF_INDEXES) { + return FALSE; + } + + return TRUE; +} + +boolean +stw_tls_init_thread(void) +{ + struct stw_tls_data *data; + + if (tlsIndex == TLS_OUT_OF_INDEXES) { + return FALSE; + } + + data = MALLOC(sizeof(*data)); + if (!data) { + return FALSE; + } + + data->currentPixelFormat = 0; + data->currentDC = NULL; + data->currentGLRC = 0; + + TlsSetValue(tlsIndex, data); + + return TRUE; +} + +void +stw_tls_cleanup_thread(void) +{ + struct stw_tls_data *data; + + if (tlsIndex == TLS_OUT_OF_INDEXES) { + return; + } + + data = (struct stw_tls_data *) TlsGetValue(tlsIndex); + TlsSetValue(tlsIndex, NULL); + FREE(data); +} + +void +stw_tls_cleanup(void) +{ + if (tlsIndex != TLS_OUT_OF_INDEXES) { + TlsFree(tlsIndex); + tlsIndex = TLS_OUT_OF_INDEXES; + } +} + +struct stw_tls_data * +stw_tls_get_data(void) +{ + if (tlsIndex == TLS_OUT_OF_INDEXES) { + return NULL; + } + + return (struct stw_tls_data *) TlsGetValue(tlsIndex); +} diff --git a/src/gallium/state_trackers/wgl/shared/stw_tls.h b/src/gallium/state_trackers/wgl/shared/stw_tls.h new file mode 100644 index 0000000000..23b61e68ff --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_tls.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef STW_TLS_H +#define STW_TLS_H + +struct stw_tls_data +{ + uint currentPixelFormat; + HDC currentDC; + UINT_PTR currentGLRC; +}; + +boolean +stw_tls_init(void); + +boolean +stw_tls_init_thread(void); + +void +stw_tls_cleanup_thread(void); + +void +stw_tls_cleanup(void); + +struct stw_tls_data * +stw_tls_get_data(void); + +#endif /* STW_TLS_H */ diff --git a/src/gallium/state_trackers/wgl/shared/stw_winsys.h b/src/gallium/state_trackers/wgl/shared/stw_winsys.h index a85a9a2257..e4a1d4f979 100644 --- a/src/gallium/state_trackers/wgl/shared/stw_winsys.h +++ b/src/gallium/state_trackers/wgl/shared/stw_winsys.h @@ -53,6 +53,12 @@ struct stw_winsys boolean st_init(const struct stw_winsys *stw_winsys); +boolean +st_init_thread(void); + +void +st_cleanup_thread(void); + void st_cleanup(void); diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c index 440666d835..d5d9431865 100644 --- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c @@ -312,9 +312,20 @@ DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) { switch (fdwReason) { case DLL_PROCESS_ATTACH: - return st_init(&stw_winsys); + if (!st_init(&stw_winsys)) { + return FALSE; + } + return st_init_thread(); + + case DLL_THREAD_ATTACH: + return st_init_thread(); + + case DLL_THREAD_DETACH: + st_cleanup_thread(); + break; case DLL_PROCESS_DETACH: + st_cleanup_thread(); st_cleanup(); break; } diff --git a/src/glx/x11/glx_pbuffer.c b/src/glx/x11/glx_pbuffer.c index a602cd2881..6bcf965056 100644 --- a/src/glx/x11/glx_pbuffer.c +++ b/src/glx/x11/glx_pbuffer.c @@ -189,6 +189,21 @@ determineTextureTarget(const int *attribs, int numAttribs) return target; } + + +static GLenum +determineTextureFormat(const int *attribs, int numAttribs) +{ + GLenum target = 0; + int i; + + for (i = 0; i < numAttribs; i++) { + if (attribs[2 * i] == GLX_TEXTURE_FORMAT_EXT) + return attribs[2 * i + 1]; + } + + return 0; +} #endif /** @@ -294,6 +309,9 @@ GetDrawableAttribute(Display * dpy, GLXDrawable drawable, if (pdraw != NULL && !pdraw->textureTarget) pdraw->textureTarget = determineTextureTarget((const int *) data, num_attributes); + if (pdraw != NULL && !pdraw->textureFormat) + pdraw->textureFormat = + determineTextureFormat((const int *) data, num_attributes); } #endif @@ -374,6 +392,7 @@ CreateDrawable(Display * dpy, const __GLcontextModes * fbconfig, } pdraw->textureTarget = determineTextureTarget(attrib_list, i); + pdraw->textureFormat = determineTextureFormat(attrib_list, i); } while (0); #endif diff --git a/src/glx/x11/glxclient.h b/src/glx/x11/glxclient.h index caf58bbd44..c42e80a0e8 100644 --- a/src/glx/x11/glxclient.h +++ b/src/glx/x11/glxclient.h @@ -161,6 +161,7 @@ struct __GLXDRIdrawableRec { __GLXscreenConfigs *psc; GLenum textureTarget; __DRIdrawable *driDrawable; + GLenum textureFormat; /* EXT_texture_from_pixmap support */ }; /* diff --git a/src/glx/x11/glxcmds.c b/src/glx/x11/glxcmds.c index fc0e593cb3..e5c0db4c96 100644 --- a/src/glx/x11/glxcmds.c +++ b/src/glx/x11/glxcmds.c @@ -2631,11 +2631,19 @@ static void __glXBindTexImageEXT(Display *dpy, if (gc->driContext) { __GLXDRIdrawable *pdraw = GetGLXDRIDrawable(dpy, drawable, NULL); - if (pdraw != NULL) - (*pdraw->psc->texBuffer->setTexBuffer)(gc->__driContext, - pdraw->textureTarget, - pdraw->driDrawable); - + if (pdraw != NULL) { + if (pdraw->psc->texBuffer->base.version >= 2 && + pdraw->psc->texBuffer->setTexBuffer2 != NULL) { + (*pdraw->psc->texBuffer->setTexBuffer2)(gc->__driContext, + pdraw->textureTarget, + pdraw->textureFormat, + pdraw->driDrawable); + } else { + (*pdraw->psc->texBuffer->setTexBuffer)(gc->__driContext, + pdraw->textureTarget, + pdraw->driDrawable); + } + } return; } #endif diff --git a/src/mesa/drivers/dri/i915/i830_texstate.c b/src/mesa/drivers/dri/i915/i830_texstate.c index c718bb0055..df43b779a7 100644 --- a/src/mesa/drivers/dri/i915/i830_texstate.c +++ b/src/mesa/drivers/dri/i915/i830_texstate.c @@ -38,7 +38,7 @@ static GLuint -translate_texture_format(GLuint mesa_format) +translate_texture_format(GLuint mesa_format, GLuint internal_format) { switch (mesa_format) { case MESA_FORMAT_L8: @@ -56,7 +56,10 @@ translate_texture_format(GLuint mesa_format) case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + if (internal_format == GL_RGB) + return MAPSURF_32BIT | MT_32BIT_XRGB8888; + else + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_YCBCR_REV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: @@ -162,7 +165,8 @@ i830_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) 0, intelObj-> firstLevel); - format = translate_texture_format(firstImage->TexFormat->MesaFormat); + format = translate_texture_format(firstImage->TexFormat->MesaFormat, + firstImage->InternalFormat); pitch = intelObj->mt->pitch * intelObj->mt->cpp; } diff --git a/src/mesa/drivers/dri/i915/i915_texstate.c b/src/mesa/drivers/dri/i915/i915_texstate.c index adbb52a3a3..6d25f8dd8e 100644 --- a/src/mesa/drivers/dri/i915/i915_texstate.c +++ b/src/mesa/drivers/dri/i915/i915_texstate.c @@ -37,7 +37,8 @@ static GLuint -translate_texture_format(GLuint mesa_format, GLenum DepthMode) +translate_texture_format(GLuint mesa_format, GLuint internal_format, + GLenum DepthMode) { switch (mesa_format) { case MESA_FORMAT_L8: @@ -55,7 +56,10 @@ translate_texture_format(GLuint mesa_format, GLenum DepthMode) case MESA_FORMAT_ARGB4444: return MAPSURF_16BIT | MT_16BIT_ARGB4444; case MESA_FORMAT_ARGB8888: - return MAPSURF_32BIT | MT_32BIT_ARGB8888; + if (internal_format == GL_RGB) + return MAPSURF_32BIT | MT_32BIT_XRGB8888; + else + return MAPSURF_32BIT | MT_32BIT_ARGB8888; case MESA_FORMAT_YCBCR_REV: return (MAPSURF_422 | MT_422_YCRCB_NORMAL); case MESA_FORMAT_YCBCR: @@ -173,7 +177,8 @@ i915_update_tex_unit(struct intel_context *intel, GLuint unit, GLuint ss3) firstLevel); format = translate_texture_format(firstImage->TexFormat->MesaFormat, - tObj->DepthMode); + firstImage->InternalFormat, + tObj->DepthMode); pitch = intelObj->mt->pitch * intelObj->mt->cpp; } diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 9b320480b6..e6113eff87 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -69,7 +69,8 @@ static GLuint translate_tex_target( GLenum target ) } -static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) +static GLuint translate_tex_format( GLuint mesa_format, GLenum internal_format, + GLenum depth_mode ) { switch( mesa_format ) { case MESA_FORMAT_L8: @@ -89,10 +90,16 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) return BRW_SURFACEFORMAT_R8G8B8_UNORM; case MESA_FORMAT_ARGB8888: - return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_B8G8R8X8_UNORM; + else + return BRW_SURFACEFORMAT_B8G8R8A8_UNORM; case MESA_FORMAT_RGBA8888_REV: - return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; + if (internal_format == GL_RGB) + return BRW_SURFACEFORMAT_R8G8B8X8_UNORM; + else + return BRW_SURFACEFORMAT_R8G8B8A8_UNORM; case MESA_FORMAT_RGB565: return BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -161,7 +168,7 @@ static GLuint translate_tex_format( GLuint mesa_format, GLenum depth_mode ) struct brw_wm_surface_key { GLenum target, depthmode; dri_bo *bo; - GLint format; + GLint format, internal_format; GLint first_level, last_level; GLint width, height, depth; GLint pitch, cpp; @@ -199,9 +206,11 @@ brw_create_texture_surface( struct brw_context *brw, surf.ss0.mipmap_layout_mode = BRW_SURFACE_MIPMAPLAYOUT_BELOW; surf.ss0.surface_type = translate_tex_target(key->target); - - if (key->bo) - surf.ss0.surface_format = translate_tex_format(key->format, key->depthmode); + if (key->bo) { + surf.ss0.surface_format = translate_tex_format(key->format, + key->internal_format, + key->depthmode); + } else { switch (key->depth) { case 32: @@ -278,6 +287,7 @@ brw_update_texture_surface( GLcontext *ctx, GLuint unit ) key.offset = intelObj->textureOffset; } else { key.format = firstImage->TexFormat->MesaFormat; + key.internal_format = firstImage->InternalFormat; key.pitch = intelObj->mt->pitch; key.depth = firstImage->Depth; key.bo = intelObj->mt->region->buffer; diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index e8c074712c..d20ea15187 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -211,6 +211,7 @@ static const __DRItexOffsetExtension intelTexOffsetExtension = { static const __DRItexBufferExtension intelTexBufferExtension = { { __DRI_TEX_BUFFER, __DRI_TEX_BUFFER_VERSION }, intelSetTexBuffer, + intelSetTexBuffer2, }; static const __DRIextension *intelScreenExtensions[] = { diff --git a/src/mesa/drivers/dri/intel/intel_tex.h b/src/mesa/drivers/dri/intel/intel_tex.h index 742ccc043a..f5372d82fb 100644 --- a/src/mesa/drivers/dri/intel/intel_tex.h +++ b/src/mesa/drivers/dri/intel/intel_tex.h @@ -149,6 +149,8 @@ void intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, unsigned long long offset, GLint depth, GLuint pitch); void intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *pDraw); +void intelSetTexBuffer2(__DRIcontext *pDRICtx, + GLint target, GLint format, __DRIdrawable *pDraw); GLuint intel_finalize_mipmap_tree(struct intel_context *intel, GLuint unit); diff --git a/src/mesa/drivers/dri/intel/intel_tex_image.c b/src/mesa/drivers/dri/intel/intel_tex_image.c index 943636c37b..e902187637 100644 --- a/src/mesa/drivers/dri/intel/intel_tex_image.c +++ b/src/mesa/drivers/dri/intel/intel_tex_image.c @@ -714,7 +714,9 @@ intelSetTexOffset(__DRIcontext *pDRICtx, GLint texname, } void -intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +intelSetTexBuffer2(__DRIcontext *pDRICtx, GLint target, + GLint glx_texture_format, + __DRIdrawable *dPriv) { struct intel_framebuffer *intel_fb = dPriv->driverPrivate; struct intel_context *intel = pDRICtx->driverPrivate; @@ -745,7 +747,10 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) type = GL_BGRA; format = GL_UNSIGNED_BYTE; - internalFormat = (rb->region->cpp == 3 ? 3 : 4); + if (glx_texture_format == GLX_TEXTURE_FORMAT_RGB_EXT) + internalFormat = GL_RGB; + else + internalFormat = GL_RGBA; mt = intel_miptree_create_for_region(intel, target, internalFormat, @@ -785,3 +790,12 @@ intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) _mesa_unlock_texture(&intel->ctx, texObj); } + +void +intelSetTexBuffer(__DRIcontext *pDRICtx, GLint target, __DRIdrawable *dPriv) +{ + /* The old interface didn't have the format argument, so copy our + * implementation's behavior at the time. + */ + intelSetTexBuffer2(pDRICtx, target, GLX_TEXTURE_FORMAT_RGBA_EXT, dPriv); +} diff --git a/src/mesa/shader/prog_optimize.c b/src/mesa/shader/prog_optimize.c index ec06da141d..458a69f70b 100644 --- a/src/mesa/shader/prog_optimize.c +++ b/src/mesa/shader/prog_optimize.c @@ -33,6 +33,9 @@ #include "prog_print.h" +#define MAX_LOOP_NESTING 50 + + static GLboolean dbg = GL_FALSE; @@ -76,6 +79,37 @@ remove_instructions(struct gl_program *prog, const GLboolean *removeFlags) /** + * Remap register indexes according to map. + * \param prog the program to search/replace + * \param file the type of register file to search/replace + * \param map maps old register indexes to new indexes + */ +static void +replace_regs(struct gl_program *prog, gl_register_file file, const GLint map[]) +{ + GLuint i; + + for (i = 0; i < prog->NumInstructions; i++) { + struct prog_instruction *inst = prog->Instructions + i; + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == file) { + GLuint index = inst->SrcReg[j].Index; + ASSERT(map[index] >= 0); + inst->SrcReg[j].Index = map[index]; + } + } + if (inst->DstReg.File == file) { + const GLuint index = inst->DstReg.Index; + ASSERT(map[index] >= 0); + inst->DstReg.Index = map[index]; + } + } +} + + +/** * Consolidate temporary registers to use low numbers. For example, if the * shader only uses temps 4, 5, 8, replace them with 0, 1, 2. */ @@ -83,7 +117,7 @@ static void _mesa_consolidate_registers(struct gl_program *prog) { GLboolean tempUsed[MAX_PROGRAM_TEMPS]; - GLuint tempMap[MAX_PROGRAM_TEMPS]; + GLint tempMap[MAX_PROGRAM_TEMPS]; GLuint tempMax = 0, i; if (dbg) { @@ -92,6 +126,10 @@ _mesa_consolidate_registers(struct gl_program *prog) memset(tempUsed, 0, sizeof(tempUsed)); + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + tempMap[i] = -1; + } + /* set tempUsed[i] if temporary [i] is referenced */ for (i = 0; i < prog->NumInstructions; i++) { const struct prog_instruction *inst = prog->Instructions + i; @@ -132,26 +170,8 @@ _mesa_consolidate_registers(struct gl_program *prog) } } - /* now replace occurances of old temp indexes with new indexes */ - for (i = 0; i < prog->NumInstructions; i++) { - struct prog_instruction *inst = prog->Instructions + i; - const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); - GLuint j; - for (j = 0; j < numSrc; j++) { - if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { - GLuint index = inst->SrcReg[j].Index; - assert(index <= tempMax); - assert(tempUsed[index]); - inst->SrcReg[j].Index = tempMap[index]; - } - } - if (inst->DstReg.File == PROGRAM_TEMPORARY) { - const GLuint index = inst->DstReg.Index; - assert(tempUsed[index]); - assert(index <= tempMax); - inst->DstReg.Index = tempMap[index]; - } - } + replace_regs(prog, PROGRAM_TEMPORARY, tempMap); + if (dbg) { _mesa_printf("Optimize: End register consolidation\n"); } @@ -409,6 +429,370 @@ _mesa_remove_extra_moves(struct gl_program *prog) } +/** A live register interval */ +struct interval +{ + GLuint Reg; /** The temporary register index */ + GLuint Start, End; /** Start/end instruction numbers */ +}; + + +/** A list of register intervals */ +struct interval_list +{ + GLuint Num; + struct interval Intervals[MAX_PROGRAM_TEMPS]; +}; + + +static void +append_interval(struct interval_list *list, const struct interval *inv) +{ + list->Intervals[list->Num++] = *inv; +} + + +/** Insert interval inv into list, sorted by interval end */ +static void +insert_interval_by_end(struct interval_list *list, const struct interval *inv) +{ + /* XXX we could do a binary search insertion here since list is sorted */ + GLint i = list->Num - 1; + while (i >= 0 && list->Intervals[i].End > inv->End) { + list->Intervals[i + 1] = list->Intervals[i]; + i--; + } + list->Intervals[i + 1] = *inv; + list->Num++; + +#ifdef DEBUG + { + GLuint i; + for (i = 0; i + 1 < list->Num; i++) { + ASSERT(list->Intervals[i].End <= list->Intervals[i + 1].End); + } + } +#endif +} + + +/** Remove the given interval from the interval list */ +static void +remove_interval(struct interval_list *list, const struct interval *inv) +{ + /* XXX we could binary search since list is sorted */ + GLuint k; + for (k = 0; k < list->Num; k++) { + if (list->Intervals[k].Reg == inv->Reg) { + /* found, remove it */ + ASSERT(list->Intervals[k].Start == inv->Start); + ASSERT(list->Intervals[k].End == inv->End); + while (k < list->Num - 1) { + list->Intervals[k] = list->Intervals[k + 1]; + k++; + } + list->Num--; + return; + } + } +} + + +/** called by qsort() */ +static int +compare_start(const void *a, const void *b) +{ + const struct interval *ia = (const struct interval *) a; + const struct interval *ib = (const struct interval *) b; + if (ia->Start < ib->Start) + return -1; + else if (ia->Start > ib->Start) + return +1; + else + return 0; +} + +/** sort the interval list according to interval starts */ +static void +sort_interval_list_by_start(struct interval_list *list) +{ + qsort(list->Intervals, list->Num, sizeof(struct interval), compare_start); +#ifdef DEBUG + { + GLuint i; + for (i = 0; i + 1 < list->Num; i++) { + ASSERT(list->Intervals[i].Start <= list->Intervals[i + 1].Start); + } + } +#endif +} + + +/** + * Update the intermediate interval info for register 'index' and + * instruction 'ic'. + */ +static void +update_interval(GLint intBegin[], GLint intEnd[], GLuint index, GLuint ic) +{ + ASSERT(index < MAX_PROGRAM_TEMPS); + if (intBegin[index] == -1) { + ASSERT(intEnd[index] == -1); + intBegin[index] = intEnd[index] = ic; + } + else { + intEnd[index] = ic; + } +} + + +/** + * Find the live intervals for each temporary register in the program. + * For register R, the interval [A,B] indicates that R is referenced + * from instruction A through instruction B. + * Special consideration is needed for loops and subroutines. + * \return GL_TRUE if success, GL_FALSE if we cannot proceed for some reason + */ +static GLboolean +find_live_intervals(struct gl_program *prog, + struct interval_list *liveIntervals) +{ + struct loop_info + { + GLuint Start, End; /**< Start, end instructions of loop */ + }; + struct loop_info loopStack[MAX_LOOP_NESTING]; + GLuint loopStackDepth = 0; + GLint intBegin[MAX_PROGRAM_TEMPS], intEnd[MAX_PROGRAM_TEMPS]; + GLuint i; + + /* + * Note: we'll return GL_FALSE below if we find relative indexing + * into the TEMP register file. We can't handle that yet. + * We also give up on subroutines for now. + */ + + if (dbg) { + _mesa_printf("Optimize: Begin find intervals\n"); + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + intBegin[i] = intEnd[i] = -1; + } + + /* Scan instructions looking for temporary registers */ + for (i = 0; i < prog->NumInstructions; i++) { + const struct prog_instruction *inst = prog->Instructions + i; + if (inst->Opcode == OPCODE_BGNLOOP) { + loopStack[loopStackDepth].Start = i; + loopStack[loopStackDepth].End = inst->BranchTarget; + loopStackDepth++; + } + else if (inst->Opcode == OPCODE_ENDLOOP) { + loopStackDepth--; + } + else if (inst->Opcode == OPCODE_CAL) { + return GL_FALSE; + } + else { + const GLuint numSrc = _mesa_num_inst_src_regs(inst->Opcode); + GLuint j; + for (j = 0; j < numSrc; j++) { + if (inst->SrcReg[j].File == PROGRAM_TEMPORARY) { + const GLuint index = inst->SrcReg[j].Index; + if (inst->SrcReg[j].RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + if (inst->DstReg.File == PROGRAM_TEMPORARY) { + const GLuint index = inst->DstReg.Index; + if (inst->DstReg.RelAddr) + return GL_FALSE; + update_interval(intBegin, intEnd, index, i); + if (loopStackDepth > 0) { + /* extend temp register's interval to end of loop */ + GLuint loopEnd = loopStack[loopStackDepth - 1].End; + update_interval(intBegin, intEnd, index, loopEnd); + } + } + } + } + + /* Build live intervals list from intermediate arrays */ + liveIntervals->Num = 0; + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (intBegin[i] >= 0) { + struct interval inv; + inv.Reg = i; + inv.Start = intBegin[i]; + inv.End = intEnd[i]; + append_interval(liveIntervals, &inv); + } + } + + /* Sort the list according to interval starts */ + sort_interval_list_by_start(liveIntervals); + + if (dbg) { + /* print interval info */ + for (i = 0; i < liveIntervals->Num; i++) { + const struct interval *inv = liveIntervals->Intervals + i; + _mesa_printf("Reg[%d] live [%d, %d]:", + inv->Reg, inv->Start, inv->End); + if (1) { + int j; + for (j = 0; j < inv->Start; j++) + _mesa_printf(" "); + for (j = inv->Start; j <= inv->End; j++) + _mesa_printf("x"); + } + _mesa_printf("\n"); + } + } + + return GL_TRUE; +} + + +static GLuint +alloc_register(GLboolean usedRegs[MAX_PROGRAM_TEMPS]) +{ + GLuint k; + for (k = 0; k < MAX_PROGRAM_TEMPS; k++) { + if (!usedRegs[k]) { + usedRegs[k] = GL_TRUE; + return k; + } + } + return MAX_PROGRAM_TEMPS; +} + + +/** + * This function implements "Linear Scan Register Allocation" to reduce + * the number of temporary registers used by the program. + * + * We compute the "live interval" for all temporary registers then + * examine the overlap of the intervals to allocate new registers. + * Basically, if two intervals do not overlap, they can use the same register. + */ +static void +_mesa_reallocate_registers(struct gl_program *prog) +{ + struct interval_list liveIntervals; + GLint registerMap[MAX_PROGRAM_TEMPS]; + GLboolean usedRegs[MAX_PROGRAM_TEMPS]; + GLuint i; + GLuint maxTemp = 0; + + if (dbg) { + _mesa_printf("Optimize: Begin live-interval register reallocation\n"); + _mesa_print_program(prog); + } + + for (i = 0; i < MAX_PROGRAM_TEMPS; i++){ + registerMap[i] = -1; + usedRegs[i] = GL_FALSE; + } + + if (!find_live_intervals(prog, &liveIntervals)) { + if (dbg) + _mesa_printf("Aborting register reallocation\n"); + return; + } + + { + struct interval_list activeIntervals; + activeIntervals.Num = 0; + + /* loop over live intervals, allocating a new register for each */ + for (i = 0; i < liveIntervals.Num; i++) { + const struct interval *live = liveIntervals.Intervals + i; + + if (dbg) + _mesa_printf("Consider register %u\n", live->Reg); + + /* Expire old intervals. Intervals which have ended with respect + * to the live interval can have their remapped registers freed. + */ + { + GLint j; + for (j = 0; j < activeIntervals.Num; j++) { + const struct interval *inv = activeIntervals.Intervals + j; + if (inv->End >= live->Start) { + /* Stop now. Since the activeInterval list is sorted + * we know we don't have to go further. + */ + break; + } + else { + /* Interval 'inv' has expired */ + const GLint regNew = registerMap[inv->Reg]; + ASSERT(regNew >= 0); + + if (dbg) + _mesa_printf(" expire interval for reg %u\n", inv->Reg); + + /* remove interval j from active list */ + remove_interval(&activeIntervals, inv); + j--; /* counter-act j++ in for-loop above */ + + /* return register regNew to the free pool */ + if (dbg) + _mesa_printf(" free reg %d\n", regNew); + ASSERT(usedRegs[regNew] == GL_TRUE); + usedRegs[regNew] = GL_FALSE; + } + } + } + + /* find a free register for this live interval */ + { + const GLuint k = alloc_register(usedRegs); + if (k == MAX_PROGRAM_TEMPS) { + /* out of registers, give up */ + return; + } + registerMap[live->Reg] = k; + maxTemp = MAX2(maxTemp, k); + if (dbg) + _mesa_printf(" remap register %d -> %d\n", live->Reg, k); + } + + /* Insert this live interval into the active list which is sorted + * by increasing end points. + */ + insert_interval_by_end(&activeIntervals, live); + } + } + + if (maxTemp + 1 < liveIntervals.Num) { + /* OK, we've reduced the number of registers needed. + * Scan the program and replace all the old temporary register + * indexes with the new indexes. + */ + replace_regs(prog, PROGRAM_TEMPORARY, registerMap); + + prog->NumTemporaries = maxTemp + 1; + } + + if (dbg) { + _mesa_printf("Optimize: End live-interval register reallocation\n"); + _mesa_printf("Num temp regs before: %u after: %u\n", + liveIntervals.Num, maxTemp + 1); + _mesa_print_program(prog); + } +} + + + + /** * Apply optimizations to the given program to eliminate unnecessary * instructions, temp regs, etc. @@ -424,4 +808,6 @@ _mesa_optimize_program(GLcontext *ctx, struct gl_program *program) if (1) _mesa_consolidate_registers(program); + else /*NEW*/ + _mesa_reallocate_registers(program); } diff --git a/src/mesa/shader/prog_statevars.c b/src/mesa/shader/prog_statevars.c index f51d9e2651..aeb7cf6de2 100644 --- a/src/mesa/shader/prog_statevars.c +++ b/src/mesa/shader/prog_statevars.c @@ -506,6 +506,13 @@ _mesa_fetch_state(GLcontext *ctx, const gl_state_index state[], } } return; + case STATE_FB_SIZE: + value[0] = (GLfloat) (ctx->DrawBuffer->Width - 1); + value[1] = (GLfloat) (ctx->DrawBuffer->Height - 1); + value[2] = 0.0F; + value[3] = 0.0F; + return; + case STATE_ROT_MATRIX_0: { const int unit = (int) state[2]; @@ -628,6 +635,9 @@ _mesa_program_state_flags(const gl_state_index state[STATE_LENGTH]) case STATE_PCM_BIAS: return _NEW_PIXEL; + case STATE_FB_SIZE: + return _NEW_BUFFERS; + default: /* unknown state indexes are silently ignored and * no flag set, since it is handled by the driver. @@ -828,6 +838,9 @@ append_token(char *dst, gl_state_index k) case STATE_SHADOW_AMBIENT: append(dst, "CompareFailValue"); break; + case STATE_FB_SIZE: + append(dst, "FbSize"); + break; case STATE_ROT_MATRIX_0: append(dst, "rotMatrixRow0"); break; diff --git a/src/mesa/shader/prog_statevars.h b/src/mesa/shader/prog_statevars.h index d563080db1..1180d9eaa4 100644 --- a/src/mesa/shader/prog_statevars.h +++ b/src/mesa/shader/prog_statevars.h @@ -117,6 +117,7 @@ typedef enum gl_state_index_ { STATE_PCM_SCALE, /**< Post color matrix RGBA scale */ STATE_PCM_BIAS, /**< Post color matrix RGBA bias */ STATE_SHADOW_AMBIENT, /**< ARB_shadow_ambient fail value; token[2] is texture unit index */ + STATE_FB_SIZE, /**< (width-1, height-1, 0, 0) */ STATE_ROT_MATRIX_0, /**< ATI_envmap_bumpmap, rot matrix row 0 */ STATE_ROT_MATRIX_1, /**< ATI_envmap_bumpmap, rot matrix row 1 */ STATE_INTERNAL_DRIVER /* first available state index for drivers (must be last) */ diff --git a/src/mesa/shader/slang/slang_codegen.c b/src/mesa/shader/slang/slang_codegen.c index 8263aae334..a7cfc45e6f 100644 --- a/src/mesa/shader/slang/slang_codegen.c +++ b/src/mesa/shader/slang/slang_codegen.c @@ -1441,7 +1441,7 @@ _slang_gen_function_call(slang_assemble_ctx *A, slang_function *fun, if (A->pragmas->Debug) { char s[1000]; - snprintf(s, sizeof(s), "Call/inline %s()", (char *) fun->header.a_name); + _mesa_snprintf(s, sizeof(s), "Call/inline %s()", (char *) fun->header.a_name); n->Comment = _slang_strdup(s); } diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index ea76487bcf..61687fbc3e 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -79,8 +79,6 @@ static void update_raster_state( struct st_context *st ) memset(raster, 0, sizeof(*raster)); - raster->origin_lower_left = 1; /* Always true for OpenGL */ - /* _NEW_POLYGON, _NEW_BUFFERS */ { diff --git a/src/mesa/state_tracker/st_atom_stipple.c b/src/mesa/state_tracker/st_atom_stipple.c index f395930ab4..31e124b329 100644 --- a/src/mesa/state_tracker/st_atom_stipple.c +++ b/src/mesa/state_tracker/st_atom_stipple.c @@ -39,24 +39,52 @@ #include "pipe/p_defines.h" +/** + * OpenGL's polygon stipple is indexed with window coordinates in which + * the origin (0,0) is the lower-left corner of the window. + * With Gallium, the origin is the upper-left corner of the window. + * To convert GL's polygon stipple to what gallium expects we need to + * invert the pattern vertically and rotate the stipple rows according + * to the window height. + */ +static void +invert_stipple(GLuint dest[32], const GLuint src[32], GLuint winHeight) +{ + GLuint i; + + for (i = 0; i < 32; i++) { + dest[i] = src[(winHeight - 1 - i) & 0x1f]; + } +} + + + static void update_stipple( struct st_context *st ) { - const GLuint sz = sizeof(st->state.poly_stipple.stipple); + const GLuint sz = sizeof(st->state.poly_stipple); assert(sz == sizeof(st->ctx->PolygonStipple)); - if (memcmp(&st->state.poly_stipple.stipple, st->ctx->PolygonStipple, sz)) { + if (memcmp(st->state.poly_stipple, st->ctx->PolygonStipple, sz)) { /* state has changed */ - memcpy(st->state.poly_stipple.stipple, st->ctx->PolygonStipple, sz); - st->pipe->set_polygon_stipple(st->pipe, &st->state.poly_stipple); + struct pipe_poly_stipple newStipple; + + memcpy(st->state.poly_stipple, st->ctx->PolygonStipple, sz); + + invert_stipple(newStipple.stipple, st->ctx->PolygonStipple, + st->ctx->DrawBuffer->Height); + + st->pipe->set_polygon_stipple(st->pipe, &newStipple); } } +/** Update the stipple when the pattern or window height changes */ const struct st_tracked_state st_update_polygon_stipple = { "st_update_polygon_stipple", /* name */ { /* dirty */ - (_NEW_POLYGONSTIPPLE), /* mesa */ + (_NEW_POLYGONSTIPPLE | + _NEW_BUFFERS), /* mesa */ 0, /* st */ }, update_stipple /* update */ diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index edfa8854d8..311d812ccf 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -467,7 +467,7 @@ st_TexImage(GLcontext * ctx, */ if (stObj->pt) { if (stObj->teximage_realloc || - level > stObj->pt->last_level || + level > (GLint) stObj->pt->last_level || (stObj->pt->last_level == level && stObj->pt->target != PIPE_TEXTURE_CUBE && !st_texture_match_image(stObj->pt, &stImage->base, @@ -803,7 +803,6 @@ st_TexSubimage(GLcontext * ctx, PIPE_TRANSFER_WRITE, xoffset, yoffset, width, height); - dstRowStride = stImage->transfer->stride; } if (!texImage->Data) { @@ -812,6 +811,7 @@ st_TexSubimage(GLcontext * ctx, } src = (const GLubyte *) pixels; + dstRowStride = stImage->transfer->stride; for (i = 0; i++ < depth;) { if (!texImage->TexFormat->StoreImage(ctx, dims, texImage->_BaseFormat, diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index d7518ab689..ae8c2978bf 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -93,12 +93,13 @@ struct st_context struct pipe_constant_buffer constants[2]; struct pipe_framebuffer_state framebuffer; struct pipe_texture *sampler_texture[PIPE_MAX_SAMPLERS]; - struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_viewport_state viewport; GLuint num_samplers; GLuint num_textures; + + GLuint poly_stipple[32]; /**< In OpenGL's bottom-to-top order */ } state; struct { diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index cbf3f334c0..ffa607dd87 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -219,8 +219,9 @@ compile_instruction( const GLuint immediateMapping[], GLboolean indirectAccess, GLuint preamble_size, - GLuint processor, - GLboolean *insideSubroutine) + GLuint procType, + GLboolean *insideSubroutine, + GLint wposTemp) { GLuint i; struct tgsi_full_dst_register *fulldst; @@ -247,19 +248,29 @@ compile_instruction( GLuint j; fullsrc = &fullinst->FullSrcRegisters[i]; - fullsrc->SrcRegister.File = map_register_file( - inst->SrcReg[i].File, - inst->SrcReg[i].Index, - immediateMapping, - indirectAccess ); - fullsrc->SrcRegister.Index = map_register_file_index( - fullsrc->SrcRegister.File, - inst->SrcReg[i].Index, - inputMapping, - outputMapping, - immediateMapping, - indirectAccess ); + if (procType == TGSI_PROCESSOR_FRAGMENT && + inst->SrcReg[i].File == PROGRAM_INPUT && + inst->SrcReg[i].Index == FRAG_ATTRIB_WPOS) { + /* special case of INPUT[WPOS] */ + fullsrc->SrcRegister.File = TGSI_FILE_TEMPORARY; + fullsrc->SrcRegister.Index = wposTemp; + } + else { + /* any other src register */ + fullsrc->SrcRegister.File = map_register_file( + inst->SrcReg[i].File, + inst->SrcReg[i].Index, + immediateMapping, + indirectAccess ); + fullsrc->SrcRegister.Index = map_register_file_index( + fullsrc->SrcRegister.File, + inst->SrcReg[i].Index, + inputMapping, + outputMapping, + immediateMapping, + indirectAccess ); + } /* swizzle (ext swizzle also depends on negation) */ { @@ -733,6 +744,111 @@ find_temporaries(const struct gl_program *program, } +/** + * Find an unused temporary in the tempsUsed array. + */ +static int +find_free_temporary(GLboolean tempsUsed[MAX_PROGRAM_TEMPS]) +{ + int i; + for (i = 0; i < MAX_PROGRAM_TEMPS; i++) { + if (!tempsUsed[i]) { + tempsUsed[i] = GL_TRUE; + return i; + } + } + return -1; +} + + +/** helper for building simple TGSI instruction, one src register */ +static void +build_tgsi_instruction1(struct tgsi_full_instruction *inst, + int opcode, + int dstFile, int dstIndex, int writemask, + int srcFile1, int srcIndex1) +{ + *inst = tgsi_default_full_instruction(); + + inst->Instruction.Opcode = opcode; + + inst->Instruction.NumDstRegs = 1; + inst->FullDstRegisters[0].DstRegister.File = dstFile; + inst->FullDstRegisters[0].DstRegister.Index = dstIndex; + inst->FullDstRegisters[0].DstRegister.WriteMask = writemask; + + inst->Instruction.NumSrcRegs = 1; + inst->FullSrcRegisters[0].SrcRegister.File = srcFile1; + inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1; +} + + +/** helper for building simple TGSI instruction, two src registers */ +static void +build_tgsi_instruction2(struct tgsi_full_instruction *inst, + int opcode, + int dstFile, int dstIndex, int writemask, + int srcFile1, int srcIndex1, + int srcFile2, int srcIndex2) +{ + *inst = tgsi_default_full_instruction(); + + inst->Instruction.Opcode = opcode; + + inst->Instruction.NumDstRegs = 1; + inst->FullDstRegisters[0].DstRegister.File = dstFile; + inst->FullDstRegisters[0].DstRegister.Index = dstIndex; + inst->FullDstRegisters[0].DstRegister.WriteMask = writemask; + + inst->Instruction.NumSrcRegs = 2; + inst->FullSrcRegisters[0].SrcRegister.File = srcFile1; + inst->FullSrcRegisters[0].SrcRegister.Index = srcIndex1; + inst->FullSrcRegisters[1].SrcRegister.File = srcFile2; + inst->FullSrcRegisters[1].SrcRegister.Index = srcIndex2; +} + + + +/** + * Emit the TGSI instructions for inverting the WPOS y coordinate. + */ +static int +emit_inverted_wpos(struct tgsi_token *tokens, + int wpos_temp, + int winsize_const, + int wpos_input, + struct tgsi_header *header, int maxTokens) +{ + struct tgsi_full_instruction fullinst; + int ti = 0; + + /* MOV wpos_temp.xzw, input[wpos]; */ + build_tgsi_instruction1(&fullinst, + TGSI_OPCODE_MOV, + TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_XZW, + TGSI_FILE_INPUT, 0); + + ti += tgsi_build_full_instruction(&fullinst, + &tokens[ti], + header, + maxTokens - ti); + + /* SUB wpos_temp.y, const[winsize_const] - input[wpos_input]; */ + build_tgsi_instruction2(&fullinst, + TGSI_OPCODE_SUB, + TGSI_FILE_TEMPORARY, wpos_temp, WRITEMASK_Y, + TGSI_FILE_CONSTANT, winsize_const, + TGSI_FILE_INPUT, wpos_input); + + ti += tgsi_build_full_instruction(&fullinst, + &tokens[ti], + header, + maxTokens - ti); + + return ti; +} + + /** @@ -778,16 +894,34 @@ st_translate_mesa_program( GLuint ti; /* token index */ struct tgsi_header *header; struct tgsi_processor *processor; - struct tgsi_full_instruction fullinst; GLuint preamble_size = 0; GLuint immediates[1000]; GLuint numImmediates = 0; GLboolean insideSubroutine = GL_FALSE; GLboolean indirectAccess = GL_FALSE; + GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1]; + GLint wposTemp = -1, winHeightConst = -1; assert(procType == TGSI_PROCESSOR_FRAGMENT || procType == TGSI_PROCESSOR_VERTEX); + find_temporaries(program, tempsUsed); + + if (procType == TGSI_PROCESSOR_FRAGMENT) { + if (program->InputsRead & FRAG_BIT_WPOS) { + /* Fragment program uses fragment position input. + * Need to replace instances of INPUT[WPOS] with temp T + * where T = INPUT[WPOS] by y is inverted. + */ + static const gl_state_index winSizeState[STATE_LENGTH] + = { STATE_INTERNAL, STATE_FB_SIZE, 0, 0, 0 }; + winHeightConst = _mesa_add_state_reference(program->Parameters, + winSizeState); + wposTemp = find_free_temporary(tempsUsed); + } + } + + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); header = (struct tgsi_header *) &tokens[1]; @@ -884,11 +1018,9 @@ st_translate_mesa_program( /* temporary decls */ { - GLboolean tempsUsed[MAX_PROGRAM_TEMPS + 1]; GLboolean inside_range = GL_FALSE; GLuint start_range = 0; - find_temporaries(program, tempsUsed); tempsUsed[MAX_PROGRAM_TEMPS] = GL_FALSE; for (i = 0; i < MAX_PROGRAM_TEMPS + 1; i++) { if (tempsUsed[i] && !inside_range) { @@ -1018,7 +1150,17 @@ st_translate_mesa_program( } } + /* invert WPOS fragment input */ + if (wposTemp >= 0) { + ti += emit_inverted_wpos(&tokens[ti], wposTemp, winHeightConst, + inputMapping[FRAG_ATTRIB_WPOS], + header, maxTokens - ti); + preamble_size = 2; /* two instructions added */ + } + for (i = 0; i < program->NumInstructions; i++) { + struct tgsi_full_instruction fullinst; + compile_instruction( &program->Instructions[i], &fullinst, @@ -1028,7 +1170,8 @@ st_translate_mesa_program( indirectAccess, preamble_size, procType, - &insideSubroutine ); + &insideSubroutine, + wposTemp); ti += tgsi_build_full_instruction( &fullinst, |