diff options
Diffstat (limited to 'src/gallium')
616 files changed, 74860 insertions, 15864 deletions
diff --git a/src/gallium/Makefile b/src/gallium/Makefile index aa77021daf..79ca767f7e 100644 --- a/src/gallium/Makefile +++ b/src/gallium/Makefile @@ -1,25 +1,12 @@ +# src/gallium/Makefile TOP = ../.. include $(TOP)/configs/current +SUBDIRS = $(GALLIUM_DIRS) -SUBDIRS = auxiliary drivers - - -default: subdirs - - -subdirs: +default install clean: @for dir in $(SUBDIRS) ; do \ if [ -d $$dir ] ; then \ - (cd $$dir && $(MAKE)) || exit 1 ; \ + (cd $$dir && $(MAKE) $@) || exit 1; \ fi \ done - - -clean: - rm -f `find . -name \*.[oa]` - rm -f `find . -name depend` - - -# Dummy install target -install: diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 4e462b5c97..98487d43bd 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -1,64 +1,66 @@ -# -*-makefile-*- +# src/gallium/Makefile.template - -# We still have a dependency on the "dri" buffer manager. Most likely -# the interface can be reused in non-dri environments, and also as a -# frontend to simpler memory managers. +# Template makefile for gallium libraries. +# +# Usage: +# The minimum that the including makefile needs to define +# is TOP, LIBNAME and one of of the *_SOURCES. # -COMMON_SOURCES = +# Optional defines: +# LIBRARY_INCLUDES are appended to the list of includes directories. +# LIBRARY_DEFINES is not used for makedepend, but for compilation. -OBJECTS = $(C_SOURCES:.c=.o) \ - $(CPP_SOURCES:.cpp=.o) \ - $(ASM_SOURCES:.S=.o) +### Basic defines ### +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CPP_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) -### Include directories INCLUDES = \ -I. 
\ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/include \ - $(DRIVER_INCLUDES) - - -##### RULES ##### - -.c.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ - -.cpp.o: - $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ - -.S.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + -I$(GALLIUM)/src/gallium/include \ + -I$(GALLIUM)/src/gallium/auxiliary \ + -I$(GALLIUM)/src/gallium/drivers \ + $(LIBRARY_INCLUDES) ##### TARGETS ##### -default: depend symlinks $(LIBNAME) - - -$(LIBNAME): $(OBJECTS) Makefile $(TOP)/src/gallium/Makefile.template - $(TOP)/bin/mklib -o $@ -static $(OBJECTS) $(DRIVER_LIBS) +default: depend lib$(LIBNAME).a +lib$(LIBNAME).a: $(OBJECTS) Makefile $(TOP)/src/gallium/Makefile.template + $(MKLIB) -o $(LIBNAME) -static $(OBJECTS) depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) rm -f depend touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \ - $(ASM_SOURCES) 2> /dev/null - + $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) 2> /dev/null # Emacs tags tags: etags `find . 
-name \*.[ch]` `find ../include` - # Remove .o and backup files -clean:: - -rm -f *.o */*.o *~ *.so *~ server/*.o $(SYMLINKS) - -rm -f depend depend.bak +clean: + rm -f $(OBJECTS) lib$(LIBNAME).a depend depend.bak + +# Dummy target +install: + @echo -n "" + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(LIBRARY_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(LIBRARY_DEFINES) $< -o $@ -include depend +sinclude depend diff --git a/src/gallium/SConscript b/src/gallium/SConscript index 6a3e7e77ed..0c632ac2b8 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -21,9 +21,14 @@ SConscript([ 'auxiliary/translate/SConscript', 'auxiliary/draw/SConscript', 'auxiliary/pipebuffer/SConscript', + 'auxiliary/indices/SConscript', ]) for driver in env['drivers']: SConscript(os.path.join('drivers', driver, 'SConscript')) SConscript('state_trackers/python/SConscript') +SConscript('state_trackers/glx/xlib/SConscript') + +if platform == 'windows': + SConscript('state_trackers/wgl/SConscript') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index eaa0f2fe4e..5446eb68a9 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -1,20 +1,12 @@ +# src/gallium/auxiliary/Makefile TOP = ../../.. include $(TOP)/configs/current - SUBDIRS = $(GALLIUM_AUXILIARY_DIRS) - -default: subdirs - - -subdirs: +default install clean: @for dir in $(SUBDIRS) ; do \ if [ -d $$dir ] ; then \ - (cd $$dir && $(MAKE)) || exit 1 ; \ + (cd $$dir && $(MAKE) $@) || exit 1; \ fi \ done - - -clean: - rm -f `find . 
-name \*.[oa]` diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile index 6bd6602088..8726afcd94 100644 --- a/src/gallium/auxiliary/cso_cache/Makefile +++ b/src/gallium/auxiliary/cso_cache/Makefile @@ -9,6 +9,3 @@ C_SOURCES = \ cso_hash.c include ../../Makefile.template - -symlinks: - diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 6b1754ea00..0bc77a5728 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -28,7 +28,7 @@ /* Authors: Zack Rusin <zack@tungstengraphics.com> */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 68508f24de..a9157aad71 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -783,7 +783,7 @@ copy_framebuffer_state(struct pipe_framebuffer_state *dst, dst->width = src->width; dst->height = src->height; - dst->num_cbufs = src->num_cbufs; + dst->nr_cbufs = src->nr_cbufs; for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); } diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index 7f0044c5a7..288cef7b6f 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -30,7 +30,7 @@ * Zack Rusin <zack@tungstengraphics.com> */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "cso_hash.h" @@ -431,3 +431,9 @@ struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter --hash->data.d->size; return ret; } + +boolean cso_hash_contains(struct cso_hash *hash, unsigned key) +{ + struct cso_node **node = cso_hash_find_node(hash, key); + return (*node != hash->data.e); +} diff --git 
a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index 85f3e276c6..5891c325fa 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -44,6 +44,7 @@ #ifndef CSO_HASH_H #define CSO_HASH_H +#include "pipe/p_compiler.h" #ifdef __cplusplus extern "C" { @@ -95,6 +96,11 @@ struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash); */ struct cso_hash_iter cso_hash_find(struct cso_hash *hash, unsigned key); +/** + * Returns true if a value with the given key exists in the hash + */ +boolean cso_hash_contains(struct cso_hash *hash, unsigned key); + int cso_hash_iter_is_null(struct cso_hash_iter iter); unsigned cso_hash_iter_key(struct cso_hash_iter iter); diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index f2e36a89e9..5041dcc072 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -40,10 +40,7 @@ C_SOURCES = \ draw_vs_aos_machine.c \ draw_vs_exec.c \ draw_vs_llvm.c \ + draw_vs_ppc.c \ draw_vs_sse.c - include ../../Makefile.template - -symlinks: - diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 544a04918b..5f05aa324a 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -38,6 +38,7 @@ draw = env.ConvenienceLibrary( 'draw_vs_aos_machine.c', 'draw_vs_exec.c', 'draw_vs_llvm.c', + 'draw_vs_ppc.c', 'draw_vs_sse.c', 'draw_vs_varient.c' ]) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index fab8fc95fc..581532c1c9 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -138,8 +138,9 @@ void draw_set_driver_clipping( struct draw_context *draw, draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); draw->driver.bypass_clipping = bypass_clipping; - draw->bypass_clipping = (draw->rasterizer->bypass_clipping || - 
draw->driver.bypass_clipping); + draw->bypass_clipping = + ((draw->rasterizer && draw->rasterizer->bypass_clipping) || + draw->driver.bypass_clipping); } @@ -343,6 +344,21 @@ draw_num_vs_outputs(const struct draw_context *draw) } +/** + * Provide TGSI sampler objects for vertex shaders that use texture fetches. + * This might only be used by software drivers for the time being. + */ +void +draw_texture_samplers(struct draw_context *draw, + uint num_samplers, + struct tgsi_sampler **samplers) +{ + draw->vs.num_samplers = num_samplers; + draw->vs.samplers = samplers; +} + + + void draw_set_render( struct draw_context *draw, struct vbuf_render *render ) diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index a29bb01d81..d529e4e9a2 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -45,7 +45,7 @@ struct pipe_context; struct draw_context; struct draw_stage; struct draw_vertex_shader; - +struct tgsi_sampler; struct draw_context *draw_create( void ); @@ -92,6 +92,12 @@ uint draw_num_vs_outputs(const struct draw_context *draw); +void +draw_texture_samplers(struct draw_context *draw, + uint num_samplers, + struct tgsi_sampler **samplers); + + /* * Vertex shader functions diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 20841bb5d6..80c9c918a9 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -410,7 +410,7 @@ aaline_create_texture(struct aaline_stage *aaline) * texels which are zero. Special case the 1x1 and 2x2 levels. */ for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { - struct pipe_surface *surface; + struct pipe_transfer *transfer; const uint size = aaline->texture->width[level]; ubyte *data; uint i, j; @@ -419,9 +419,9 @@ aaline_create_texture(struct aaline_stage *aaline) /* This texture is new, no need to flush. 
*/ - surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0, - PIPE_BUFFER_USAGE_CPU_WRITE); - data = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_WRITE); + transfer = screen->get_tex_transfer(screen, aaline->texture, 0, level, 0, + PIPE_TRANSFER_WRITE, 0, 0, size, size); + data = screen->transfer_map(screen, transfer); if (data == NULL) return FALSE; @@ -440,13 +440,13 @@ aaline_create_texture(struct aaline_stage *aaline) else { d = 255; } - data[i * surface->stride + j] = d; + data[i * transfer->stride + j] = d; } } /* unmap */ - screen->surface_unmap(screen, surface); - screen->tex_surface_release(screen, &surface); + screen->transfer_unmap(screen, transfer); + screen->tex_transfer_release(screen, &transfer); } return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index b764d9c518..e68c824c86 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -256,7 +256,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, struct tgsi_full_immediate immed; uint size = 4; immed = tgsi_default_full_immediate(); - immed.Immediate.Size = 1 + size; /* one for the token itself */ + immed.Immediate.NrTokens = 1 + size; /* one for the token itself */ immed.u.Pointer = (void *) value; ctx->emit_immediate(ctx, &immed); } @@ -372,7 +372,7 @@ pstip_update_texture(struct pstip_stage *pstip) static const uint bit31 = 1 << 31; struct pipe_context *pipe = pstip->pipe; struct pipe_screen *screen = pipe->screen; - struct pipe_surface *surface; + struct pipe_transfer *transfer; const uint *stipple = pstip->state.stipple->stipple; uint i, j; ubyte *data; @@ -381,10 +381,9 @@ pstip_update_texture(struct pstip_stage *pstip) */ pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL ); - surface = screen->get_tex_surface(screen, pstip->texture, 0, 0, 0, - PIPE_BUFFER_USAGE_CPU_WRITE); - data = screen->surface_map(screen, surface, - 
PIPE_BUFFER_USAGE_CPU_WRITE); + transfer = screen->get_tex_transfer(screen, pstip->texture, 0, 0, 0, + PIPE_TRANSFER_WRITE, 0, 0, 32, 32); + data = screen->transfer_map(screen, transfer); /* * Load alpha texture. @@ -396,18 +395,18 @@ pstip_update_texture(struct pstip_stage *pstip) for (j = 0; j < 32; j++) { if (stipple[i] & (bit31 >> j)) { /* fragment "on" */ - data[i * surface->stride + j] = 0; + data[i * transfer->stride + j] = 0; } else { /* fragment "off" */ - data[i * surface->stride + j] = 255; + data[i * transfer->stride + j] = 255; } } } /* unmap */ - screen->surface_unmap(screen, surface); - screen->tex_surface_release(screen, &surface); + screen->transfer_unmap(screen, transfer); + screen->tex_transfer_release(screen, &transfer); } diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 9825e116c3..12325d30d6 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -29,12 +29,12 @@ * \file * Vertex buffer drawing stage. 
* - * \author José Fonseca <jrfonsec@tungstengraphics.com> + * \author Jose Fonseca <jrfonsec@tungstengraphics.com> * \author Keith Whitwell <keith@tungstengraphics.com> */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -93,7 +93,6 @@ vbuf_stage( struct draw_stage *stage ) } -static void vbuf_flush_indices( struct vbuf_stage *vbuf ); static void vbuf_flush_vertices( struct vbuf_stage *vbuf ); static void vbuf_alloc_vertices( struct vbuf_stage *vbuf ); @@ -109,13 +108,12 @@ overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz ) static INLINE void check_space( struct vbuf_stage *vbuf, unsigned nr ) { - if (vbuf->nr_vertices + nr > vbuf->max_vertices ) { - vbuf_flush_vertices(vbuf); - vbuf_alloc_vertices(vbuf); + if (vbuf->nr_vertices + nr > vbuf->max_vertices || + vbuf->nr_indices + nr > vbuf->max_indices) + { + vbuf_flush_vertices( vbuf ); + vbuf_alloc_vertices( vbuf ); } - - if (vbuf->nr_indices + nr > vbuf->max_indices ) - vbuf_flush_indices(vbuf); } @@ -202,7 +200,7 @@ vbuf_point( struct draw_stage *stage, * will be flushed if needed and a new one allocated. */ static void -vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) +vbuf_start_prim( struct vbuf_stage *vbuf, uint prim ) { struct translate_key hw_key; unsigned dst_offset; @@ -217,11 +215,7 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) * state change. */ vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render); - - if (vbuf->vertex_size != vbuf->vinfo->size * sizeof(float)) { - vbuf_flush_vertices(vbuf); - vbuf->vertex_size = vbuf->vinfo->size * sizeof(float); - } + vbuf->vertex_size = vbuf->vinfo->size * sizeof(float); /* Translate from pipeline vertices to hw vertices. */ @@ -294,8 +288,8 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) /* Allocate new buffer? 
*/ - if (!vbuf->vertices) - vbuf_alloc_vertices(vbuf); + assert(vbuf->vertices == NULL); + vbuf_alloc_vertices(vbuf); } @@ -305,9 +299,9 @@ vbuf_first_tri( struct draw_stage *stage, { struct vbuf_stage *vbuf = vbuf_stage( stage ); - vbuf_flush_indices( vbuf ); + vbuf_flush_vertices( vbuf ); + vbuf_start_prim(vbuf, PIPE_PRIM_TRIANGLES); stage->tri = vbuf_tri; - vbuf_set_prim(vbuf, PIPE_PRIM_TRIANGLES); stage->tri( stage, prim ); } @@ -318,9 +312,9 @@ vbuf_first_line( struct draw_stage *stage, { struct vbuf_stage *vbuf = vbuf_stage( stage ); - vbuf_flush_indices( vbuf ); + vbuf_flush_vertices( vbuf ); + vbuf_start_prim(vbuf, PIPE_PRIM_LINES); stage->line = vbuf_line; - vbuf_set_prim(vbuf, PIPE_PRIM_LINES); stage->line( stage, prim ); } @@ -331,53 +325,42 @@ vbuf_first_point( struct draw_stage *stage, { struct vbuf_stage *vbuf = vbuf_stage( stage ); - vbuf_flush_indices( vbuf ); + vbuf_flush_vertices(vbuf); + vbuf_start_prim(vbuf, PIPE_PRIM_POINTS); stage->point = vbuf_point; - vbuf_set_prim(vbuf, PIPE_PRIM_POINTS); stage->point( stage, prim ); } -static void -vbuf_flush_indices( struct vbuf_stage *vbuf ) -{ - if(!vbuf->nr_indices) - return; - - assert((uint) (vbuf->vertex_ptr - vbuf->vertices) == - vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned)); - - vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices); - - vbuf->nr_indices = 0; -} - /** * Flush existing vertex buffer and allocate a new one. - * - * XXX: We separate flush-on-index-full and flush-on-vb-full, but may - * raise issues uploading vertices if the hardware wants to flush when - * we flush. 
*/ static void vbuf_flush_vertices( struct vbuf_stage *vbuf ) { - if(vbuf->vertices) { - vbuf_flush_indices(vbuf); - + if(vbuf->vertices) { + + vbuf->render->unmap_vertices( vbuf->render, 0, vbuf->nr_vertices - 1 ); + + if (vbuf->nr_indices) + { + vbuf->render->draw(vbuf->render, + vbuf->indices, + vbuf->nr_indices ); + + vbuf->nr_indices = 0; + } + /* Reset temporary vertices ids */ if(vbuf->nr_vertices) draw_reset_vertex_ids( vbuf->stage.draw ); /* Free the vertex buffer */ - vbuf->render->release_vertices(vbuf->render, - vbuf->vertices, - vbuf->vertex_size, - vbuf->nr_vertices); + vbuf->render->release_vertices( vbuf->render ); + vbuf->max_vertices = vbuf->nr_vertices = 0; vbuf->vertex_ptr = vbuf->vertices = NULL; - } } @@ -394,14 +377,20 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf ) /* even number */ vbuf->max_vertices = vbuf->max_vertices & ~1; + if(vbuf->max_vertices >= UNDEFINED_VERTEX_ID) + vbuf->max_vertices = UNDEFINED_VERTEX_ID - 1; + /* Must always succeed -- driver gives us a * 'max_vertex_buffer_bytes' which it guarantees it can allocate, * and it will flush itself if necessary to do so. If this does * fail, we are basically without usable hardware. 
*/ - vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render, - (ushort) vbuf->vertex_size, - (ushort) vbuf->max_vertices); + vbuf->render->allocate_vertices(vbuf->render, + (ushort) vbuf->vertex_size, + (ushort) vbuf->max_vertices); + + vbuf->vertices = (uint *) vbuf->render->map_vertices( vbuf->render ); + vbuf->vertex_ptr = vbuf->vertices; } @@ -412,14 +401,11 @@ vbuf_flush( struct draw_stage *stage, unsigned flags ) { struct vbuf_stage *vbuf = vbuf_stage( stage ); - vbuf_flush_indices( vbuf ); + vbuf_flush_vertices( vbuf ); stage->point = vbuf_first_point; stage->line = vbuf_first_line; stage->tri = vbuf_first_tri; - - if (flags & DRAW_FLUSH_BACKEND) - vbuf_flush_vertices( vbuf ); } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index a16b45d340..81e4eae401 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -187,6 +187,9 @@ struct draw_context /** TGSI program interpreter runtime state */ struct tgsi_exec_machine machine; + uint num_samplers; + struct tgsi_sampler **samplers; + /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. 
*/ struct gallivm_cpu_engine *engine; @@ -198,7 +201,7 @@ struct draw_context const float (*aligned_constants)[4]; - float (*aligned_constant_storage)[4]; + const float (*aligned_constant_storage)[4]; unsigned const_storage_size; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 18f24e5980..4e5ffa0930 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -228,7 +228,7 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) for (j = 0; j < draw->pt.nr_vertex_elements; j++) { uint buf = draw->pt.vertex_element[j].vertex_buffer_index; ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf]; - ptr += draw->pt.vertex_buffer[buf].pitch * ii; + ptr += draw->pt.vertex_buffer[buf].stride * ii; ptr += draw->pt.vertex_element[j].src_offset; debug_printf(" Attr %u: ", j); @@ -301,8 +301,8 @@ draw_arrays(struct draw_context *draw, unsigned prim, } debug_printf("Buffers:\n"); for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { - debug_printf(" pitch=%u offset=%u ptr=%p\n", - draw->pt.vertex_buffer[i].pitch, + debug_printf(" stride=%u offset=%u ptr=%p\n", + draw->pt.vertex_buffer[i].stride, draw->pt.vertex_buffer[i].buffer_offset, draw->pt.user.vbuffer[i]); } diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index c02f229110..aecaeee5b9 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -173,9 +173,7 @@ void draw_pt_emit( struct pt_emit *emit, void draw_pt_emit_linear( struct pt_emit *emit, const float (*vertex_data)[4], - unsigned vertex_count, unsigned stride, - unsigned start, unsigned count ); void draw_pt_emit_destroy( struct pt_emit *emit ); diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index d520b05869..064e16c295 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -165,6 +165,14 
@@ void draw_pt_emit( struct pt_emit *emit, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + if (vertex_count == 0) + return; + + if (vertex_count >= UNDEFINED_VERTEX_ID) { + assert(0); + return; + } + /* XXX: and work out some way to coordinate the render primitive * between vbuf.c and here... */ @@ -173,9 +181,11 @@ void draw_pt_emit( struct pt_emit *emit, return; } - hw_verts = render->allocate_vertices(render, - (ushort)translate->key.output_stride, - (ushort)vertex_count); + render->allocate_vertices(render, + (ushort)translate->key.output_stride, + (ushort)vertex_count); + + hw_verts = render->map_vertices( render ); if (!hw_verts) { assert(0); return; @@ -196,22 +206,21 @@ void draw_pt_emit( struct pt_emit *emit, vertex_count, hw_verts ); + render->unmap_vertices( render, + 0, + vertex_count - 1 ); + render->draw(render, elts, count); - render->release_vertices(render, - hw_verts, - translate->key.output_stride, - vertex_count); + render->release_vertices(render); } void draw_pt_emit_linear(struct pt_emit *emit, const float (*vertex_data)[4], - unsigned vertex_count, unsigned stride, - unsigned start, unsigned count) { struct draw_context *draw = emit->draw; @@ -226,21 +235,23 @@ void draw_pt_emit_linear(struct pt_emit *emit, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + if (count >= UNDEFINED_VERTEX_ID) + goto fail; + /* XXX: and work out some way to coordinate the render primitive * between vbuf.c and here... 
*/ - if (!draw->render->set_primitive(draw->render, emit->prim)) { - assert(0); - return; - } + if (!draw->render->set_primitive(draw->render, emit->prim)) + goto fail; - hw_verts = render->allocate_vertices(render, - (ushort)translate->key.output_stride, - (ushort)count); - if (!hw_verts) { - assert(0); - return; - } + if (!render->allocate_vertices(render, + (ushort)translate->key.output_stride, + (ushort)count)) + goto fail; + + hw_verts = render->map_vertices( render ); + if (!hw_verts) + goto fail; translate->set_buffer(translate, 0, vertex_data, stride); @@ -251,12 +262,12 @@ void draw_pt_emit_linear(struct pt_emit *emit, translate->run(translate, 0, - vertex_count, + count, hw_verts); if (0) { unsigned i; - for (i = 0; i < vertex_count; i++) { + for (i = 0; i < count; i++) { debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i); draw_dump_emitted_vertex( emit->vinfo, (const uint8_t *)hw_verts + @@ -264,13 +275,17 @@ void draw_pt_emit_linear(struct pt_emit *emit, } } + render->unmap_vertices( render, 0, count - 1 ); + + render->draw_arrays(render, 0, count); + + render->release_vertices(render); - render->draw_arrays(render, start, count); + return; - render->release_vertices(render, - hw_verts, - translate->key.output_stride, - vertex_count); +fail: + assert(0); + return; } struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 6377f896fb..058caf7dcc 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -144,7 +144,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].pitch ); + draw->pt.vertex_buffer[i].stride ); } translate->run_elts( translate, @@ -180,7 +180,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, i, ((char *)draw->pt.user.vbuffer[i] + 
draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].pitch ); + draw->pt.vertex_buffer[i].stride ); } translate->run( translate, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 3966ad48ba..6b7d02a19b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -195,7 +195,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].pitch ); + draw->pt.vertex_buffer[i].stride ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / @@ -229,9 +229,16 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)feme->translate->key.output_stride, - (ushort)fetch_count ); + if (fetch_count >= UNDEFINED_VERTEX_ID) { + assert(0); + return; + } + + draw->render->allocate_vertices( draw->render, + (ushort)feme->translate->key.output_stride, + (ushort)fetch_count ); + + hw_verts = draw->render->map_vertices( draw->render ); if (!hw_verts) { assert(0); return; @@ -254,6 +261,10 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, } } + draw->render->unmap_vertices( draw->render, + 0, + (ushort)(fetch_count - 1) ); + /* XXX: Draw arrays path to avoid re-emitting index list again and * again. 
*/ @@ -263,10 +274,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, /* Done -- that was easy, wasn't it: */ - draw->render->release_vertices( draw->render, - hw_verts, - feme->translate->key.output_stride, - fetch_count ); + draw->render->release_vertices( draw->render ); } @@ -283,13 +291,17 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)feme->translate->key.output_stride, - (ushort)count ); - if (!hw_verts) { - assert(0); - return; - } + if (count >= UNDEFINED_VERTEX_ID) + goto fail; + + if (!draw->render->allocate_vertices( draw->render, + (ushort)feme->translate->key.output_stride, + (ushort)count )) + goto fail; + + hw_verts = draw->render->map_vertices( draw->render ); + if (!hw_verts) + goto fail; /* Single routine to fetch vertices and emit HW verts. */ @@ -307,20 +319,21 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, } } + draw->render->unmap_vertices( draw->render, 0, count - 1 ); + /* XXX: Draw arrays path to avoid re-emitting index list again and * again. 
*/ - draw->render->draw_arrays( draw->render, - 0, /*start*/ - count ); + draw->render->draw_arrays( draw->render, 0, count ); /* Done -- that was easy, wasn't it: */ - draw->render->release_vertices( draw->render, - hw_verts, - feme->translate->key.output_stride, - count ); + draw->render->release_vertices( draw->render ); + return; +fail: + assert(0); + return; } @@ -338,9 +351,15 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)feme->translate->key.output_stride, - (ushort)count ); + if (count >= UNDEFINED_VERTEX_ID) + return FALSE; + + if (!draw->render->allocate_vertices( draw->render, + (ushort)feme->translate->key.output_stride, + (ushort)count )) + return FALSE; + + hw_verts = draw->render->map_vertices( draw->render ); if (!hw_verts) return FALSE; @@ -351,6 +370,8 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, count, hw_verts ); + draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) ); + /* XXX: Draw arrays path to avoid re-emitting index list again and * again. 
*/ @@ -360,10 +381,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, /* Done -- that was easy, wasn't it: */ - draw->render->release_vertices( draw->render, - hw_verts, - feme->translate->key.output_stride, - count ); + draw->render->release_vertices( draw->render ); return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index f7e6a1a8ee..cd9cd4b53f 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -121,7 +121,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, } for (i = 0; i < 5 && i < nr_vbs; i++) { - if (draw->pt.vertex_buffer[i].pitch == 0) + if (draw->pt.vertex_buffer[i].stride == 0) fse->key.const_vbuffers |= (1<<i); } @@ -189,7 +189,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, i, ((const ubyte *) draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].pitch ); + draw->pt.vertex_buffer[i].stride ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / @@ -234,14 +234,17 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)fse->key.output_stride, - (ushort)count ); + if (count >= UNDEFINED_VERTEX_ID) + goto fail; - if (!hw_verts) { - assert(0); - return; - } + if (!draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)count )) + goto fail; + + hw_verts = draw->render->map_vertices( draw->render ); + if (!hw_verts) + goto fail; /* Single routine to fetch vertices, run shader and emit HW verts. 
* Clipping is done elsewhere -- either by the API or on hardware, @@ -251,13 +254,7 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, start, count, hw_verts ); - /* Draw arrays path to avoid re-emitting index list again and - * again. - */ - draw->render->draw_arrays( draw->render, - 0, - count ); - + if (0) { unsigned i; for (i = 0; i < count; i++) { @@ -269,12 +266,24 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, (const uint8_t *)hw_verts + fse->key.output_stride * i ); } } + + draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) ); + /* Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw_arrays( draw->render, + 0, + count ); + - draw->render->release_vertices( draw->render, - hw_verts, - fse->key.output_stride, - count ); + draw->render->release_vertices( draw->render ); + + return; + +fail: + assert(0); + return; } @@ -293,13 +302,17 @@ fse_run(struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)fse->key.output_stride, - (ushort)fetch_count ); - if (!hw_verts) { - assert(0); - return; - } + if (fetch_count >= UNDEFINED_VERTEX_ID) + goto fail; + + if (!draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)fetch_count )) + goto fail; + + hw_verts = draw->render->map_vertices( draw->render ); + if (!hw_verts) + goto fail; /* Single routine to fetch vertices, run shader and emit HW verts. 
@@ -309,9 +322,6 @@ fse_run(struct draw_pt_middle_end *middle, fetch_count, hw_verts ); - draw->render->draw( draw->render, - draw_elts, - draw_count ); if (0) { unsigned i; @@ -323,12 +333,19 @@ fse_run(struct draw_pt_middle_end *middle, } } + draw->render->unmap_vertices( draw->render, 0, (ushort)(fetch_count - 1) ); + + draw->render->draw( draw->render, + draw_elts, + draw_count ); + - draw->render->release_vertices( draw->render, - hw_verts, - fse->key.output_stride, - fetch_count ); + draw->render->release_vertices( draw->render ); + return; +fail: + assert(0); + return; } @@ -347,13 +364,17 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)fse->key.output_stride, - (ushort)count ); + if (count >= UNDEFINED_VERTEX_ID) + return FALSE; - if (!hw_verts) { + if (!draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)count )) + return FALSE; + + hw_verts = draw->render->map_vertices( draw->render ); + if (!hw_verts) return FALSE; - } /* Single routine to fetch vertices, run shader and emit HW verts. 
* Clipping is done elsewhere -- either by the API or on hardware, @@ -369,11 +390,9 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle, draw_count ); + draw->render->unmap_vertices( draw->render, 0, (ushort)(count - 1) ); - draw->render->release_vertices( draw->render, - hw_verts, - fse->key.output_stride, - count ); + draw->render->release_vertices( draw->render ); return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index ec3b41c320..38f9b604d3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -251,9 +251,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, else { draw_pt_emit_linear( fpme->emit, (const float (*)[4])pipeline_verts->data, - count, fpme->vertex_size, - 0, /*start*/ count ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index c15afe65f1..d0e16c9bc3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -67,7 +67,7 @@ static void varray_line_loop_segment(struct varray_frontend *varray, unsigned segment_count, boolean end ) { - assert(segment_count+1 < varray->fetch_max); + assert(segment_count < varray->fetch_max); if (segment_count >= 1) { unsigned nr = 0, i; @@ -77,7 +77,7 @@ static void varray_line_loop_segment(struct varray_frontend *varray, if (end) varray->fetch_elts[nr++] = start; - assert(nr < FETCH_MAX); + assert(nr <= FETCH_MAX); varray->middle->run(varray->middle, varray->fetch_elts, @@ -94,7 +94,7 @@ static void varray_fan_segment(struct varray_frontend *varray, unsigned segment_start, unsigned segment_count ) { - assert(segment_count+1 < varray->fetch_max); + assert(segment_count < varray->fetch_max); if (segment_count >= 2) { unsigned nr = 0, i; @@ -104,7 +104,7 @@ static void 
varray_fan_segment(struct varray_frontend *varray, for (i = 0 ; i < segment_count; i++) varray->fetch_elts[nr++] = start + segment_start + i; - assert(nr < FETCH_MAX); + assert(nr <= FETCH_MAX); varray->middle->run(varray->middle, varray->fetch_elts, diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 80d7200ca6..5d268a2226 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -324,7 +324,7 @@ vcache_check_run( struct draw_pt_front_end *frontend, unsigned fetch_count = max_index + 1 - min_index; const ushort *transformed_elts; ushort *storage = NULL; - boolean ok; + boolean ok = FALSE; if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, @@ -413,11 +413,12 @@ vcache_check_run( struct draw_pt_front_end *frontend, transformed_elts = storage; } - ok = vcache->middle->run_linear_elts( vcache->middle, - min_index, /* start */ - fetch_count, - transformed_elts, - draw_count ); + if (fetch_count < UNDEFINED_VERTEX_ID) + ok = vcache->middle->run_linear_elts( vcache->middle, + min_index, /* start */ + fetch_count, + transformed_elts, + draw_count ); FREE(storage); diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index 9ac068c47b..cccd3bf435 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -30,14 +30,17 @@ * Vertex buffer drawing stage. * * \author Keith Whitwell <keith@tungstengraphics.com> - * \author José Fonseca <jrfonsec@tungstengraphics.com> + * \author Jose Fonseca <jrfonsec@tungstengraphics.com> */ #ifndef DRAW_VBUF_H_ #define DRAW_VBUF_H_ +#include "pipe/p_compiler.h" + +struct pipe_rasterizer_state; struct draw_context; struct vertex_info; @@ -77,9 +80,14 @@ struct vbuf_render { * Hardware renderers will use ttm memory, others will just malloc * something. 
*/ - void *(*allocate_vertices)( struct vbuf_render *, - ushort vertex_size, - ushort nr_vertices ); + boolean (*allocate_vertices)( struct vbuf_render *, + ushort vertex_size, + ushort nr_vertices ); + + void *(*map_vertices)( struct vbuf_render * ); + void (*unmap_vertices)( struct vbuf_render *, + ushort min_index, + ushort max_index ); /** * Notify the renderer of the current primitive when it changes. @@ -106,10 +114,7 @@ struct vbuf_render { /** * Called when vbuf is done with this set of vertices: */ - void (*release_vertices)( struct vbuf_render *, - void *vertices, - unsigned vertex_size, - unsigned vertices_used ); + void (*release_vertices)( struct vbuf_render * ); void (*destroy)( struct vbuf_render * ); }; diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index a943607d7e..c143cf2372 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -81,9 +81,9 @@ struct vertex_info * memcmp() comparisons. 
*/ struct { - ubyte interp_mode:4; /**< INTERP_x */ - ubyte emit:4; /**< EMIT_x */ - ubyte src_index; /**< map to post-xform attribs */ + unsigned interp_mode:4; /**< INTERP_x */ + unsigned emit:4; /**< EMIT_x */ + unsigned src_index:8; /**< map to post-xform attribs */ } attrib[PIPE_MAX_SHADER_INPUTS]; }; diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 34adbd49b0..c057cd67fd 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -50,7 +50,7 @@ void draw_vs_set_constants( struct draw_context *draw, const float (*constants)[4], unsigned size ) { - if (((unsigned)constants) & 0xf) { + if (((uintptr_t)constants) & 0xf) { if (size > draw->vs.const_storage_size) { if (draw->vs.aligned_constant_storage) align_free((void *)draw->vs.aligned_constant_storage); @@ -85,7 +85,10 @@ draw_create_vertex_shader(struct draw_context *draw, if (!vs) { vs = draw_create_vs_sse( draw, shader ); if (!vs) { - vs = draw_create_vs_exec( draw, shader ); + vs = draw_create_vs_ppc( draw, shader ); + if (!vs) { + vs = draw_create_vs_exec( draw, shader ); + } } } diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 68c24abad3..89ae158751 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -158,6 +158,10 @@ draw_create_vs_sse(struct draw_context *draw, const struct pipe_shader_state *templ); struct draw_vertex_shader * +draw_create_vs_ppc(struct draw_context *draw, + const struct pipe_shader_state *templ); + +struct draw_vertex_shader * draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 875ecb92db..1fb69ef81a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -32,7 +32,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include 
"pipe/p_shader_tokens.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" @@ -884,7 +884,7 @@ static void set_fpu_round_nearest( struct aos_compilation *cp ) } } - +#if 0 static void x87_emit_ex2( struct aos_compilation *cp ) { struct x86_reg st0 = x86_make_reg(file_x87, 0); @@ -907,13 +907,17 @@ static void x87_emit_ex2( struct aos_compilation *cp ) assert( stack == cp->func->x87_stack); } +#endif +#if 0 static void PIPE_CDECL print_reg( const char *msg, const float *reg ) { debug_printf("%s: %f %f %f %f\n", msg, reg[0], reg[1], reg[2], reg[3]); } +#endif +#if 0 static void emit_print( struct aos_compilation *cp, const char *message, /* must point to a static string! */ unsigned file, @@ -965,6 +969,7 @@ static void emit_print( struct aos_compilation *cp, /* Done... */ } +#endif /** * The traditional instructions. All operate on internal registers @@ -1103,7 +1108,7 @@ static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } - +#if 0 static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { x87_fld_src(cp, &op->FullSrcRegisters[0], 0); @@ -1111,6 +1116,7 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst x87_fstp_dest4(cp, &op->FullDstRegisters[0]); return TRUE; } +#endif static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) @@ -1566,7 +1572,6 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst */ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - if (0) { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg r = aos_get_xmm_reg(cp); @@ -1575,21 +1580,30 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } else { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - 
struct x86_reg r = aos_get_xmm_reg(cp); + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg r = aos_get_xmm_reg(cp); struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ ); struct x86_reg one_point_five = x86_make_disp( neg_half, 4 ); struct x86_reg src = get_xmm_writable( cp, arg0 ); - - sse_rsqrtss( cp->func, r, src ); /* rsqrtss(a) */ - sse_mulss( cp->func, src, neg_half ); /* -.5 * a */ - sse_mulss( cp->func, src, r ); /* -.5 * a * r */ - sse_mulss( cp->func, src, r ); /* -.5 * a * r * r */ - sse_addss( cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */ - sse_mulss( cp->func, r, src ); /* r * (1.5 - .5 * a * r * r) */ + struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); + struct x86_reg tmp = aos_get_xmm_reg(cp); + + sse_movaps(cp->func, tmp, src); + sse_mulps(cp->func, tmp, neg); + sse_maxps(cp->func, tmp, src); + + sse_rsqrtss( cp->func, r, tmp ); /* rsqrtss(a) */ + sse_mulss( cp->func, tmp, neg_half ); /* -.5 * a */ + sse_mulss( cp->func, tmp, r ); /* -.5 * a * r */ + sse_mulss( cp->func, tmp, r ); /* -.5 * a * r * r */ + sse_addss( cp->func, tmp, one_point_five ); /* 1.5 - .5 * a * r * r */ + sse_mulss( cp->func, r, tmp ); /* r * (1.5 - .5 * a * r * r) */ store_scalar_dest(cp, &op->FullDstRegisters[0], r); + + aos_release_xmm_reg(cp, tmp.idx); + return TRUE; } } @@ -1877,7 +1891,7 @@ static boolean note_immediate( struct aos_compilation *cp, unsigned pos = cp->num_immediates++; unsigned j; - for (j = 0; j < imm->Immediate.Size; j++) { + for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { cp->vaos->machine->immediate[pos][j] = imm->u.ImmediateFloat32[j].Float; } diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 82d27d4493..b3200df811 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -62,12 +62,15 @@ vs_exec_prepare( struct draw_vertex_shader *shader, { struct exec_vertex_shader *evs = 
exec_vertex_shader(shader); - /* specify the vertex program to interpret/execute */ - tgsi_exec_machine_bind_shader(evs->machine, - shader->state.tokens, - PIPE_MAX_SAMPLERS, - NULL /*samplers*/ ); - + /* Specify the vertex program to interpret/execute. + * Avoid rebinding when possible. + */ + if (evs->machine->Tokens != shader->state.tokens) { + tgsi_exec_machine_bind_shader(evs->machine, + shader->state.tokens, + draw->vs.num_samplers, + draw->vs.samplers); + } } diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c new file mode 100644 index 0000000000..d35db57d57 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -0,0 +1,244 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * Brian Paul + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_config.h" + +#include "draw_vs.h" + +#if defined(PIPE_ARCH_PPC) + +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" + +#include "rtasm/rtasm_cpu.h" +#include "rtasm/rtasm_ppc.h" +#include "tgsi/tgsi_ppc.h" +#include "tgsi/tgsi_parse.h" + + + +typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], + float (*outputs)[4][4], + float (*temps)[4][4], + float (*immeds)[4], + float (*consts)[4], + const float *builtins); + + +struct draw_ppc_vertex_shader { + struct draw_vertex_shader base; + struct ppc_function ppc_program; + + codegen_function func; +}; + + +static void +vs_ppc_prepare( struct draw_vertex_shader *base, + struct draw_context *draw ) +{ + /* nothing */ +} + + +/** + * Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. 
+ */ +static void +vs_ppc_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; + unsigned int i; + +#define MAX_VERTICES 4 + + /* loop over verts */ + for (i = 0; i < count; i += MAX_VERTICES) { + const uint max_vertices = MIN2(MAX_VERTICES, count - i); + float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; + float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; + float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; + uint attr; + + /* convert (up to) four input verts to SoA format */ + for (attr = 0; attr < base->info.num_inputs; attr++) { + const float *vIn = (const float *) input; + uint vert; + for (vert = 0; vert < max_vertices; vert++) { +#if 0 + if (attr==0) + printf("Input v%d a%d: %f %f %f %f\n", + vert, attr, vIn[0], vIn[1], vIn[2], vIn[3]); +#endif + inputs_soa[attr][0][vert] = vIn[attr * 4 + 0]; + inputs_soa[attr][1][vert] = vIn[attr * 4 + 1]; + inputs_soa[attr][2][vert] = vIn[attr * 4 + 2]; + inputs_soa[attr][3][vert] = vIn[attr * 4 + 3]; + vIn += input_stride / 4; + } + } + + /* run compiled shader + */ + shader->func(inputs_soa, outputs_soa, temps_soa, + (float (*)[4]) shader->base.immediates, + (float (*)[4]) constants, + ppc_builtin_constants); + + /* convert (up to) four output verts from SoA back to AoS format */ + for (attr = 0; attr < base->info.num_outputs; attr++) { + float *vOut = (float *) output; + uint vert; + for (vert = 0; vert < max_vertices; vert++) { + vOut[attr * 4 + 0] = outputs_soa[attr][0][vert]; + vOut[attr * 4 + 1] = outputs_soa[attr][1][vert]; + vOut[attr * 4 + 2] = outputs_soa[attr][2][vert]; + vOut[attr * 4 + 3] = outputs_soa[attr][3][vert]; +#if 0 + if (attr==0) + printf("Output v%d a%d: %f %f %f %f\n", + vert, attr, vOut[0], vOut[1], vOut[2], vOut[3]); +#endif + vOut += 
output_stride / 4; + } + } + + /* advance to next group of four input/output verts */ + input = (const float (*)[4])((const char *)input + input_stride * max_vertices); + output = (float (*)[4])((char *)output + output_stride * max_vertices); + } +} + + +static void +vs_ppc_delete( struct draw_vertex_shader *base ) +{ + struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; + + ppc_release_func( &shader->ppc_program ); + + align_free( (void *) shader->base.immediates ); + + FREE( (void*) shader->base.state.tokens ); + FREE( shader ); +} + + +struct draw_vertex_shader * +draw_create_vs_ppc(struct draw_context *draw, + const struct pipe_shader_state *templ) +{ + struct draw_ppc_vertex_shader *vs; + + vs = CALLOC_STRUCT( draw_ppc_vertex_shader ); + if (vs == NULL) + return NULL; + + /* we make a private copy of the tokens */ + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) + goto fail; + + tgsi_scan_shader(templ->tokens, &vs->base.info); + + vs->base.draw = draw; +#if 0 + if (1) + vs->base.create_varient = draw_vs_varient_aos_ppc; + else +#endif + vs->base.create_varient = draw_vs_varient_generic; + vs->base.prepare = vs_ppc_prepare; + vs->base.run_linear = vs_ppc_run_linear; + vs->base.delete = vs_ppc_delete; + + vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * + sizeof(float), 16); + + ppc_init_func( &vs->ppc_program ); + +#if 0 + ppc_print_code(&vs->ppc_program, TRUE); + ppc_indent(&vs->ppc_program, 8); +#endif + + if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, + &vs->ppc_program, + (float (*)[4]) vs->base.immediates, + TRUE )) + goto fail; + + vs->func = (codegen_function) ppc_get_func( &vs->ppc_program ); + if (!vs->func) { + goto fail; + } + + return &vs->base; + +fail: + /* + debug_error("tgsi_emit_ppc() failed, falling back to interpreter\n"); + */ + + ppc_release_func( &vs->ppc_program ); + + FREE(vs); + return NULL; +} + + + +#else /* PIPE_ARCH_PPC */ + + 
+struct draw_vertex_shader * +draw_create_vs_ppc( struct draw_context *draw, + const struct pipe_shader_state *templ ) +{ + return (void *) 0; +} + + +#endif /* PIPE_ARCH_PPC */ diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile index c3f7bfba93..5a96d94ec3 100644 --- a/src/gallium/auxiliary/gallivm/Makefile +++ b/src/gallium/auxiliary/gallivm/Makefile @@ -66,12 +66,12 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) gallivm_builtins.cpp: llvm_builtins.c clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin - (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ rm temp1.bin gallivmsoabuiltins.cpp: soabuiltins.c clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin - (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ rm temp2.bin # Emacs tags diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp index 29adeea47d..f4af5cc8ad 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -53,7 +53,7 @@ #include <llvm/ModuleProvider.h> #include <llvm/Pass.h> #include <llvm/PassManager.h> -#include <llvm/ParameterAttributes.h> +#include <llvm/Attributes.h> #include <llvm/Support/PatternMatch.h> #include <llvm/ExecutionEngine/JIT.h> #include <llvm/ExecutionEngine/Interpreter.h> diff --git 
a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp index fcc5c05794..634bac0150 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp @@ -137,4 +137,4 @@ static const unsigned char llvm_builtins_data[] = { 0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c, 0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84, 0x84,0x34,0x85,0x25,0x0c,0x92,0x20,0x59,0xc1,0x20,0x30,0x8f,0x2d,0x10,0x95,0x84, -0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; +0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index 3a4a41e544..1bd00a0c2a 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -56,7 +56,7 @@ #include <llvm/ModuleProvider.h> #include <llvm/Pass.h> #include <llvm/PassManager.h> -#include <llvm/ParameterAttributes.h> +#include <llvm/Attributes.h> #include <llvm/Support/PatternMatch.h> #include <llvm/ExecutionEngine/JIT.h> #include <llvm/ExecutionEngine/Interpreter.h> @@ -158,8 +158,8 @@ void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); llvm::ExecutionEngine *ee = cpu->engine; assert(ee); - /*FIXME : remove */ - ee->DisableLazyCompilation(); + /*FIXME : why was this disabled ? we need it for pow/sqrt/... */ + ee->DisableLazyCompilation(false); ee->addModuleProvider(mp); llvm::Function *func = func_for_shader(prog); @@ -179,8 +179,7 @@ struct gallivm_cpu_engine * gallivm_global_cpu_engine() typedef void (*vertex_shader_runner)(void *ainputs, void *dests, - float (*aconsts)[4], - void *temps); + float (*aconsts)[4]); #define MAX_TGSI_VERTICES 4 /*! 
@@ -202,7 +201,6 @@ int gallivm_cpu_vs_exec(struct gallivm_prog *prog, unsigned int i, j; unsigned slot; vertex_shader_runner runner = reinterpret_cast<vertex_shader_runner>(prog->function); - assert(runner); for (i = 0; i < count; i += MAX_TGSI_VERTICES) { @@ -224,8 +222,7 @@ int gallivm_cpu_vs_exec(struct gallivm_prog *prog, /* run shader */ runner(machine->Inputs, machine->Outputs, - (float (*)[4]) constants, - machine->Temps); + (float (*)[4]) constants); /* Unswizzle all output results */ diff --git a/src/gallium/auxiliary/gallivm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h index ebf3e11cd5..d2c5852bdf 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_p.h +++ b/src/gallium/auxiliary/gallivm/gallivm_p.h @@ -101,10 +101,10 @@ static INLINE int gallivm_w_swizzle(int swizzle) return w; } -#endif /* MESA_LLVM */ - #if defined __cplusplus } #endif +#endif /* MESA_LLVM */ + #endif diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 599975d5ad..ee8162efce 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -43,7 +43,7 @@ #include <llvm/Function.h> #include <llvm/InstrTypes.h> #include <llvm/Instructions.h> -#include <llvm/ParameterAttributes.h> +#include <llvm/Attributes.h> #include <llvm/Support/MemoryBuffer.h> #include <llvm/Bitcode/ReaderWriter.h> diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index a658072551..925e948763 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -37,7 +37,7 @@ #include <llvm/Function.h> #include <llvm/Instructions.h> #include <llvm/Transforms/Utils/Cloning.h> -#include <llvm/ParameterAttributes.h> +#include <llvm/Attributes.h> #include <llvm/Support/MemoryBuffer.h> #include <llvm/Bitcode/ReaderWriter.h> @@ -90,68 +90,11 @@ llvm::Value * 
InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y, return res; } -std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in) -{ - std::vector<llvm::Value*> res(4); - - //Extract x's - llvm::Value *x1 = m_builder.CreateExtractElement(in[0], - m_storage->constantInt(0), - name("extractX")); - //cast it to an unsigned int - x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast")); - - res[0] = x1;//vectorFromVals(x1, x2, x3, x4); - //only x is valid. the others shouldn't be necessary - /* - res[1] = Constant::getNullValue(m_floatVecType); - res[2] = Constant::getNullValue(m_floatVecType); - res[3] = Constant::getNullValue(m_floatVecType); - */ - - return res; -} - - -std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - std::vector<llvm::Value*> res(4); - - res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx")); - res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy")); - res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz")); - res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw")); - - return res; -} - -std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - std::vector<llvm::Value*> res(4); - - res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx")); - res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly")); - res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz")); - res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw")); - - return res; -} - void InstructionsSoa::end() { m_builder.CreateRetVoid(); } -std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2, - const std::vector<llvm::Value*> in3) -{ - std::vector<llvm::Value*> res = mul(in1, in2); - return add(res, in3); -} - std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector) { 
std::vector<llvm::Value*> res(4); @@ -171,6 +114,11 @@ std::vector<llvm::Value*> InstructionsSoa::extractVector(llvm::Value *vector) return res; } +llvm::IRBuilder<>* InstructionsSoa::getIRBuilder() +{ + return &m_builder; +} + void InstructionsSoa::createFunctionMap() { m_functionsMap[TGSI_OPCODE_ABS] = "abs"; @@ -258,11 +206,12 @@ llvm::Module * InstructionsSoa::currentModule() const void InstructionsSoa::createBuiltins() { + std::string ErrMsg; MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( (const char*)&soabuiltins_data[0], - (const char*)&soabuiltins_data[Elements(soabuiltins_data)]); - m_builtins = ParseBitcodeFile(buffer); - std::cout<<"Builtins created at "<<m_builtins<<std::endl; + (const char*)&soabuiltins_data[Elements(soabuiltins_data) - 1]); + m_builtins = ParseBitcodeFile(buffer, &ErrMsg); + std::cout<<"Builtins created at "<<m_builtins<<" ("<<ErrMsg<<")"<<std::endl; assert(m_builtins); createDependencies(); } @@ -274,6 +223,41 @@ std::vector<llvm::Value*> InstructionsSoa::abs(const std::vector<llvm::Value*> i return callBuiltin(func, in1); } +std::vector<llvm::Value*> InstructionsSoa::add(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx")); + res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy")); + res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz")); + res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw")); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::arl(const std::vector<llvm::Value*> in) +{ + std::vector<llvm::Value*> res(4); + + //Extract x's + llvm::Value *x1 = m_builder.CreateExtractElement(in[0], + m_storage->constantInt(0), + name("extractX")); + //cast it to an unsigned int + x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast")); + + res[0] = x1;//vectorFromVals(x1, x2, x3, x4); + //only x is valid. 
the others shouldn't be necessary + /* + res[1] = Constant::getNullValue(m_floatVecType); + res[2] = Constant::getNullValue(m_floatVecType); + res[3] = Constant::getNullValue(m_floatVecType); + */ + + return res; +} + std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2) { @@ -281,6 +265,59 @@ std::vector<llvm::Value*> InstructionsSoa::dp3(const std::vector<llvm::Value*> i return callBuiltin(func, in1, in2); } +std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in) +{ + llvm::Function *func = function(TGSI_OPCODE_LIT); + return callBuiltin(func, in); +} + +std::vector<llvm::Value*> InstructionsSoa::madd(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2, + const std::vector<llvm::Value*> in3) +{ + std::vector<llvm::Value*> res = mul(in1, in2); + return add(res, in3); +} + +std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MAX); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MIN); + return callBuiltin(func, in1, in2); +} + +std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx")); + res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly")); + res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz")); + res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw")); + + return res; +} + +std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + llvm::Function *func = function(TGSI_OPCODE_POWER); + return callBuiltin(func, in1, in2); 
+} + +std::vector<llvm::Value*> InstructionsSoa::rsq(const std::vector<llvm::Value*> in) +{ + llvm::Function *func = function(TGSI_OPCODE_RSQ); + return callBuiltin(func, in); +} std::vector<llvm::Value*> InstructionsSoa::slt(const std::vector<llvm::Value*> in1, const std::vector<llvm::Value*> in2) @@ -289,6 +326,37 @@ std::vector<llvm::Value*> InstructionsSoa::slt(const std::vector<llvm::Value*> i return callBuiltin(func, in1, in2); } +std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1, + const std::vector<llvm::Value*> in2) +{ + std::vector<llvm::Value*> res(4); + + res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx")); + res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby")); + res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz")); + res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw")); + + return res; +} + +void checkFunction(Function *func) +{ + for (Function::const_iterator BI = func->begin(), BE = func->end(); + BI != BE; ++BI) { + const BasicBlock &BB = *BI; + for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end(); + II != IE; ++II) { + const Instruction &I = *II; + std::cout<< "Instr = "<<I; + for (unsigned op = 0, E = I.getNumOperands(); op != E; ++op) { + const Value *Op = I.getOperand(op); + std::cout<< "\top = "<<Op<<"("<<op<<")"<<std::endl; + //I->setOperand(op, V); + } + } + } +} + llvm::Value * InstructionsSoa::allocaTemp() { VectorType *vector = VectorType::get(Type::FloatTy, 4); @@ -408,46 +476,6 @@ std::vector<Value*> InstructionsSoa::callBuiltin(llvm::Function *func, const std return allocaToResult(allocaPtr); } -std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - llvm::Function *func = function(TGSI_OPCODE_POWER); - return callBuiltin(func, in1, in2); -} - -std::vector<llvm::Value*> InstructionsSoa::min(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - llvm::Function *func = 
function(TGSI_OPCODE_MIN); - return callBuiltin(func, in1, in2); -} - - -std::vector<llvm::Value*> InstructionsSoa::max(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - llvm::Function *func = function(TGSI_OPCODE_MAX); - return callBuiltin(func, in1, in2); -} - -void checkFunction(Function *func) -{ - for (Function::const_iterator BI = func->begin(), BE = func->end(); - BI != BE; ++BI) { - const BasicBlock &BB = *BI; - for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end(); - II != IE; ++II) { - const Instruction &I = *II; - std::cout<< "Instr = "<<I; - for (unsigned op = 0, E = I.getNumOperands(); op != E; ++op) { - const Value *Op = I.getOperand(op); - std::cout<< "\top = "<<Op<<"("<<op<<")"<<std::endl; - //I->setOperand(op, V); - } - } - } -} - void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) { assert(originalFunc); @@ -492,28 +520,4 @@ void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) } } -std::vector<llvm::Value*> InstructionsSoa::sub(const std::vector<llvm::Value*> in1, - const std::vector<llvm::Value*> in2) -{ - std::vector<llvm::Value*> res(4); - - res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx")); - res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby")); - res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz")); - res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw")); - - return res; -} - -std::vector<llvm::Value*> InstructionsSoa::lit(const std::vector<llvm::Value*> in) -{ - llvm::Function *func = function(TGSI_OPCODE_LIT); - return callBuiltin(func, in); -} - -std::vector<llvm::Value*> InstructionsSoa::rsq(const std::vector<llvm::Value*> in) -{ - llvm::Function *func = function(TGSI_OPCODE_RSQ); - return callBuiltin(func, in); -} diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 3817fdc904..d6831e0a6b 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ 
b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -76,6 +76,7 @@ public: void end(); std::vector<llvm::Value*> extractVector(llvm::Value *vector); + llvm::IRBuilder<>* getIRBuilder(); private: const char * name(const char *prefix) const; llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp index 6f373f6dd5..73df24c976 100644 --- a/src/gallium/auxiliary/gallivm/storage.cpp +++ b/src/gallium/auxiliary/gallivm/storage.cpp @@ -323,7 +323,7 @@ llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, if (indIdx) { getElem = GetElementPtrInst::Create(ptr, - BinaryOperator::create(Instruction::Add, + BinaryOperator::Create(Instruction::Add, indIdx, constantInt(idx), name("add"), diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index 78d754371f..4984ce985c 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -30,7 +30,7 @@ #include "gallivm_p.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include <llvm/BasicBlock.h> #include <llvm/Module.h> @@ -48,13 +48,11 @@ using namespace llvm; StorageSoa::StorageSoa(llvm::BasicBlock *block, llvm::Value *input, llvm::Value *output, - llvm::Value *consts, - llvm::Value *temps) + llvm::Value *consts) : m_block(block), m_input(input), m_output(output), m_consts(consts), - m_temps(temps), m_immediates(0), m_idx(0) { @@ -93,7 +91,7 @@ void StorageSoa::declareImmediates() std::vector<float> vals(4); std::vector<Constant*> channelArray; - vals[0] = vec[0]; vals[1] = vec[0]; vals[2] = vec[0]; vals[3] = vec[0]; + vals[0] = vec[0]; vals[1] = vec[1]; vals[2] = vec[2]; vals[3] = vec[3]; llvm::Constant *xChannel = createConstGlobalVector(vals); vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1]; @@ -144,22 +142,43 @@ std::vector<llvm::Value*> 
StorageSoa::inputElement(llvm::Value *idx) return res; } -std::vector<llvm::Value*> StorageSoa::constElement(llvm::Value *idx) +llvm::Value* StorageSoa::unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value* vector, int cc) { - std::vector<llvm::Value*> res(4); - llvm::Value *xChannel, *yChannel, *zChannel, *wChannel; + std::vector<llvm::Value*> x(4); + x[0] = m_builder->CreateExtractElement(vector, + constantInt(cc), + name("x")); + + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + Constant *constVector = Constant::getNullValue(vectorType); + Value *res = m_builder->CreateInsertElement(constVector, x[0], + constantInt(0), + name("vecx")); + res = m_builder->CreateInsertElement(res, x[0], constantInt(1), + name("vecxx")); + res = m_builder->CreateInsertElement(res, x[0], constantInt(2), + name("vecxxx")); + res = m_builder->CreateInsertElement(res, x[0], constantInt(3), + name("vecxxxx")); + return res; +} + +std::vector<llvm::Value*> StorageSoa::constElement(llvm::IRBuilder<>* m_builder, llvm::Value *idx) +{ + llvm::Value* res; + std::vector<llvm::Value*> res2(4); + llvm::Value *xChannel; xChannel = elementPointer(m_consts, idx, 0); - yChannel = elementPointer(m_consts, idx, 1); - zChannel = elementPointer(m_consts, idx, 2); - wChannel = elementPointer(m_consts, idx, 3); - res[0] = alignedArrayLoad(xChannel); - res[1] = alignedArrayLoad(yChannel); - res[2] = alignedArrayLoad(zChannel); - res[3] = alignedArrayLoad(wChannel); + res = alignedArrayLoad(xChannel); - return res; + res2[0]=unpackConstElement(m_builder, res,0); + res2[1]=unpackConstElement(m_builder, res,1); + res2[2]=unpackConstElement(m_builder, res,2); + res2[3]=unpackConstElement(m_builder, res,3); + + return res2; } std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx) @@ -174,14 +193,15 @@ std::vector<llvm::Value*> StorageSoa::outputElement(llvm::Value *idx) return res; } -std::vector<llvm::Value*> StorageSoa::tempElement(llvm::Value *idx) 
+std::vector<llvm::Value*> StorageSoa::tempElement(llvm::IRBuilder<>* m_builder, int idx) { std::vector<llvm::Value*> res(4); + llvm::Value *temp = m_temps[idx]; - res[0] = element(m_temps, idx, 0); - res[1] = element(m_temps, idx, 1); - res[2] = element(m_temps, idx, 2); - res[3] = element(m_temps, idx, 3); + res[0] = element(temp, constantInt(0), 0); + res[1] = element(temp, constantInt(0), 1); + res[2] = element(temp, constantInt(0), 2); + res[3] = element(temp, constantInt(0), 3); return res; } @@ -260,6 +280,12 @@ llvm::Module * StorageSoa::currentModule() const return m_block->getParent()->getParent(); } +llvm::Constant * StorageSoa::createConstGlobalFloat(const float val) +{ + Constant*c = ConstantFP::get(APFloat(val)); + return c; +} + llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &vec) { VectorType *vectorType = VectorType::get(Type::FloatTy, 4); @@ -278,7 +304,7 @@ llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector<float> &v } std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle, - llvm::Value *indIdx) + llvm::IRBuilder<>* m_builder,llvm::Value *indIdx) { std::vector<llvm::Value*> val(4); @@ -299,10 +325,10 @@ std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, in val = outputElement(realIndex); break; case TGSI_FILE_TEMPORARY: - val = tempElement(realIndex); + val = tempElement(m_builder, idx); break; case TGSI_FILE_CONSTANT: - val = constElement(realIndex); + val = constElement(m_builder, realIndex); break; case TGSI_FILE_IMMEDIATE: val = immediateElement(realIndex); @@ -328,19 +354,39 @@ std::vector<llvm::Value*> StorageSoa::load(enum tgsi_file_type type, int idx, in return res; } +llvm::Value * StorageSoa::allocaTemp(llvm::IRBuilder<>* m_builder) +{ + VectorType *vector = VectorType::get(Type::FloatTy, 4); + ArrayType *vecArray = ArrayType::get(vector, 4); + AllocaInst *alloca = new AllocaInst(vecArray, "temp", + 
m_builder->GetInsertBlock()); + + return alloca; +} + + void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val, - int mask) + int mask, llvm::IRBuilder<>* m_builder) { llvm::Value *out = 0; + llvm::Value *realIndex = 0; switch(type) { case TGSI_FILE_OUTPUT: out = m_output; + realIndex = constantInt(idx); break; case TGSI_FILE_TEMPORARY: - out = m_temps; + // if that temp doesn't already exist, alloca it + if (m_temps.find(idx) == m_temps.end()) + m_temps[idx] = allocaTemp(m_builder); + + out = m_temps[idx]; + + realIndex = constantInt(0); break; case TGSI_FILE_INPUT: out = m_input; + realIndex = constantInt(idx); break; case TGSI_FILE_ADDRESS: { llvm::Value *addr = m_addresses[idx]; @@ -358,7 +404,6 @@ void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector<llvm assert(0); break; } - llvm::Value *realIndex = constantInt(idx); if ((mask & TGSI_WRITEMASK_X)) { llvm::Value *xChannel = elementPointer(out, realIndex, 0); new StoreInst(val[0], xChannel, false, m_block); diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h index ae2fc7c6ae..56886f85e7 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.h +++ b/src/gallium/auxiliary/gallivm/storagesoa.h @@ -29,6 +29,7 @@ #define STORAGESOA_H #include <pipe/p_shader_tokens.h> +#include <llvm/Support/IRBuilder.h> #include <vector> #include <list> @@ -51,14 +52,13 @@ public: StorageSoa(llvm::BasicBlock *block, llvm::Value *input, llvm::Value *output, - llvm::Value *consts, - llvm::Value *temps); + llvm::Value *consts); std::vector<llvm::Value*> load(enum tgsi_file_type type, int idx, int swizzle, - llvm::Value *indIdx =0); + llvm::IRBuilder<>* m_builder, llvm::Value *indIdx =0); void store(enum tgsi_file_type type, int idx, const std::vector<llvm::Value*> &val, - int mask); + int mask, llvm::IRBuilder<>* m_builder); void addImmediate(float *vec); void declareImmediates(); @@ -76,12 +76,14 @@ private: const char 
*name(const char *prefix) const; llvm::Value *alignedArrayLoad(llvm::Value *val); llvm::Module *currentModule() const; + llvm::Constant *createConstGlobalFloat(const float val); llvm::Constant *createConstGlobalVector(const std::vector<float> &vec); std::vector<llvm::Value*> inputElement(llvm::Value *indIdx); - std::vector<llvm::Value*> constElement(llvm::Value *indIdx); + llvm::Value* unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx, int cc); + std::vector<llvm::Value*> constElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx); std::vector<llvm::Value*> outputElement(llvm::Value *indIdx); - std::vector<llvm::Value*> tempElement(llvm::Value *indIdx); + std::vector<llvm::Value*> tempElement(llvm::IRBuilder<>* m_builder, int idx); std::vector<llvm::Value*> immediateElement(llvm::Value *indIdx); private: llvm::BasicBlock *m_block; @@ -89,12 +91,13 @@ private: llvm::Value *m_input; llvm::Value *m_output; llvm::Value *m_consts; - llvm::Value *m_temps; + std::map<int, llvm::Value*> m_temps; llvm::GlobalVariable *m_immediates; std::map<int, llvm::Value*> m_addresses; std::vector<std::vector<float> > m_immediatesToFlush; + llvm::Value * allocaTemp(llvm::IRBuilder<>* m_builder); mutable std::map<int, llvm::ConstantInt*> m_constInts; mutable char m_name[32]; diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 7292c0e366..5b08200d14 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -25,7 +25,7 @@ #include <llvm/ModuleProvider.h> #include <llvm/Pass.h> #include <llvm/PassManager.h> -#include <llvm/ParameterAttributes.h> +#include <llvm/Attributes.h> #include <llvm/Support/PatternMatch.h> #include <llvm/ExecutionEngine/JIT.h> #include <llvm/ExecutionEngine/Interpreter.h> @@ -52,8 +52,7 @@ static inline FunctionType *vertexShaderFunctionType() // pass are castable to the following: // [4 x <4 x float>] inputs, // [4 x <4 x float>] 
output, - // [4 x [4 x float]] consts, - // [4 x <4 x float>] temps + // [4 x [1 x float]] consts, std::vector<const Type*> funcArgs; VectorType *vectorType = VectorType::get(Type::FloatTy, 4); @@ -61,13 +60,12 @@ static inline FunctionType *vertexShaderFunctionType() PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0); ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4); - ArrayType *constsArray = ArrayType::get(floatArray, 4); + ArrayType *constsArray = ArrayType::get(floatArray, 1); PointerType *constsArrayPtr = PointerType::get(constsArray, 0); funcArgs.push_back(vectorArrayPtr);//inputs funcArgs.push_back(vectorArrayPtr);//output funcArgs.push_back(constsArrayPtr);//consts - funcArgs.push_back(vectorArrayPtr);//temps FunctionType *functionType = FunctionType::get( /*Result=*/Type::VoidTy, @@ -162,7 +160,7 @@ translate_immediate(Storage *storage, { float vec[4]; int i; - for (i = 0; i < imm->Immediate.Size - 1; ++i) { + for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) { switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: vec[i] = imm->u.ImmediateFloat32[i].Float; @@ -181,7 +179,7 @@ translate_immediateir(StorageSoa *storage, { float vec[4]; int i; - for (i = 0; i < imm->Immediate.Size - 1; ++i) { + for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) { switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: vec[i] = imm->u.ImmediateFloat32[i].Float; @@ -707,9 +705,8 @@ translate_instructionir(llvm::Module *module, if (src->SrcRegister.Indirect) { indIdx = storage->addrElement(src->SrcRegisterInd.Index); } - val = storage->load((enum tgsi_file_type)src->SrcRegister.File, - src->SrcRegister.Index, swizzle, indIdx); + src->SrcRegister.Index, swizzle, instr->getIRBuilder(), indIdx); inputs[i] = val; } @@ -1025,9 +1022,9 @@ translate_instructionir(llvm::Module *module, /* store results */ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - storage->store((enum 
tgsi_file_type)dst->DstRegister.File, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + dst->DstRegister.Index, out, dst->DstRegister.WriteMask, + instr->getIRBuilder() ); } } @@ -1122,8 +1119,6 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, output->setName("outputs"); Value *consts = args++; consts->setName("consts"); - Value *temps = args++; - temps->setName("temps"); BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); @@ -1132,7 +1127,7 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, fi = tgsi_default_full_instruction(); fd = tgsi_default_full_declaration(); - StorageSoa storage(label_entry, input, output, consts, temps); + StorageSoa storage(label_entry, input, output, consts); InstructionsSoa instr(mod, shader, label_entry, &storage); while(!tgsi_parse_end_of_tokens(&parse)) { diff --git a/src/gallium/auxiliary/indices/Makefile b/src/gallium/auxiliary/indices/Makefile new file mode 100644 index 0000000000..25ee899c40 --- /dev/null +++ b/src/gallium/auxiliary/indices/Makefile @@ -0,0 +1,12 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = indices + +C_SOURCES = \ + u_indices_gen.c + +include ../../Makefile.template + +u_indices_gen.c: u_indices_gen.py + python $< > $@ diff --git a/src/gallium/auxiliary/indices/SConscript b/src/gallium/auxiliary/indices/SConscript new file mode 100644 index 0000000000..65a43a9f64 --- /dev/null +++ b/src/gallium/auxiliary/indices/SConscript @@ -0,0 +1,17 @@ +Import('*') + +env.CodeGenerate( + target = 'u_indices_gen.c', + script = 'u_indices_gen.py', + source = [], + command = 'python $SCRIPT > $TARGET' +) + +indices = env.ConvenienceLibrary( + target = 'indices', + source = [ +# 'u_indices.c', + 'u_indices_gen.c', + ]) + +auxiliaries.insert(0, indices) diff --git a/src/gallium/auxiliary/indices/u_indices.c b/src/gallium/auxiliary/indices/u_indices.c new file mode 100644 index 0000000000..0cf7d88653 --- /dev/null +++ b/src/gallium/auxiliary/indices/u_indices.c @@ -0,0 +1,253 @@ +/* + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "u_indices.h" +#include "u_indices_priv.h" + +static void translate_memcpy_ushort( const void *in, + unsigned nr, + void *out ) +{ + memcpy(out, in, nr*sizeof(short)); +} + +static void translate_memcpy_uint( const void *in, + unsigned nr, + void *out ) +{ + memcpy(out, in, nr*sizeof(int)); +} + + +int u_index_translator( unsigned hw_mask, + unsigned prim, + unsigned in_index_size, + unsigned nr, + unsigned in_pv, + unsigned out_pv, + unsigned *out_prim, + unsigned *out_index_size, + unsigned *out_nr, + u_translate_func *out_translate ) +{ + unsigned in_idx; + unsigned out_idx; + int ret = U_TRANSLATE_NORMAL; + + u_index_init(); + + in_idx = in_size_idx(in_index_size); + *out_index_size = (in_index_size == 4) ? 
4 : 2; + out_idx = out_size_idx(*out_index_size); + + if ((hw_mask & (1<<prim)) && + in_index_size == *out_index_size && + in_pv == out_pv) + { + if (in_index_size == 4) + *out_translate = translate_memcpy_uint; + else + *out_translate = translate_memcpy_ushort; + + *out_prim = prim; + *out_nr = nr; + + return U_TRANSLATE_MEMCPY; + } + else { + switch (prim) { + case PIPE_PRIM_POINTS: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_POINTS; + *out_nr = nr; + break; + + case PIPE_PRIM_LINES: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_LINES; + *out_nr = nr; + break; + + case PIPE_PRIM_LINE_STRIP: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_LINES; + *out_nr = (nr - 1) * 2; + break; + + case PIPE_PRIM_LINE_LOOP: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_LINES; + *out_nr = nr * 2; + break; + + case PIPE_PRIM_TRIANGLES: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = nr; + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + break; + + case PIPE_PRIM_TRIANGLE_FAN: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + break; + + case PIPE_PRIM_QUADS: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr / 4) * 6; + break; + + case PIPE_PRIM_QUAD_STRIP: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + break; + + case PIPE_PRIM_POLYGON: + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + break; + + default: 
+ assert(0); + *out_translate = translate[in_idx][out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_POINTS; + *out_nr = nr; + return U_TRANSLATE_ERROR; + } + } + + return ret; +} + + + + + +int u_index_generator( unsigned hw_mask, + unsigned prim, + unsigned start, + unsigned nr, + unsigned in_pv, + unsigned out_pv, + unsigned *out_prim, + unsigned *out_index_size, + unsigned *out_nr, + u_generate_func *out_generate ) + +{ + unsigned out_idx; + + u_index_init(); + + *out_index_size = ((start + nr) > 0xfffe) ? 4 : 2; + out_idx = out_size_idx(*out_index_size); + + if ((hw_mask & (1<<prim)) && + (in_pv == out_pv)) { + + *out_generate = generate[out_idx][in_pv][out_pv][PIPE_PRIM_POINTS]; + *out_prim = prim; + *out_nr = nr; + return U_GENERATE_LINEAR; + } + else { + switch (prim) { + case PIPE_PRIM_POINTS: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_POINTS; + *out_nr = nr; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_LINES: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_LINES; + *out_nr = nr; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_LINE_STRIP: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_LINES; + *out_nr = (nr - 1) * 2; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_LINE_LOOP: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_LINES; + *out_nr = nr * 2; + return U_GENERATE_ONE_OFF; + + case PIPE_PRIM_TRIANGLES: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = nr; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_TRIANGLE_STRIP: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_TRIANGLE_FAN: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + return U_GENERATE_REUSABLE; + + 
case PIPE_PRIM_QUADS: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr / 4) * 6; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_QUAD_STRIP: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + return U_GENERATE_REUSABLE; + + case PIPE_PRIM_POLYGON: + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_TRIANGLES; + *out_nr = (nr - 2) * 3; + return U_GENERATE_REUSABLE; + + default: + assert(0); + *out_generate = generate[out_idx][in_pv][out_pv][prim]; + *out_prim = PIPE_PRIM_POINTS; + *out_nr = nr; + return U_TRANSLATE_ERROR; + } + } +} diff --git a/src/gallium/auxiliary/indices/u_indices.h b/src/gallium/auxiliary/indices/u_indices.h new file mode 100644 index 0000000000..abf5a3037d --- /dev/null +++ b/src/gallium/auxiliary/indices/u_indices.h @@ -0,0 +1,83 @@ +/* + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#ifndef U_INDICES_H +#define U_INDICES_H + +#include "pipe/p_compiler.h" + +#define PV_FIRST 0 +#define PV_LAST 1 +#define PV_COUNT 2 + +typedef void (*u_translate_func)( const void *in, + unsigned nr, + void *out ); + +typedef void (*u_generate_func)( unsigned nr, + void *out ); + + +/* Return codes describe the translate/generate operation. Caller may + * be able to reuse translated indices under some circumstances. + */ +#define U_TRANSLATE_ERROR -1 +#define U_TRANSLATE_NORMAL 1 +#define U_TRANSLATE_MEMCPY 2 +#define U_GENERATE_LINEAR 3 +#define U_GENERATE_REUSABLE 4 +#define U_GENERATE_ONE_OFF 5 + + +void u_index_init( void ); + +int u_index_translator( unsigned hw_mask, + unsigned prim, + unsigned in_index_size, + unsigned nr, + unsigned in_pv, /* API */ + unsigned out_pv, /* hardware */ + unsigned *out_prim, + unsigned *out_index_size, + unsigned *out_nr, + u_translate_func *out_translate ); + +/* Note that even when generating it is necessary to know what the + * API's PV is, as the indices generated will depend on whether it is + * the same as hardware or not, and in the case of triangle strips, + * whether it is first or last. 
+ */ +int u_index_generator( unsigned hw_mask, + unsigned prim, + unsigned start, + unsigned nr, + unsigned in_pv, /* API */ + unsigned out_pv, /* hardware */ + unsigned *out_prim, + unsigned *out_index_size, + unsigned *out_nr, + u_generate_func *out_generate ); + + +#endif diff --git a/src/gallium/auxiliary/indices/u_indices_gen.c b/src/gallium/auxiliary/indices/u_indices_gen.c new file mode 100644 index 0000000000..3c981e5d7f --- /dev/null +++ b/src/gallium/auxiliary/indices/u_indices_gen.c @@ -0,0 +1,5129 @@ +/* File automatically generated by indices.py */ + +/* + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + + + +/** + * @file + * Functions to translate and generate index lists + */ + +#include "indices/u_indices.h" +#include "indices/u_indices_priv.h" +#include "pipe/p_compiler.h" +#include "util/u_debug.h" +#include "pipe/p_defines.h" +#include "util/u_memory.h" + + +static unsigned out_size_idx( unsigned index_size ) +{ + switch (index_size) { + case 4: return OUT_UINT; + case 2: return OUT_USHORT; + default: assert(0); return OUT_USHORT; + } +} + +static unsigned in_size_idx( unsigned index_size ) +{ + switch (index_size) { + case 4: return IN_UINT; + case 2: return IN_USHORT; + case 1: return IN_UBYTE; + default: assert(0); return IN_UBYTE; + } +} + + +static u_translate_func translate[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT]; +static u_generate_func generate[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT]; + + + +static void generate_points_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)(i); + } +} +static void generate_lines_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)(i); + (out+i)[1] = (ushort)(i+1); + } +} +static void generate_linestrip_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(i+1); + } +} +static void generate_lineloop_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(i+1); + } + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(0); +} +static void generate_tris_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; 
i+=3) { + (out+i)[0] = (ushort)(i); + (out+i)[1] = (ushort)(i+1); + (out+i)[2] = (ushort)(i+2); + } +} +static void generate_tristrip_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(i+1+(i&1)); + (out+j)[2] = (ushort)(i+2-(i&1)); + } +} +static void generate_trifan_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(0); + (out+j)[1] = (ushort)(i+1); + (out+j)[2] = (ushort)(i+2); + } +} +static void generate_quads_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)(i+0); + (out+j+0)[1] = (ushort)(i+1); + (out+j+0)[2] = (ushort)(i+3); + (out+j+3)[0] = (ushort)(i+1); + (out+j+3)[1] = (ushort)(i+2); + (out+j+3)[2] = (ushort)(i+3); + } +} +static void generate_quadstrip_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)(i+2); + (out+j+0)[1] = (ushort)(i+0); + (out+j+0)[2] = (ushort)(i+3); + (out+j+3)[0] = (ushort)(i+0); + (out+j+3)[1] = (ushort)(i+1); + (out+j+3)[2] = (ushort)(i+3); + } +} +static void generate_polygon_ushort_first2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(0); + (out+j)[1] = (ushort)(i+1); + (out+j)[2] = (ushort)(i+2); + } +} +static void generate_points_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)(i); + } +} +static void generate_lines_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort 
*out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)(i+1); + (out+i)[1] = (ushort)(i); + } +} +static void generate_linestrip_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i); + } +} +static void generate_lineloop_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i); + } + (out+j)[0] = (ushort)(0); + (out+j)[1] = (ushort)(i); +} +static void generate_tris_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)(i+1); + (out+i)[1] = (ushort)(i+2); + (out+i)[2] = (ushort)(i); + } +} +static void generate_tristrip_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i+1+(i&1)); + (out+j)[1] = (ushort)(i+2-(i&1)); + (out+j)[2] = (ushort)(i); + } +} +static void generate_trifan_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i+2); + (out+j)[2] = (ushort)(0); + } +} +static void generate_quads_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)(i+1); + (out+j+0)[1] = (ushort)(i+3); + (out+j+0)[2] = (ushort)(i+0); + (out+j+3)[0] = (ushort)(i+2); + (out+j+3)[1] = (ushort)(i+3); + (out+j+3)[2] = (ushort)(i+1); + } +} +static void generate_quadstrip_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out 
= (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)(i+0); + (out+j+0)[1] = (ushort)(i+3); + (out+j+0)[2] = (ushort)(i+2); + (out+j+3)[0] = (ushort)(i+1); + (out+j+3)[1] = (ushort)(i+3); + (out+j+3)[2] = (ushort)(i+0); + } +} +static void generate_polygon_ushort_first2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i+2); + (out+j)[2] = (ushort)(0); + } +} +static void generate_points_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)(i); + } +} +static void generate_lines_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)(i+1); + (out+i)[1] = (ushort)(i); + } +} +static void generate_linestrip_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i); + } +} +static void generate_lineloop_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i); + } + (out+j)[0] = (ushort)(0); + (out+j)[1] = (ushort)(i); +} +static void generate_tris_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)(i+2); + (out+i)[1] = (ushort)(i); + (out+i)[2] = (ushort)(i+1); + } +} +static void generate_tristrip_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + 
(out+j)[0] = (ushort)(i+2); + (out+j)[1] = (ushort)(i+(i&1)); + (out+j)[2] = (ushort)(i+1-(i&1)); + } +} +static void generate_trifan_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i+2); + (out+j)[1] = (ushort)(0); + (out+j)[2] = (ushort)(i+1); + } +} +static void generate_quads_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)(i+3); + (out+j+0)[1] = (ushort)(i+0); + (out+j+0)[2] = (ushort)(i+1); + (out+j+3)[0] = (ushort)(i+3); + (out+j+3)[1] = (ushort)(i+1); + (out+j+3)[2] = (ushort)(i+2); + } +} +static void generate_quadstrip_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)(i+3); + (out+j+0)[1] = (ushort)(i+2); + (out+j+0)[2] = (ushort)(i+0); + (out+j+3)[0] = (ushort)(i+3); + (out+j+3)[1] = (ushort)(i+0); + (out+j+3)[2] = (ushort)(i+1); + } +} +static void generate_polygon_ushort_last2first( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(0); + (out+j)[1] = (ushort)(i+1); + (out+j)[2] = (ushort)(i+2); + } +} +static void generate_points_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)(i); + } +} +static void generate_lines_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)(i); + (out+i)[1] = (ushort)(i+1); + } +} +static void generate_linestrip_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for 
(j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(i+1); + } +} +static void generate_lineloop_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(i+1); + } + (out+j)[0] = (ushort)(i); + (out+j)[1] = (ushort)(0); +} +static void generate_tris_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)(i); + (out+i)[1] = (ushort)(i+1); + (out+i)[2] = (ushort)(i+2); + } +} +static void generate_tristrip_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i+(i&1)); + (out+j)[1] = (ushort)(i+1-(i&1)); + (out+j)[2] = (ushort)(i+2); + } +} +static void generate_trifan_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(0); + (out+j)[1] = (ushort)(i+1); + (out+j)[2] = (ushort)(i+2); + } +} +static void generate_quads_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)(i+0); + (out+j+0)[1] = (ushort)(i+1); + (out+j+0)[2] = (ushort)(i+3); + (out+j+3)[0] = (ushort)(i+1); + (out+j+3)[1] = (ushort)(i+2); + (out+j+3)[2] = (ushort)(i+3); + } +} +static void generate_quadstrip_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)(i+2); + (out+j+0)[1] = (ushort)(i+0); + (out+j+0)[2] = (ushort)(i+3); + (out+j+3)[0] = (ushort)(i+0); + (out+j+3)[1] = (ushort)(i+1); + (out+j+3)[2] = (ushort)(i+3); + } +} +static void 
generate_polygon_ushort_last2last( + unsigned nr, + void *_out ) +{ + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)(i+1); + (out+j)[1] = (ushort)(i+2); + (out+j)[2] = (ushort)(0); + } +} +static void generate_points_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)(i); + } +} +static void generate_lines_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)(i); + (out+i)[1] = (uint)(i+1); + } +} +static void generate_linestrip_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(i+1); + } +} +static void generate_lineloop_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(i+1); + } + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(0); +} +static void generate_tris_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)(i); + (out+i)[1] = (uint)(i+1); + (out+i)[2] = (uint)(i+2); + } +} +static void generate_tristrip_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(i+1+(i&1)); + (out+j)[2] = (uint)(i+2-(i&1)); + } +} +static void generate_trifan_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(0); + (out+j)[1] = (uint)(i+1); + (out+j)[2] = (uint)(i+2); 
+ } +} +static void generate_quads_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)(i+0); + (out+j+0)[1] = (uint)(i+1); + (out+j+0)[2] = (uint)(i+3); + (out+j+3)[0] = (uint)(i+1); + (out+j+3)[1] = (uint)(i+2); + (out+j+3)[2] = (uint)(i+3); + } +} +static void generate_quadstrip_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)(i+2); + (out+j+0)[1] = (uint)(i+0); + (out+j+0)[2] = (uint)(i+3); + (out+j+3)[0] = (uint)(i+0); + (out+j+3)[1] = (uint)(i+1); + (out+j+3)[2] = (uint)(i+3); + } +} +static void generate_polygon_uint_first2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(0); + (out+j)[1] = (uint)(i+1); + (out+j)[2] = (uint)(i+2); + } +} +static void generate_points_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)(i); + } +} +static void generate_lines_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)(i+1); + (out+i)[1] = (uint)(i); + } +} +static void generate_linestrip_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i); + } +} +static void generate_lineloop_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i); + } + (out+j)[0] = (uint)(0); + (out+j)[1] = (uint)(i); +} +static void generate_tris_uint_first2last( + 
unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)(i+1); + (out+i)[1] = (uint)(i+2); + (out+i)[2] = (uint)(i); + } +} +static void generate_tristrip_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+1+(i&1)); + (out+j)[1] = (uint)(i+2-(i&1)); + (out+j)[2] = (uint)(i); + } +} +static void generate_trifan_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i+2); + (out+j)[2] = (uint)(0); + } +} +static void generate_quads_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)(i+1); + (out+j+0)[1] = (uint)(i+3); + (out+j+0)[2] = (uint)(i+0); + (out+j+3)[0] = (uint)(i+2); + (out+j+3)[1] = (uint)(i+3); + (out+j+3)[2] = (uint)(i+1); + } +} +static void generate_quadstrip_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)(i+0); + (out+j+0)[1] = (uint)(i+3); + (out+j+0)[2] = (uint)(i+2); + (out+j+3)[0] = (uint)(i+1); + (out+j+3)[1] = (uint)(i+3); + (out+j+3)[2] = (uint)(i+0); + } +} +static void generate_polygon_uint_first2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i+2); + (out+j)[2] = (uint)(0); + } +} +static void generate_points_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)(i); + } +} +static void generate_lines_uint_last2first( + unsigned nr, + void *_out 
) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)(i+1); + (out+i)[1] = (uint)(i); + } +} +static void generate_linestrip_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i); + } +} +static void generate_lineloop_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i); + } + (out+j)[0] = (uint)(0); + (out+j)[1] = (uint)(i); +} +static void generate_tris_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)(i+2); + (out+i)[1] = (uint)(i); + (out+i)[2] = (uint)(i+1); + } +} +static void generate_tristrip_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+2); + (out+j)[1] = (uint)(i+(i&1)); + (out+j)[2] = (uint)(i+1-(i&1)); + } +} +static void generate_trifan_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+2); + (out+j)[1] = (uint)(0); + (out+j)[2] = (uint)(i+1); + } +} +static void generate_quads_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)(i+3); + (out+j+0)[1] = (uint)(i+0); + (out+j+0)[2] = (uint)(i+1); + (out+j+3)[0] = (uint)(i+3); + (out+j+3)[1] = (uint)(i+1); + (out+j+3)[2] = (uint)(i+2); + } +} +static void generate_quadstrip_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, 
i+=2) { + (out+j+0)[0] = (uint)(i+3); + (out+j+0)[1] = (uint)(i+2); + (out+j+0)[2] = (uint)(i+0); + (out+j+3)[0] = (uint)(i+3); + (out+j+3)[1] = (uint)(i+0); + (out+j+3)[2] = (uint)(i+1); + } +} +static void generate_polygon_uint_last2first( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(0); + (out+j)[1] = (uint)(i+1); + (out+j)[2] = (uint)(i+2); + } +} +static void generate_points_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)(i); + } +} +static void generate_lines_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)(i); + (out+i)[1] = (uint)(i+1); + } +} +static void generate_linestrip_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(i+1); + } +} +static void generate_lineloop_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(i+1); + } + (out+j)[0] = (uint)(i); + (out+j)[1] = (uint)(0); +} +static void generate_tris_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)(i); + (out+i)[1] = (uint)(i+1); + (out+i)[2] = (uint)(i+2); + } +} +static void generate_tristrip_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+(i&1)); + (out+j)[1] = (uint)(i+1-(i&1)); + (out+j)[2] = (uint)(i+2); + } +} +static void generate_trifan_uint_last2last( + unsigned nr, + void 
*_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(0); + (out+j)[1] = (uint)(i+1); + (out+j)[2] = (uint)(i+2); + } +} +static void generate_quads_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)(i+0); + (out+j+0)[1] = (uint)(i+1); + (out+j+0)[2] = (uint)(i+3); + (out+j+3)[0] = (uint)(i+1); + (out+j+3)[1] = (uint)(i+2); + (out+j+3)[2] = (uint)(i+3); + } +} +static void generate_quadstrip_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)(i+2); + (out+j+0)[1] = (uint)(i+0); + (out+j+0)[2] = (uint)(i+3); + (out+j+3)[0] = (uint)(i+0); + (out+j+3)[1] = (uint)(i+1); + (out+j+3)[2] = (uint)(i+3); + } +} +static void generate_polygon_uint_last2last( + unsigned nr, + void *_out ) +{ + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)(i+1); + (out+j)[1] = (uint)(i+2); + (out+j)[2] = (uint)(0); + } +} +static void translate_points_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + } +} +static void translate_linestrip_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) 
{ + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } +} +static void translate_lineloop_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[0]; +} +static void translate_tris_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + (out+i)[2] = (ushort)in[i+2]; + } +} +static void translate_tristrip_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1+(i&1)]; + (out+j)[2] = (ushort)in[i+2-(i&1)]; + } +} +static void translate_trifan_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_quads_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void 
translate_quadstrip_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_polygon_ubyte2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_points_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i]; + } +} +static void translate_linestrip_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } +} +static void translate_lineloop_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } 
+ (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i]; +} +static void translate_tris_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i+2]; + (out+i)[2] = (ushort)in[i]; + } +} +static void translate_tristrip_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1+(i&1)]; + (out+j)[1] = (ushort)in[i+2-(i&1)]; + (out+j)[2] = (ushort)in[i]; + } +} +static void translate_trifan_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_quads_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+1]; + (out+j+0)[1] = (ushort)in[i+3]; + (out+j+0)[2] = (ushort)in[i+0]; + (out+j+3)[0] = (ushort)in[i+2]; + (out+j+3)[1] = (ushort)in[i+3]; + (out+j+3)[2] = (ushort)in[i+1]; + } +} +static void translate_quadstrip_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+3]; + (out+j+0)[2] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+3]; + (out+j+3)[2] = 
(ushort)in[i+0]; + } +} +static void translate_polygon_ubyte2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_points_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i]; + } +} +static void translate_linestrip_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } +} +static void translate_lineloop_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i]; +} +static void translate_tris_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i+2]; + (out+i)[1] = (ushort)in[i]; + (out+i)[2] = (ushort)in[i+1]; + } +} +static void 
translate_tristrip_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+2]; + (out+j)[1] = (ushort)in[i+(i&1)]; + (out+j)[2] = (ushort)in[i+1-(i&1)]; + } +} +static void translate_trifan_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+2]; + (out+j)[1] = (ushort)in[0]; + (out+j)[2] = (ushort)in[i+1]; + } +} +static void translate_quads_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+3]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+1]; + (out+j+3)[0] = (ushort)in[i+3]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+2]; + } +} +static void translate_quadstrip_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+3]; + (out+j+0)[1] = (ushort)in[i+2]; + (out+j+0)[2] = (ushort)in[i+0]; + (out+j+3)[0] = (ushort)in[i+3]; + (out+j+3)[1] = (ushort)in[i+0]; + (out+j+3)[2] = (ushort)in[i+1]; + } +} +static void translate_polygon_ubyte2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void 
translate_points_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + } +} +static void translate_linestrip_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } +} +static void translate_lineloop_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[0]; +} +static void translate_tris_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + (out+i)[2] = (ushort)in[i+2]; + } +} +static void translate_tristrip_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+(i&1)]; + (out+j)[1] = (ushort)in[i+1-(i&1)]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_trifan_ubyte2ushort_last2last( + 
const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_quads_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_quadstrip_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_polygon_ubyte2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_points_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + 
(void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + } +} +static void translate_linestrip_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } +} +static void translate_lineloop_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[0]; +} +static void translate_tris_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + (out+i)[2] = (uint)in[i+2]; + } +} +static void translate_tristrip_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1+(i&1)]; + (out+j)[2] = (uint)in[i+2-(i&1)]; + } +} +static void translate_trifan_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_quads_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; 
j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_quadstrip_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_polygon_ubyte2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_points_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i]; + } +} +static void translate_linestrip_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } +} +static void translate_lineloop_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const 
ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i]; +} +static void translate_tris_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i+2]; + (out+i)[2] = (uint)in[i]; + } +} +static void translate_tristrip_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1+(i&1)]; + (out+j)[1] = (uint)in[i+2-(i&1)]; + (out+j)[2] = (uint)in[i]; + } +} +static void translate_trifan_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_quads_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+1]; + (out+j+0)[1] = (uint)in[i+3]; + (out+j+0)[2] = (uint)in[i+0]; + (out+j+3)[0] = (uint)in[i+2]; + (out+j+3)[1] = (uint)in[i+3]; + (out+j+3)[2] = (uint)in[i+1]; + } +} +static void translate_quadstrip_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+3]; + (out+j+0)[2] = 
(uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+3]; + (out+j+3)[2] = (uint)in[i+0]; + } +} +static void translate_polygon_ubyte2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_points_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i]; + } +} +static void translate_linestrip_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } +} +static void translate_lineloop_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i]; +} +static void translate_tris_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i+2]; + (out+i)[1] = (uint)in[i]; + (out+i)[2] = (uint)in[i+1]; + } 
+} +static void translate_tristrip_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+2]; + (out+j)[1] = (uint)in[i+(i&1)]; + (out+j)[2] = (uint)in[i+1-(i&1)]; + } +} +static void translate_trifan_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+2]; + (out+j)[1] = (uint)in[0]; + (out+j)[2] = (uint)in[i+1]; + } +} +static void translate_quads_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+3]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+1]; + (out+j+3)[0] = (uint)in[i+3]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+2]; + } +} +static void translate_quadstrip_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+3]; + (out+j+0)[1] = (uint)in[i+2]; + (out+j+0)[2] = (uint)in[i+0]; + (out+j+3)[0] = (uint)in[i+3]; + (out+j+3)[1] = (uint)in[i+0]; + (out+j+3)[2] = (uint)in[i+1]; + } +} +static void translate_polygon_ubyte2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_points_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ 
+ const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + } +} +static void translate_linestrip_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } +} +static void translate_lineloop_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[0]; +} +static void translate_tris_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + (out+i)[2] = (uint)in[i+2]; + } +} +static void translate_tristrip_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+(i&1)]; + (out+j)[1] = (uint)in[i+1-(i&1)]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_trifan_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 
0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_quads_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_quadstrip_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_polygon_ubyte2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ubyte*in = (const ubyte*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_points_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + } +} +static void translate_linestrip_ushort2ushort_first2first( + const void * _in, + 
unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } +} +static void translate_lineloop_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[0]; +} +static void translate_tris_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + (out+i)[2] = (ushort)in[i+2]; + } +} +static void translate_tristrip_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1+(i&1)]; + (out+j)[2] = (ushort)in[i+2-(i&1)]; + } +} +static void translate_trifan_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_quads_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+1]; + 
(out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_quadstrip_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_polygon_ushort2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_points_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i]; + } +} +static void translate_linestrip_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } +} +static void translate_lineloop_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const 
ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i]; +} +static void translate_tris_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i+2]; + (out+i)[2] = (ushort)in[i]; + } +} +static void translate_tristrip_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1+(i&1)]; + (out+j)[1] = (ushort)in[i+2-(i&1)]; + (out+j)[2] = (ushort)in[i]; + } +} +static void translate_trifan_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_quads_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+1]; + (out+j+0)[1] = (ushort)in[i+3]; + (out+j+0)[2] = (ushort)in[i+0]; + (out+j+3)[0] = (ushort)in[i+2]; + (out+j+3)[1] = (ushort)in[i+3]; + (out+j+3)[2] = (ushort)in[i+1]; + } +} +static void translate_quadstrip_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { 
+ (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+3]; + (out+j+0)[2] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+3]; + (out+j+3)[2] = (ushort)in[i+0]; + } +} +static void translate_polygon_ushort2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_points_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i]; + } +} +static void translate_linestrip_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } +} +static void translate_lineloop_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i]; +} +static void translate_tris_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort 
*out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i+2]; + (out+i)[1] = (ushort)in[i]; + (out+i)[2] = (ushort)in[i+1]; + } +} +static void translate_tristrip_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+2]; + (out+j)[1] = (ushort)in[i+(i&1)]; + (out+j)[2] = (ushort)in[i+1-(i&1)]; + } +} +static void translate_trifan_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+2]; + (out+j)[1] = (ushort)in[0]; + (out+j)[2] = (ushort)in[i+1]; + } +} +static void translate_quads_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+3]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+1]; + (out+j+3)[0] = (ushort)in[i+3]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+2]; + } +} +static void translate_quadstrip_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+3]; + (out+j+0)[1] = (ushort)in[i+2]; + (out+j+0)[2] = (ushort)in[i+0]; + (out+j+3)[0] = (ushort)in[i+3]; + (out+j+3)[1] = (ushort)in[i+0]; + (out+j+3)[2] = (ushort)in[i+1]; + } +} +static void translate_polygon_ushort2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + 
unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_points_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + } +} +static void translate_linestrip_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } +} +static void translate_lineloop_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[0]; +} +static void translate_tris_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + (out+i)[2] = (ushort)in[i+2]; + } +} +static void translate_tristrip_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = 
i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+(i&1)]; + (out+j)[1] = (ushort)in[i+1-(i&1)]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_trifan_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_quads_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_quadstrip_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_polygon_ushort2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_points_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] 
= (uint)in[i]; + } +} +static void translate_lines_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + } +} +static void translate_linestrip_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } +} +static void translate_lineloop_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[0]; +} +static void translate_tris_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + (out+i)[2] = (uint)in[i+2]; + } +} +static void translate_tristrip_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1+(i&1)]; + (out+j)[2] = (uint)in[i+2-(i&1)]; + } +} +static void translate_trifan_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = 
(uint)in[i+2]; + } +} +static void translate_quads_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_quadstrip_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_polygon_ushort2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_points_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i]; + } +} +static void translate_linestrip_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, 
j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } +} +static void translate_lineloop_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i]; +} +static void translate_tris_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i+2]; + (out+i)[2] = (uint)in[i]; + } +} +static void translate_tristrip_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1+(i&1)]; + (out+j)[1] = (uint)in[i+2-(i&1)]; + (out+j)[2] = (uint)in[i]; + } +} +static void translate_trifan_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_quads_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+1]; + (out+j+0)[1] = (uint)in[i+3]; + (out+j+0)[2] = (uint)in[i+0]; + (out+j+3)[0] = (uint)in[i+2]; + (out+j+3)[1] = (uint)in[i+3]; + (out+j+3)[2] = (uint)in[i+1]; + } +} +static void translate_quadstrip_ushort2uint_first2last( + 
const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+3]; + (out+j+0)[2] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+3]; + (out+j+3)[2] = (uint)in[i+0]; + } +} +static void translate_polygon_ushort2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_points_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i]; + } +} +static void translate_linestrip_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } +} +static void translate_lineloop_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i]; +} +static void 
translate_tris_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i+2]; + (out+i)[1] = (uint)in[i]; + (out+i)[2] = (uint)in[i+1]; + } +} +static void translate_tristrip_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+2]; + (out+j)[1] = (uint)in[i+(i&1)]; + (out+j)[2] = (uint)in[i+1-(i&1)]; + } +} +static void translate_trifan_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+2]; + (out+j)[1] = (uint)in[0]; + (out+j)[2] = (uint)in[i+1]; + } +} +static void translate_quads_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+3]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+1]; + (out+j+3)[0] = (uint)in[i+3]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+2]; + } +} +static void translate_quadstrip_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+3]; + (out+j+0)[1] = (uint)in[i+2]; + (out+j+0)[2] = (uint)in[i+0]; + (out+j+3)[0] = (uint)in[i+3]; + (out+j+3)[1] = (uint)in[i+0]; + (out+j+3)[2] = (uint)in[i+1]; + } +} +static void translate_polygon_ushort2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const 
ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_points_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + } +} +static void translate_linestrip_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } +} +static void translate_lineloop_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[0]; +} +static void translate_tris_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + (out+i)[2] = (uint)in[i+2]; + } +} +static void translate_tristrip_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 
0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+(i&1)]; + (out+j)[1] = (uint)in[i+1-(i&1)]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_trifan_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_quads_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_quadstrip_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_polygon_ushort2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const ushort*in = (const ushort*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_points_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void 
translate_lines_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + } +} +static void translate_linestrip_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } +} +static void translate_lineloop_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[0]; +} +static void translate_tris_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + (out+i)[2] = (ushort)in[i+2]; + } +} +static void translate_tristrip_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1+(i&1)]; + (out+j)[2] = (ushort)in[i+2-(i&1)]; + } +} +static void translate_trifan_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = 
(ushort)in[i+2]; + } +} +static void translate_quads_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_quadstrip_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_polygon_uint2ushort_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_points_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i]; + } +} +static void translate_linestrip_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + 
ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } +} +static void translate_lineloop_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i]; +} +static void translate_tris_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i+2]; + (out+i)[2] = (ushort)in[i]; + } +} +static void translate_tristrip_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1+(i&1)]; + (out+j)[1] = (ushort)in[i+2-(i&1)]; + (out+j)[2] = (ushort)in[i]; + } +} +static void translate_trifan_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_quads_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+1]; + (out+j+0)[1] = (ushort)in[i+3]; + (out+j+0)[2] = (ushort)in[i+0]; + (out+j+3)[0] = (ushort)in[i+2]; + (out+j+3)[1] = (ushort)in[i+3]; + (out+j+3)[2] = 
(ushort)in[i+1]; + } +} +static void translate_quadstrip_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+3]; + (out+j+0)[2] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+3]; + (out+j+3)[2] = (ushort)in[i+0]; + } +} +static void translate_polygon_uint2ushort_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_points_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i+1]; + (out+i)[1] = (ushort)in[i]; + } +} +static void translate_linestrip_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i]; + } +} +static void translate_lineloop_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = 
(ushort)in[i]; + } + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i]; +} +static void translate_tris_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i+2]; + (out+i)[1] = (ushort)in[i]; + (out+i)[2] = (ushort)in[i+1]; + } +} +static void translate_tristrip_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+2]; + (out+j)[1] = (ushort)in[i+(i&1)]; + (out+j)[2] = (ushort)in[i+1-(i&1)]; + } +} +static void translate_trifan_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+2]; + (out+j)[1] = (ushort)in[0]; + (out+j)[2] = (ushort)in[i+1]; + } +} +static void translate_quads_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+3]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+1]; + (out+j+3)[0] = (ushort)in[i+3]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+2]; + } +} +static void translate_quadstrip_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+3]; + (out+j+0)[1] = (ushort)in[i+2]; + (out+j+0)[2] = (ushort)in[i+0]; + (out+j+3)[0] = (ushort)in[i+3]; + (out+j+3)[1] = (ushort)in[i+0]; + (out+j+3)[2] = 
(ushort)in[i+1]; + } +} +static void translate_polygon_uint2ushort_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_points_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (ushort)in[i]; + } +} +static void translate_lines_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + } +} +static void translate_linestrip_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } +} +static void translate_lineloop_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[i+1]; + } + (out+j)[0] = (ushort)in[i]; + (out+j)[1] = (ushort)in[0]; +} +static void translate_tris_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (ushort)in[i]; + (out+i)[1] = (ushort)in[i+1]; + (out+i)[2] = (ushort)in[i+2]; + } +} +static void 
translate_tristrip_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+(i&1)]; + (out+j)[1] = (ushort)in[i+1-(i&1)]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_trifan_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[0]; + (out+j)[1] = (ushort)in[i+1]; + (out+j)[2] = (ushort)in[i+2]; + } +} +static void translate_quads_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (ushort)in[i+0]; + (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+1]; + (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_quadstrip_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; + (out+j+0)[2] = (ushort)in[i+3]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+3]; + } +} +static void translate_polygon_uint2ushort_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + ushort *out = (ushort*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (ushort)in[i+1]; + (out+j)[1] = (ushort)in[i+2]; + (out+j)[2] = (ushort)in[0]; + } +} +static void translate_points_uint2uint_first2first( + const void * 
_in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + } +} +static void translate_linestrip_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } +} +static void translate_lineloop_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[0]; +} +static void translate_tris_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + (out+i)[2] = (uint)in[i+2]; + } +} +static void translate_tristrip_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1+(i&1)]; + (out+j)[2] = (uint)in[i+2-(i&1)]; + } +} +static void translate_trifan_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, 
j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_quads_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_quadstrip_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_polygon_uint2uint_first2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_points_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i]; + } +} +static void translate_linestrip_uint2uint_first2last( + const void * _in, + unsigned nr, + 
void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } +} +static void translate_lineloop_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i]; +} +static void translate_tris_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i+2]; + (out+i)[2] = (uint)in[i]; + } +} +static void translate_tristrip_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1+(i&1)]; + (out+j)[1] = (uint)in[i+2-(i&1)]; + (out+j)[2] = (uint)in[i]; + } +} +static void translate_trifan_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_quads_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+1]; + (out+j+0)[1] = (uint)in[i+3]; + (out+j+0)[2] = (uint)in[i+0]; + (out+j+3)[0] = (uint)in[i+2]; + (out+j+3)[1] = (uint)in[i+3]; + (out+j+3)[2] = (uint)in[i+1]; + } +} 
+static void translate_quadstrip_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+3]; + (out+j+0)[2] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+3]; + (out+j+3)[2] = (uint)in[i+0]; + } +} +static void translate_polygon_uint2uint_first2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +static void translate_points_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i+1]; + (out+i)[1] = (uint)in[i]; + } +} +static void translate_linestrip_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } +} +static void translate_lineloop_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i]; + } + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i]; +} +static void 
translate_tris_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i+2]; + (out+i)[1] = (uint)in[i]; + (out+i)[2] = (uint)in[i+1]; + } +} +static void translate_tristrip_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+2]; + (out+j)[1] = (uint)in[i+(i&1)]; + (out+j)[2] = (uint)in[i+1-(i&1)]; + } +} +static void translate_trifan_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+2]; + (out+j)[1] = (uint)in[0]; + (out+j)[2] = (uint)in[i+1]; + } +} +static void translate_quads_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+3]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+1]; + (out+j+3)[0] = (uint)in[i+3]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+2]; + } +} +static void translate_quadstrip_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+3]; + (out+j+0)[1] = (uint)in[i+2]; + (out+j+0)[2] = (uint)in[i+0]; + (out+j+3)[0] = (uint)in[i+3]; + (out+j+3)[1] = (uint)in[i+0]; + (out+j+3)[2] = (uint)in[i+1]; + } +} +static void translate_polygon_uint2uint_last2first( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + 
uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_points_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i++) { + (out+i)[0] = (uint)in[i]; + } +} +static void translate_lines_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=2) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + } +} +static void translate_linestrip_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } +} +static void translate_lineloop_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr - 2; j+=2, i++) { + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[i+1]; + } + (out+j)[0] = (uint)in[i]; + (out+j)[1] = (uint)in[0]; +} +static void translate_tris_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (i = 0; i < nr; i+=3) { + (out+i)[0] = (uint)in[i]; + (out+i)[1] = (uint)in[i+1]; + (out+i)[2] = (uint)in[i+2]; + } +} +static void translate_tristrip_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+(i&1)]; + (out+j)[1] 
= (uint)in[i+1-(i&1)]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_trifan_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[0]; + (out+j)[1] = (uint)in[i+1]; + (out+j)[2] = (uint)in[i+2]; + } +} +static void translate_quads_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=4) { + (out+j+0)[0] = (uint)in[i+0]; + (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+1]; + (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_quadstrip_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=6, i+=2) { + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; + (out+j+0)[2] = (uint)in[i+3]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+3]; + } +} +static void translate_polygon_uint2uint_last2last( + const void * _in, + unsigned nr, + void *_out ) +{ + const uint*in = (const uint*)_in; + uint *out = (uint*)_out; + unsigned i, j; + (void)j; + for (j = i = 0; j < nr; j+=3, i++) { + (out+j)[0] = (uint)in[i+1]; + (out+j)[1] = (uint)in[i+2]; + (out+j)[2] = (uint)in[0]; + } +} +void u_index_init( void ) +{ + static int firsttime = 1; + if (!firsttime) return; + firsttime = 0; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_first2first; 
+generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_first2first; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_first2last; +generate[OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_first2last; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = 
generate_linestrip_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_last2first; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_ushort_last2last; +generate[OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_ushort_last2last; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_first2first; 
+generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_uint_first2first; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_first2last; +generate[OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_uint_first2last; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = generate_points_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = generate_lines_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = 
generate_lineloop_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = generate_quads_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = generate_polygon_uint_last2first; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = generate_points_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = generate_lines_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = generate_linestrip_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = generate_lineloop_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = generate_tris_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = generate_trifan_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = generate_tristrip_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = generate_quads_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = generate_quadstrip_uint_last2last; +generate[OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = generate_polygon_uint_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = 
translate_lineloop_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_first2first; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_first2last; +translate[IN_UBYTE][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_first2last; 
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_last2first; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2ushort_last2last; 
+translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2ushort_last2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_first2first; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_first2last; 
+translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_first2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_last2first; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = 
translate_lines_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ubyte2uint_last2last; +translate[IN_UBYTE][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ubyte2uint_last2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = 
translate_quadstrip_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_first2first; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_first2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_last2first; 
+translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_last2first; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2ushort_last2last; +translate[IN_USHORT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2ushort_last2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = 
translate_lines_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_first2first; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = 
translate_quadstrip_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_first2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_last2first; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_ushort2uint_last2last; 
+translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_ushort2uint_last2last; +translate[IN_USHORT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_ushort2uint_last2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_first2first; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_first2last; 
+translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_first2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_last2first; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_last2first; 
+translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2ushort_last2last; +translate[IN_UINT][OUT_USHORT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2ushort_last2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUADS] = 
translate_quads_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_first2first; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_FIRST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_first2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POINTS] = translate_points_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINES] = translate_lines_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_last2first; 
+translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_FIRST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_last2first; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POINTS] = translate_points_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINES] = translate_lines_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_STRIP] = translate_linestrip_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_LINE_LOOP] = translate_lineloop_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLES] = translate_tris_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_FAN] = translate_trifan_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_TRIANGLE_STRIP] = translate_tristrip_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUADS] = translate_quads_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_QUAD_STRIP] = translate_quadstrip_uint2uint_last2last; +translate[IN_UINT][OUT_UINT][PV_LAST][PV_LAST][PIPE_PRIM_POLYGON] = translate_polygon_uint2uint_last2last; +} +#include "indices/u_indices.c" diff --git a/src/gallium/auxiliary/indices/u_indices_gen.py b/src/gallium/auxiliary/indices/u_indices_gen.py new file mode 100644 index 0000000000..af63d09930 --- /dev/null +++ b/src/gallium/auxiliary/indices/u_indices_gen.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python +copyright = ''' +/* + * Copyright 2009 
VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
# ---------------------------------------------------------------------------
# Generator for the index translate/generate C functions.
#
# Running this script prints a complete C source file on stdout:
#   * a prolog (banner, license text, includes, lookup helpers, tables),
#   * one static C function per (intype, outtype, inpv, outpv, primitive),
#   * u_index_init(), which stores every function in the lookup tables,
#   * an epilog that #includes "indices/u_indices.c".
#
# All output goes through single-argument print(...) calls, a form that is
# valid and behaves identically under both Python 2 and Python 3.
# ---------------------------------------------------------------------------

# Index "types".  GENERATE is a pseudo input type: the emitted function has
# no input array and writes the running index itself.
GENERATE, UBYTE, USHORT, UINT = 'generate', 'ubyte', 'ushort', 'uint'

# Values for the inpv/outpv arguments (PV_FIRST / PV_LAST on the C side).
FIRST, LAST = 'first', 'last'

INTYPES = (GENERATE, UBYTE, USHORT, UINT)
OUTTYPES = (USHORT, UINT)
PVS = (FIRST, LAST)

# Short primitive names, used to build the C function names.
PRIMS = ('points',
         'lines',
         'linestrip',
         'lineloop',
         'tris',
         'trifan',
         'tristrip',
         'quads',
         'quadstrip',
         'polygon')

# C enumerants, in the same order as PRIMS, used as table subscripts.
LONGPRIMS = ('PIPE_PRIM_POINTS',
             'PIPE_PRIM_LINES',
             'PIPE_PRIM_LINE_STRIP',
             'PIPE_PRIM_LINE_LOOP',
             'PIPE_PRIM_TRIANGLES',
             'PIPE_PRIM_TRIANGLE_FAN',
             'PIPE_PRIM_TRIANGLE_STRIP',
             'PIPE_PRIM_QUADS',
             'PIPE_PRIM_QUAD_STRIP',
             'PIPE_PRIM_POLYGON')

# Python-side name -> C-side table subscript.
longprim = dict(zip(PRIMS, LONGPRIMS))
intype_idx = dict(ubyte='IN_UBYTE', ushort='IN_USHORT', uint='IN_UINT')
outtype_idx = dict(ushort='OUT_USHORT', uint='OUT_UINT')
pv_idx = dict(first='PV_FIRST', last='PV_LAST')


def prolog():
    """Print the file banner, the license text, the #includes, the
    index-size lookup helpers and the two function-pointer tables.

    Reads the module-level ``copyright`` string, which is defined earlier
    in the file, above this section.
    """
    print('/* File automatically generated by u_indices_gen.py */')
    print(copyright)
    print(r'''

/**
 * @file
 * Functions to translate and generate index lists
 */

#include "indices/u_indices.h"
#include "indices/u_indices_priv.h"
#include "pipe/p_compiler.h"
#include "util/u_debug.h"
#include "pipe/p_defines.h"
#include "util/u_memory.h"


static unsigned out_size_idx( unsigned index_size )
{
   switch (index_size) {
   case 4: return OUT_UINT;
   case 2: return OUT_USHORT;
   default: assert(0); return OUT_USHORT;
   }
}

static unsigned in_size_idx( unsigned index_size )
{
   switch (index_size) {
   case 4: return IN_UINT;
   case 2: return IN_USHORT;
   case 1: return IN_UBYTE;
   default: assert(0); return IN_UBYTE;
   }
}


static u_translate_func translate[IN_COUNT][OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT];
static u_generate_func generate[OUT_COUNT][PV_COUNT][PV_COUNT][PRIM_COUNT];


''')


def vert(intype, outtype, v0):
    """Return the C expression for one output vertex index.

    For GENERATE the index expression ``v0`` itself is cast to ``outtype``;
    otherwise ``in[v0]`` is read and cast.
    """
    if intype == GENERATE:
        return '(' + outtype + ')(' + v0 + ')'
    return '(' + outtype + ')in[' + v0 + ']'


def point(intype, outtype, ptr, v0):
    """Print the C statement storing one index at ``(ptr)[0]``."""
    print(' (' + ptr + ')[0] = ' + vert(intype, outtype, v0) + ';')


def line(intype, outtype, ptr, v0, v1):
    """Print the C statements storing two indices at ``(ptr)[0..1]``."""
    print(' (' + ptr + ')[0] = ' + vert(intype, outtype, v0) + ';')
    print(' (' + ptr + ')[1] = ' + vert(intype, outtype, v1) + ';')


def tri(intype, outtype, ptr, v0, v1, v2):
    """Print the C statements storing three indices at ``(ptr)[0..2]``."""
    print(' (' + ptr + ')[0] = ' + vert(intype, outtype, v0) + ';')
    print(' (' + ptr + ')[1] = ' + vert(intype, outtype, v1) + ';')
    print(' (' + ptr + ')[2] = ' + vert(intype, outtype, v2) + ';')


def do_point(intype, outtype, ptr, v0):
    """Emit one point; a single vertex needs no reordering."""
    point(intype, outtype, ptr, v0)


def do_line(intype, outtype, ptr, v0, v1, inpv, outpv):
    """Emit one line, swapping its two vertices when ``inpv != outpv``."""
    if inpv == outpv:
        line(intype, outtype, ptr, v0, v1)
    else:
        line(intype, outtype, ptr, v1, v0)


def do_tri(intype, outtype, ptr, v0, v1, v2, inpv, outpv):
    """Emit one triangle, rotating its vertices when ``inpv != outpv``.

    first -> last : (v1, v2, v0), the original first vertex ends up last.
    last -> first : (v2, v0, v1), the original last vertex ends up first.
    A rotation (rather than a swap) keeps the cyclic vertex order.
    """
    if inpv == outpv:
        tri(intype, outtype, ptr, v0, v1, v2)
    elif inpv == FIRST:
        tri(intype, outtype, ptr, v1, v2, v0)
    else:
        tri(intype, outtype, ptr, v2, v0, v1)


def do_quad(intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv):
    """Emit one quad as two triangles, (v0, v1, v3) at ``ptr+0`` and
    (v1, v2, v3) at ``ptr+3``.
    """
    do_tri(intype, outtype, ptr + '+0', v0, v1, v3, inpv, outpv)
    do_tri(intype, outtype, ptr + '+3', v1, v2, v3, inpv, outpv)


def name(intype, outtype, inpv, outpv, prim):
    """Return the C function name for one combination, e.g.
    ``generate_tris_uint_first2last`` or
    ``translate_tris_ubyte2uint_first2last``.
    """
    if intype == GENERATE:
        return 'generate_' + prim + '_' + outtype + '_' + inpv + '2' + outpv
    return ('translate_' + prim + '_' + intype + '2' + outtype +
            '_' + inpv + '2' + outpv)


def preamble(intype, outtype, inpv, outpv, prim):
    """Print the C function header and its local declarations.

    Translate functions additionally receive ``_in`` and get a typed ``in``
    pointer; generate functions only receive ``nr`` and ``_out``.
    """
    print('static void ' + name(intype, outtype, inpv, outpv, prim) + '(')
    if intype != GENERATE:
        print(' const void * _in,')
    print(' unsigned nr,')
    print(' void *_out )')
    print('{')
    if intype != GENERATE:
        print(' const ' + intype + '*in = (const ' + intype + '*)_in;')
    print(' ' + outtype + ' *out = (' + outtype + '*)_out;')
    print(' unsigned i, j;')
    # 'j' is unused by some emitted loops; the cast silences the warning.
    print(' (void)j;')


def postamble():
    """Print the closing brace of an emitted C function."""
    print('}')


def points(intype, outtype, inpv, outpv):
    """Emit the C function for point lists (one index out per index in)."""
    preamble(intype, outtype, inpv, outpv, prim='points')
    print(' for (i = 0; i < nr; i++) { ')
    do_point(intype, outtype, 'out+i', 'i')
    print(' }')
    postamble()


def lines(intype, outtype, inpv, outpv):
    """Emit the C function for independent lines (two indices per step)."""
    preamble(intype, outtype, inpv, outpv, prim='lines')
    print(' for (i = 0; i < nr; i+=2) { ')
    do_line(intype, outtype, 'out+i', 'i', 'i+1', inpv, outpv)
    print(' }')
    postamble()


def linestrip(intype, outtype, inpv, outpv):
    """Emit the C function expanding a line strip into separate lines."""
    preamble(intype, outtype, inpv, outpv, prim='linestrip')
    print(' for (j = i = 0; j < nr; j+=2, i++) { ')
    do_line(intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv)
    print(' }')
    postamble()


def lineloop(intype, outtype, inpv, outpv):
    """Emit the C function expanding a line loop into separate lines.

    The loop emits every segment but the last; the closing segment back to
    vertex 0 is emitted after the loop.
    """
    preamble(intype, outtype, inpv, outpv, prim='lineloop')
    # NOTE(review): 'nr' is unsigned in the emitted C, so 'nr - 2' wraps
    # around for nr < 2 -- confirm callers never pass such a count.
    print(' for (j = i = 0; j < nr - 2; j+=2, i++) { ')
    do_line(intype, outtype, 'out+j', 'i', 'i+1', inpv, outpv)
    print(' }')
    do_line(intype, outtype, 'out+j', 'i', '0', inpv, outpv)
    postamble()


def tris(intype, outtype, inpv, outpv):
    """Emit the C function for independent triangles."""
    preamble(intype, outtype, inpv, outpv, prim='tris')
    print(' for (i = 0; i < nr; i+=3) { ')
    do_tri(intype, outtype, 'out+i', 'i', 'i+1', 'i+2', inpv, outpv)
    print(' }')
    postamble()


def tristrip(intype, outtype, inpv, outpv):
    """Emit the C function expanding a triangle strip into triangles.

    The ``(i&1)`` terms swap two vertices on every odd triangle; which pair
    is swapped depends on ``inpv``.
    """
    preamble(intype, outtype, inpv, outpv, prim='tristrip')
    print(' for (j = i = 0; j < nr; j+=3, i++) { ')
    if inpv == FIRST:
        do_tri(intype, outtype, 'out+j',
               'i', 'i+1+(i&1)', 'i+2-(i&1)', inpv, outpv)
    else:
        do_tri(intype, outtype, 'out+j',
               'i+(i&1)', 'i+1-(i&1)', 'i+2', inpv, outpv)
    print(' }')
    postamble()


def trifan(intype, outtype, inpv, outpv):
    """Emit the C function expanding a triangle fan around vertex 0."""
    preamble(intype, outtype, inpv, outpv, prim='trifan')
    print(' for (j = i = 0; j < nr; j+=3, i++) { ')
    do_tri(intype, outtype, 'out+j', '0', 'i+1', 'i+2', inpv, outpv)
    print(' }')
    postamble()


def polygon(intype, outtype, inpv, outpv):
    """Emit the C function expanding a polygon into triangles that share
    vertex 0; vertex 0 is passed first or last depending on ``inpv``.
    """
    preamble(intype, outtype, inpv, outpv, prim='polygon')
    print(' for (j = i = 0; j < nr; j+=3, i++) { ')
    if inpv == FIRST:
        do_tri(intype, outtype, 'out+j', '0', 'i+1', 'i+2', inpv, outpv)
    else:
        do_tri(intype, outtype, 'out+j', 'i+1', 'i+2', '0', inpv, outpv)
    print(' }')
    postamble()


def quads(intype, outtype, inpv, outpv):
    """Emit the C function expanding independent quads (4 in, 6 out)."""
    preamble(intype, outtype, inpv, outpv, prim='quads')
    print(' for (j = i = 0; j < nr; j+=6, i+=4) { ')
    do_quad(intype, outtype, 'out+j',
            'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv)
    print(' }')
    postamble()


def quadstrip(intype, outtype, inpv, outpv):
    """Emit the C function expanding a quad strip (advance 2 in, 6 out)."""
    preamble(intype, outtype, inpv, outpv, prim='quadstrip')
    print(' for (j = i = 0; j < nr; j+=6, i+=2) { ')
    do_quad(intype, outtype, 'out+j',
            'i+2', 'i+0', 'i+1', 'i+3', inpv, outpv)
    print(' }')
    postamble()


def emit_funcs():
    """Print every C function: all INTYPES x OUTTYPES x PVS x PVS
    combinations, and for each one all ten primitive emitters.
    """
    # Emission order inside one combination; it fixes the layout of the
    # generated file, so it is kept explicit rather than derived from PRIMS.
    emitters = (points, lines, linestrip, lineloop, tris,
                tristrip, trifan, quads, quadstrip, polygon)
    for intype in INTYPES:
        for outtype in OUTTYPES:
            for inpv in PVS:
                for outpv in PVS:
                    for emit in emitters:
                        emit(intype, outtype, inpv, outpv)


def init(intype, outtype, inpv, outpv, prim):
    """Print the C assignment storing one function in its lookup table.

    GENERATE functions go into ``generate[...]``, which has no input-type
    dimension; everything else goes into ``translate[...]``.
    """
    if intype == GENERATE:
        print('generate[' +
              outtype_idx[outtype] +
              '][' + pv_idx[inpv] +
              '][' + pv_idx[outpv] +
              '][' + longprim[prim] +
              '] = ' + name(intype, outtype, inpv, outpv, prim) + ';')
    else:
        print('translate[' +
              intype_idx[intype] +
              '][' + outtype_idx[outtype] +
              '][' + pv_idx[inpv] +
              '][' + pv_idx[outpv] +
              '][' + longprim[prim] +
              '] = ' + name(intype, outtype, inpv, outpv, prim) + ';')


def emit_all_inits():
    """Print one table assignment per emitted function."""
    for intype in INTYPES:
        for outtype in OUTTYPES:
            for inpv in PVS:
                for outpv in PVS:
                    for prim in PRIMS:
                        init(intype, outtype, inpv, outpv, prim)


def emit_init():
    """Print ``u_index_init()``, which fills both tables on its first call
    and returns immediately on later calls.
    """
    print('void u_index_init( void )')
    print('{')
    print(' static int firsttime = 1;')
    print(' if (!firsttime) return;')
    print(' firsttime = 0;')
    emit_all_inits()
    print('}')


def epilog():
    """Print the trailing #include of indices/u_indices.c."""
    print('#include "indices/u_indices.c"')


def main():
    """Write the complete generated C file to stdout."""
    prolog()
    emit_funcs()
    emit_init()
    epilog()


if __name__ == '__main__':
    main()
+ */ + +#ifndef U_INDICES_PRIV_H +#define U_INDICES_PRIV_H + +#include "pipe/p_compiler.h" +#include "u_indices.h" + +#define IN_UBYTE 0 +#define IN_USHORT 1 +#define IN_UINT 2 +#define IN_COUNT 3 + +#define OUT_USHORT 0 +#define OUT_UINT 1 +#define OUT_COUNT 2 + + +#define PRIM_COUNT (PIPE_PRIM_POLYGON + 1) + +#endif diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index f9b39d9ce0..1c00ba8d98 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -11,12 +11,9 @@ C_SOURCES = \ pb_bufmgr_debug.c \ pb_bufmgr_fenced.c \ pb_bufmgr_mm.c \ + pb_bufmgr_ondemand.c \ pb_bufmgr_pool.c \ pb_bufmgr_slab.c \ - pb_validate.c \ - pb_winsys.c + pb_validate.c include ../../Makefile.template - -symlinks: - diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index 56a40dda0d..8e9f06abe4 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -10,10 +10,10 @@ pipebuffer = env.ConvenienceLibrary( 'pb_bufmgr_debug.c', 'pb_bufmgr_fenced.c', 'pb_bufmgr_mm.c', + 'pb_bufmgr_ondemand.c', 'pb_bufmgr_pool.c', 'pb_bufmgr_slab.c', 'pb_validate.c', - 'pb_winsys.c', ]) auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 8505d333bd..e6b0b30ff4 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -37,7 +37,7 @@ * There is no obligation of a winsys driver to use this library. And a pipe * driver should be completly agnostic about it. 
* - * \author Jos� Fonseca <jrfonseca@tungstengraphics.com> + * \author Jose Fonseca <jrfonseca@tungstengraphics.com> */ #ifndef PB_BUFFER_H_ @@ -45,7 +45,8 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" +#include "pipe/p_error.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" @@ -56,6 +57,8 @@ extern "C" { struct pb_vtbl; +struct pb_validate; + /** * Buffer description. @@ -104,6 +107,13 @@ struct pb_vtbl void (*unmap)( struct pb_buffer *buf ); + enum pipe_error (*validate)( struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags ); + + void (*fence)( struct pb_buffer *buf, + struct pipe_fence_handle *fence ); + /** * Get the base buffer and the offset. * @@ -118,6 +128,7 @@ struct pb_vtbl void (*get_base_buffer)( struct pb_buffer *buf, struct pb_buffer **base_buf, unsigned *offset ); + }; @@ -148,6 +159,7 @@ pb_map(struct pb_buffer *buf, assert(buf); if(!buf) return NULL; + assert(buf->base.refcount > 0); return buf->vtbl->map(buf, flags); } @@ -158,6 +170,7 @@ pb_unmap(struct pb_buffer *buf) assert(buf); if(!buf) return; + assert(buf->base.refcount > 0); buf->vtbl->unmap(buf); } @@ -173,7 +186,33 @@ pb_get_base_buffer( struct pb_buffer *buf, offset = 0; return; } + assert(buf->base.refcount > 0); + assert(buf->vtbl->get_base_buffer); buf->vtbl->get_base_buffer(buf, base_buf, offset); + assert(*base_buf); + assert(*offset < (*base_buf)->base.size); +} + + +static INLINE enum pipe_error +pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags) +{ + assert(buf); + if(!buf) + return PIPE_ERROR; + assert(buf->vtbl->validate); + return buf->vtbl->validate(buf, vl, flags); +} + + +static INLINE void +pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence) +{ + assert(buf); + if(!buf) + return; + assert(buf->vtbl->fence); + buf->vtbl->fence(buf, fence); } @@ -183,6 +222,7 @@ pb_destroy(struct pb_buffer *buf) assert(buf); if(!buf) return; + assert(buf->base.refcount == 0); 
buf->vtbl->destroy(buf); } @@ -193,11 +233,16 @@ static INLINE void pb_reference(struct pb_buffer **dst, struct pb_buffer *src) { - if (src) + if (src) { + assert(src->base.refcount); src->base.refcount++; + } - if (*dst && --(*dst)->base.refcount == 0) - pb_destroy( *dst ); + if (*dst) { + assert((*dst)->base.refcount); + if(--(*dst)->base.refcount == 0) + pb_destroy( *dst ); + } *dst = src; } @@ -210,7 +255,13 @@ pb_reference(struct pb_buffer **dst, static INLINE boolean pb_check_alignment(size_t requested, size_t provided) { - return requested <= provided && (provided % requested) == 0 ? TRUE : FALSE; + if(!requested) + return TRUE; + if(requested > provided) + return FALSE; + if(provided % requested != 0) + return FALSE; + return TRUE; } @@ -234,10 +285,6 @@ pb_malloc_buffer_create(size_t size, const struct pb_desc *desc); -void -pb_init_winsys(struct pipe_winsys *winsys); - - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index c3d747898a..272e2205e3 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -29,7 +29,7 @@ * \file * Implementation of fenced buffers. 
* - * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com> + * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com> * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com> */ @@ -43,8 +43,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_error.h" -#include "pipe/p_debug.h" -#include "pipe/p_winsys.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "util/u_memory.h" #include "util/u_double_list.h" @@ -59,19 +58,12 @@ */ #define SUPER(__derived) (&(__derived)->base) -#define PIPE_BUFFER_USAGE_CPU_READ_WRITE \ - ( PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE ) -#define PIPE_BUFFER_USAGE_GPU_READ_WRITE \ - ( PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ) -#define PIPE_BUFFER_USAGE_WRITE \ - ( PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE ) - struct fenced_buffer_list { pipe_mutex mutex; - struct pipe_winsys *winsys; + struct pb_fence_ops *ops; size_t numDelayed; struct list_head delayed; @@ -101,6 +93,8 @@ struct fenced_buffer unsigned flags; unsigned mapcount; + struct pb_validate *vl; + unsigned validation_flags; struct pipe_fence_handle *fence; struct list_head head; @@ -112,7 +106,6 @@ static INLINE struct fenced_buffer * fenced_buffer(struct pb_buffer *buf) { assert(buf); - assert(buf->vtbl == &fenced_buffer_vtbl); return (struct fenced_buffer *)buf; } @@ -164,12 +157,12 @@ static INLINE void _fenced_buffer_remove(struct fenced_buffer_list *fenced_list, struct fenced_buffer *fenced_buf) { - struct pipe_winsys *winsys = fenced_list->winsys; + struct pb_fence_ops *ops = fenced_list->ops; assert(fenced_buf->fence); assert(fenced_buf->list == fenced_list); - winsys->fence_reference(winsys, &fenced_buf->fence, NULL); + ops->fence_reference(ops, &fenced_buf->fence, NULL); fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; assert(fenced_buf->head.prev); @@ -193,7 +186,7 @@ static INLINE enum pipe_error _fenced_buffer_finish(struct fenced_buffer *fenced_buf) { struct fenced_buffer_list 
*fenced_list = fenced_buf->list; - struct pipe_winsys *winsys = fenced_list->winsys; + struct pb_fence_ops *ops = fenced_list->ops; #if 0 debug_warning("waiting for GPU"); @@ -201,7 +194,7 @@ _fenced_buffer_finish(struct fenced_buffer *fenced_buf) assert(fenced_buf->fence); if(fenced_buf->fence) { - if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0) { + if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) { return PIPE_ERROR; } /* Remove from the fenced list */ @@ -221,7 +214,7 @@ static void _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, int wait) { - struct pipe_winsys *winsys = fenced_list->winsys; + struct pb_fence_ops *ops = fenced_list->ops; struct list_head *curr, *next; struct fenced_buffer *fenced_buf; struct pipe_fence_handle *prev_fence = NULL; @@ -234,15 +227,15 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, if(fenced_buf->fence != prev_fence) { int signaled; if (wait) - signaled = winsys->fence_finish(winsys, fenced_buf->fence, 0); + signaled = ops->fence_finish(ops, fenced_buf->fence, 0); else - signaled = winsys->fence_signalled(winsys, fenced_buf->fence, 0); + signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); if (signaled != 0) break; prev_fence = fenced_buf->fence; } else { - assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); + assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); } _fenced_buffer_remove(fenced_list, fenced_buf); @@ -262,14 +255,14 @@ fenced_buffer_destroy(struct pb_buffer *buf) pipe_mutex_lock(fenced_list->mutex); assert(fenced_buf->base.base.refcount == 0); if (fenced_buf->fence) { - struct pipe_winsys *winsys = fenced_list->winsys; - if(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0) { + struct pb_fence_ops *ops = fenced_list->ops; + if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) { struct list_head *curr, *prev; curr = &fenced_buf->head; prev = curr->prev; do { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, 
head); - assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); + assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0); _fenced_buffer_remove(fenced_list, fenced_buf); curr = prev; prev = curr->prev; @@ -293,6 +286,7 @@ fenced_buffer_map(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); void *map; + assert(flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE); assert(!(flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE)); flags &= PIPE_BUFFER_USAGE_CPU_READ_WRITE; @@ -334,6 +328,93 @@ fenced_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +fenced_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + enum pipe_error ret; + + if(!vl) { + /* invalidate */ + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; + return PIPE_OK; + } + + assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); + assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); + flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; + + /* Buffer cannot be validated in two different lists */ + if(fenced_buf->vl && fenced_buf->vl != vl) + return PIPE_ERROR_RETRY; + + /* Do not validate if buffer is still mapped */ + if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { + /* TODO: wait for the thread that mapped the buffer to unmap it */ + return PIPE_ERROR_RETRY; + } + + if(fenced_buf->vl == vl && + (fenced_buf->validation_flags & flags) == flags) { + /* Nothing to do -- buffer already validated */ + return PIPE_OK; + } + + /* Final sanity checking */ + assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); + assert(!fenced_buf->mapcount); + + ret = pb_validate(fenced_buf->buffer, vl, flags); + if (ret != PIPE_OK) + return ret; + + fenced_buf->vl = vl; + fenced_buf->validation_flags |= flags; + + return PIPE_OK; +} + + +static void +fenced_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct fenced_buffer *fenced_buf; + struct 
fenced_buffer_list *fenced_list; + struct pb_fence_ops *ops; + + fenced_buf = fenced_buffer(buf); + fenced_list = fenced_buf->list; + ops = fenced_list->ops; + + if(fence == fenced_buf->fence) { + /* Nothing to do */ + return; + } + + assert(fenced_buf->vl); + assert(fenced_buf->validation_flags); + + pipe_mutex_lock(fenced_list->mutex); + if (fenced_buf->fence) + _fenced_buffer_remove(fenced_list, fenced_buf); + if (fence) { + ops->fence_reference(ops, &fenced_buf->fence, fence); + fenced_buf->flags |= fenced_buf->validation_flags; + _fenced_buffer_add(fenced_buf); + } + pipe_mutex_unlock(fenced_list->mutex); + + pb_fence(fenced_buf->buffer, fence); + + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; +} + + static void fenced_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -344,11 +425,13 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf, } -const struct pb_vtbl +static const struct pb_vtbl fenced_buffer_vtbl = { fenced_buffer_destroy, fenced_buffer_map, fenced_buffer_unmap, + fenced_buffer_validate, + fenced_buffer_fence, fenced_buffer_get_base_buffer }; @@ -388,54 +471,8 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list, } -void -buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ - struct fenced_buffer *fenced_buf; - struct fenced_buffer_list *fenced_list; - struct pipe_winsys *winsys; - /* FIXME: receive this as a parameter */ - unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0; - - /* This is a public function, so be extra cautious with the buffer passed, - * as happens frequently to receive null buffers, or pointer to buffers - * other than fenced buffers. 
*/ - assert(buf); - if(!buf) - return; - assert(buf->vtbl == &fenced_buffer_vtbl); - if(buf->vtbl != &fenced_buffer_vtbl) - return; - - fenced_buf = fenced_buffer(buf); - fenced_list = fenced_buf->list; - winsys = fenced_list->winsys; - - if(!fence || fence == fenced_buf->fence) { - /* Handle the same fence case specially, not only because it is a fast - * path, but mostly to avoid serializing two writes with the same fence, - * as that would bring the hardware down to synchronous operation without - * any benefit. - */ - fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; - return; - } - - pipe_mutex_lock(fenced_list->mutex); - if (fenced_buf->fence) - _fenced_buffer_remove(fenced_list, fenced_buf); - if (fence) { - winsys->fence_reference(winsys, &fenced_buf->fence, fence); - fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; - _fenced_buffer_add(fenced_buf); - } - pipe_mutex_unlock(fenced_list->mutex); -} - - struct fenced_buffer_list * -fenced_buffer_list_create(struct pipe_winsys *winsys) +fenced_buffer_list_create(struct pb_fence_ops *ops) { struct fenced_buffer_list *fenced_list; @@ -443,7 +480,7 @@ fenced_buffer_list_create(struct pipe_winsys *winsys) if (!fenced_list) return NULL; - fenced_list->winsys = winsys; + fenced_list->ops = ops; LIST_INITHEAD(&fenced_list->delayed); fenced_list->numDelayed = 0; @@ -473,7 +510,7 @@ fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, void fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) { - struct pipe_winsys *winsys = fenced_list->winsys; + struct pb_fence_ops *ops = fenced_list->ops; struct list_head *curr, *next; struct fenced_buffer *fenced_buf; struct pipe_fence_handle *prev_fence = NULL; @@ -500,7 +537,7 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list) while(curr != &fenced_list->delayed) { int signaled; fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); - signaled = winsys->fence_signalled(winsys, fenced_buf->fence, 0); + 
signaled = ops->fence_signalled(ops, fenced_buf->fence, 0); debug_printf("%10p %7u %10p %s\n", fenced_buf, fenced_buf->base.base.refcount, @@ -536,6 +573,8 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) pipe_mutex_unlock(fenced_list->mutex); + fenced_list->ops->destroy(fenced_list->ops); + FREE(fenced_list); } diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h index 510f456508..034ca1e024 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -44,14 +44,14 @@ * Between the handle's destruction, and the fence signalling, the buffer is * stored in a fenced buffer list. * - * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Jose Fonseca <jrfonseca@tungstengraphics.com> */ #ifndef PB_BUFFER_FENCED_H_ #define PB_BUFFER_FENCED_H_ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #ifdef __cplusplus @@ -59,7 +59,6 @@ extern "C" { #endif -struct pipe_winsys; struct pipe_buffer; struct pipe_fence_handle; @@ -70,12 +69,33 @@ struct pipe_fence_handle; struct fenced_buffer_list; -/** - * The fenced buffer's virtual function table. - * - * NOTE: Made public for debugging purposes. - */ -extern const struct pb_vtbl fenced_buffer_vtbl; +struct pb_fence_ops +{ + void (*destroy)( struct pb_fence_ops *ops ); + + /** Set ptr = fence, with reference counting */ + void (*fence_reference)( struct pb_fence_ops *ops, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence ); + + /** + * Checks whether the fence has been signalled. + * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_signalled)( struct pb_fence_ops *ops, + struct pipe_fence_handle *fence, + unsigned flag ); + + /** + * Wait for the fence to finish. + * \param flags driver-specific meaning + * \return zero on success. 
+ */ + int (*fence_finish)( struct pb_fence_ops *ops, + struct pipe_fence_handle *fence, + unsigned flag ); +}; /** @@ -84,7 +104,7 @@ extern const struct pb_vtbl fenced_buffer_vtbl; * See also fenced_bufmgr_create for a more convenient way to use this. */ struct fenced_buffer_list * -fenced_buffer_list_create(struct pipe_winsys *winsys); +fenced_buffer_list_create(struct pb_fence_ops *ops); /** @@ -115,17 +135,6 @@ fenced_buffer_create(struct fenced_buffer_list *fenced, struct pb_buffer *buffer); -/** - * Set a buffer's fence. - * - * NOTE: Although it takes a generic pb_buffer argument, it will fail - * on everything but buffers returned by fenced_buffer_create. - */ -void -buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence); - - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index 1bf22a2ec0..282802b171 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -34,7 +34,7 @@ */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "pb_buffer.h" #include "pb_bufmgr.h" @@ -81,6 +81,24 @@ malloc_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +malloc_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + assert(0); + return PIPE_ERROR; +} + + +static void +malloc_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + assert(0); +} + + static void malloc_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -96,6 +114,8 @@ malloc_buffer_vtbl = { malloc_buffer_destroy, malloc_buffer_map, malloc_buffer_unmap, + malloc_buffer_validate, + malloc_buffer_fence, malloc_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index cafbee045a..fec8db91c7 100644 --- 
a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -43,7 +43,7 @@ * - the fenced buffer manager, which will delay buffer destruction until the * the moment the card finishing processing it. * - * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Jose Fonseca <jrfonseca@tungstengraphics.com> */ #ifndef PB_BUFMGR_H_ @@ -61,7 +61,6 @@ extern "C" { struct pb_desc; struct pipe_buffer; -struct pipe_winsys; /** @@ -163,6 +162,8 @@ pb_cache_manager_create(struct pb_manager *provider, unsigned usecs); +struct pb_fence_ops; + /** * Fenced buffer manager. * @@ -174,7 +175,7 @@ pb_cache_manager_create(struct pb_manager *provider, */ struct pb_manager * fenced_bufmgr_create(struct pb_manager *provider, - struct pipe_winsys *winsys); + struct pb_fence_ops *ops); struct pb_manager * @@ -183,6 +184,20 @@ pb_alt_manager_create(struct pb_manager *provider1, /** + * Ondemand buffer manager. + * + * Buffers are created in malloc'ed memory (fast and cached), and the constents + * is transfered to a buffer from the provider (typically in slow uncached + * memory) when there is an attempt to validate the buffer. + * + * Ideal for situations where one does not know before hand whether a given + * buffer will effectively be used by the hardware or not. + */ +struct pb_manager * +pb_ondemand_manager_create(struct pb_manager *provider); + + +/** * Debug buffer manager to detect buffer under- and overflows. 
* * Band size should be a multiple of the largest alignment diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c index c956924cc7..db67d46c56 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c @@ -34,7 +34,7 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 8f118874ec..29117efe9b 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -29,14 +29,13 @@ * \file * Buffer cache. * - * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com> + * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com> * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com> */ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" -#include "pipe/p_winsys.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "util/u_memory.h" #include "util/u_double_list.h" @@ -183,6 +182,25 @@ pb_cache_buffer_unmap(struct pb_buffer *_buf) } +static enum pipe_error +pb_cache_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + return pb_validate(buf->buffer, vl, flags); +} + + +static void +pb_cache_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + pb_fence(buf->buffer, fence); +} + + static void pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf, struct pb_buffer **base_buf, @@ -198,6 +216,8 @@ pb_cache_buffer_vtbl = { pb_cache_buffer_destroy, pb_cache_buffer_map, pb_cache_buffer_unmap, + pb_cache_buffer_validate, + pb_cache_buffer_fence, pb_cache_buffer_get_base_buffer }; diff --git 
a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 1675e6e182..070bf3f517 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -29,13 +29,12 @@ * \file * Debug buffer manager to detect buffer under- and overflows. * - * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Jose Fonseca <jrfonseca@tungstengraphics.com> */ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" -#include "pipe/p_winsys.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -255,11 +254,35 @@ pb_debug_buffer_get_base_buffer(struct pb_buffer *_buf, } +static enum pipe_error +pb_debug_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + + pb_debug_buffer_check(buf); + + return pb_validate(buf->buffer, vl, flags); +} + + +static void +pb_debug_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + pb_fence(buf->buffer, fence); +} + + const struct pb_vtbl pb_debug_buffer_vtbl = { pb_debug_buffer_destroy, pb_debug_buffer_map, pb_debug_buffer_unmap, + pb_debug_buffer_validate, + pb_debug_buffer_fence, pb_debug_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index 8d67efab6c..144db5669b 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -30,11 +30,11 @@ * \file * A buffer manager that wraps buffers in fenced buffers. 
* - * \author José Fonseca <jrfonseca@tungstengraphics.dot.com> + * \author Jose Fonseca <jrfonseca@tungstengraphics.dot.com> */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "pb_buffer.h" @@ -90,8 +90,7 @@ fenced_bufmgr_create_buffer(struct pb_manager *mgr, fenced_buf = fenced_buffer_create(fenced_mgr->fenced_list, buf); if(!fenced_buf) { - assert(buf->base.refcount == 1); - pb_destroy(buf); + pb_reference(&buf, NULL); } return fenced_buf; @@ -127,7 +126,7 @@ fenced_bufmgr_destroy(struct pb_manager *mgr) struct pb_manager * fenced_bufmgr_create(struct pb_manager *provider, - struct pipe_winsys *winsys) + struct pb_fence_ops *ops) { struct fenced_pb_manager *fenced_mgr; @@ -143,7 +142,7 @@ fenced_bufmgr_create(struct pb_manager *provider, fenced_mgr->base.flush = fenced_bufmgr_flush; fenced_mgr->provider = provider; - fenced_mgr->fenced_list = fenced_buffer_list_create(winsys); + fenced_mgr->fenced_list = fenced_buffer_list_create(ops); if(!fenced_mgr->fenced_list) { FREE(fenced_mgr); return NULL; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 37984e7b7b..85ff3a09de 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -29,12 +29,12 @@ * \file * Buffer manager using the old texture memory manager. 
* - * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Jose Fonseca <jrfonseca@tungstengraphics.com> */ #include "pipe/p_defines.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "util/u_memory.h" #include "util/u_double_list.h" @@ -100,7 +100,7 @@ mm_buffer_destroy(struct pb_buffer *buf) assert(buf->base.refcount == 0); pipe_mutex_lock(mm->mutex); - mmFreeMem(mm_buf->block); + u_mmFreeMem(mm_buf->block); FREE(buf); pipe_mutex_unlock(mm->mutex); } @@ -124,6 +124,27 @@ mm_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +mm_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + return pb_validate(mm->buffer, vl, flags); +} + + +static void +mm_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + pb_fence(mm->buffer, fence); +} + + static void mm_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -141,6 +162,8 @@ mm_buffer_vtbl = { mm_buffer_destroy, mm_buffer_map, mm_buffer_unmap, + mm_buffer_validate, + mm_buffer_fence, mm_buffer_get_base_buffer }; @@ -154,8 +177,8 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, struct mm_buffer *mm_buf; /* We don't handle alignments larger then the one initially setup */ - assert(desc->alignment % (1 << mm->align2) == 0); - if(desc->alignment % (1 << mm->align2)) + assert(pb_check_alignment(desc->alignment, 1 << mm->align2)); + if(!pb_check_alignment(desc->alignment, 1 << mm->align2)) return NULL; pipe_mutex_lock(mm->mutex); @@ -175,7 +198,7 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, mm_buf->mgr = mm; - mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0); + mm_buf->block = u_mmAllocMem(mm->heap, size, mm->align2, 0); if(!mm_buf->block) { debug_printf("warning: heap full\n"); #if 0 @@ -209,7 
+232,7 @@ mm_bufmgr_destroy(struct pb_manager *mgr) pipe_mutex_lock(mm->mutex); - mmDestroy(mm->heap); + u_mmDestroy(mm->heap); pb_unmap(mm->buffer); pb_reference(&mm->buffer, NULL); @@ -250,7 +273,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, if(!mm->map) goto failure; - mm->heap = mmInit(0, size); + mm->heap = u_mmInit(0, size); if (!mm->heap) goto failure; @@ -258,7 +281,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, failure: if(mm->heap) - mmDestroy(mm->heap); + u_mmDestroy(mm->heap); if(mm->map) pb_unmap(mm->buffer); if(mm) diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c new file mode 100644 index 0000000000..3d9c7bba0b --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c @@ -0,0 +1,303 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * A variation of malloc buffers which get transferred to real graphics memory + * when there is an attempt to validate them. + * + * @author Jose Fonseca <jrfonseca@tungstengraphics.com> + */ + + +#include "util/u_debug.h" +#include "util/u_memory.h" +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +struct pb_ondemand_manager; + + +struct pb_ondemand_buffer +{ + struct pb_buffer base; + + struct pb_ondemand_manager *mgr; + + /** Regular malloc'ed memory */ + void *data; + unsigned mapcount; + + /** Real buffer */ + struct pb_buffer *buffer; + size_t size; + struct pb_desc desc; +}; + + +struct pb_ondemand_manager +{ + struct pb_manager base; + + struct pb_manager *provider; +}; + + +extern const struct pb_vtbl pb_ondemand_buffer_vtbl; + +static INLINE struct pb_ondemand_buffer * +pb_ondemand_buffer(struct pb_buffer *buf) +{ + assert(buf); + assert(buf->vtbl == &pb_ondemand_buffer_vtbl); + return (struct pb_ondemand_buffer *)buf; +} + +static INLINE struct pb_ondemand_manager * +pb_ondemand_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_ondemand_manager *)mgr; +} + + +static void +pb_ondemand_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + pb_reference(&buf->buffer, NULL); + + align_free(buf->data); + + FREE(buf); +} + + +static void * +pb_ondemand_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(buf->buffer) { + assert(!buf->data); + return pb_map(buf->buffer, flags); + } + else { + assert(buf->data); + ++buf->mapcount; + return 
buf->data; + } +} + + +static void +pb_ondemand_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(buf->buffer) { + assert(!buf->data); + pb_unmap(buf->buffer); + } + else { + assert(buf->data); + assert(buf->mapcount); + if(buf->mapcount) + --buf->mapcount; + } +} + + +static enum pipe_error +pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf) +{ + if(!buf->buffer) { + struct pb_manager *provider = buf->mgr->provider; + uint8_t *map; + + assert(!buf->mapcount); + + buf->buffer = provider->create_buffer(provider, buf->size, &buf->desc); + if(!buf->buffer) + return PIPE_ERROR_OUT_OF_MEMORY; + + map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); + if(!map) { + pb_reference(&buf->buffer, NULL); + return PIPE_ERROR; + } + + memcpy(map, buf->data, buf->size); + + pb_unmap(buf->buffer); + + if(!buf->mapcount) { + FREE(buf->data); + buf->data = NULL; + } + } + + return PIPE_OK; +} + +static enum pipe_error +pb_ondemand_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + enum pipe_error ret; + + assert(!buf->mapcount); + if(buf->mapcount) + return PIPE_ERROR; + + ret = pb_ondemand_buffer_instantiate(buf); + if(ret != PIPE_OK) + return ret; + + return pb_validate(buf->buffer, vl, flags); +} + + +static void +pb_ondemand_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + assert(buf->buffer); + if(!buf->buffer) + return; + + pb_fence(buf->buffer, fence); +} + + +static void +pb_ondemand_buffer_get_base_buffer(struct pb_buffer *_buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(pb_ondemand_buffer_instantiate(buf) != PIPE_OK) { + assert(0); + *base_buf = &buf->base; + *offset = 0; + return; + } + + pb_get_base_buffer(buf->buffer, base_buf, offset); 
+} + + +const struct pb_vtbl +pb_ondemand_buffer_vtbl = { + pb_ondemand_buffer_destroy, + pb_ondemand_buffer_map, + pb_ondemand_buffer_unmap, + pb_ondemand_buffer_validate, + pb_ondemand_buffer_fence, + pb_ondemand_buffer_get_base_buffer +}; + + +static struct pb_buffer * +pb_ondemand_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + struct pb_ondemand_buffer *buf; + + buf = CALLOC_STRUCT(pb_ondemand_buffer); + if(!buf) + return NULL; + + buf->base.base.refcount = 1; + buf->base.base.alignment = desc->alignment; + buf->base.base.usage = desc->usage; + buf->base.base.size = size; + buf->base.vtbl = &pb_ondemand_buffer_vtbl; + + buf->mgr = mgr; + + buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment); + if(!buf->data) { + FREE(buf); + return NULL; + } + + buf->size = size; + buf->desc = *desc; + + return &buf->base; +} + + +static void +pb_ondemand_manager_flush(struct pb_manager *_mgr) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + + mgr->provider->flush(mgr->provider); +} + + +static void +pb_ondemand_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + + FREE(mgr); +} + + +struct pb_manager * +pb_ondemand_manager_create(struct pb_manager *provider) +{ + struct pb_ondemand_manager *mgr; + + if(!provider) + return NULL; + + mgr = CALLOC_STRUCT(pb_ondemand_manager); + if(!mgr) + return NULL; + + mgr->base.destroy = pb_ondemand_manager_destroy; + mgr->base.create_buffer = pb_ondemand_manager_create_buffer; + mgr->base.flush = pb_ondemand_manager_flush; + + mgr->provider = provider; + + return &mgr->base; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 61ac291ed7..12447acfd9 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ 
b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -30,13 +30,13 @@ * \file * Batch buffer pool management. * - * \author José Fonseca <jrfonseca-at-tungstengraphics-dot-com> + * \author Jose Fonseca <jrfonseca-at-tungstengraphics-dot-com> * \author Thomas Hellström <thomas-at-tungstengraphics-dot-com> */ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "pipe/p_defines.h" #include "util/u_memory.h" @@ -138,6 +138,27 @@ pool_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +pool_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + return pb_validate(pool->buffer, vl, flags); +} + + +static void +pool_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + pb_fence(pool->buffer, fence); +} + + static void pool_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -155,6 +176,8 @@ pool_buffer_vtbl = { pool_buffer_destroy, pool_buffer_map, pool_buffer_unmap, + pool_buffer_validate, + pool_buffer_fence, pool_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 2a80154920..a3259351b9 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -38,7 +38,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_error.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "pipe/p_defines.h" #include "util/u_memory.h" @@ -248,6 +248,25 @@ pb_slab_buffer_unmap(struct pb_buffer *_buf) } +static enum pipe_error +pb_slab_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_slab_buffer *buf = 
pb_slab_buffer(_buf); + return pb_validate(buf->slab->bo, vl, flags); +} + + +static void +pb_slab_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + pb_fence(buf->slab->bo, fence); +} + + static void pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf, struct pb_buffer **base_buf, @@ -264,6 +283,8 @@ pb_slab_buffer_vtbl = { pb_slab_buffer_destroy, pb_slab_buffer_map, pb_slab_buffer_unmap, + pb_slab_buffer_validate, + pb_slab_buffer_fence, pb_slab_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index 1e54fc39d4..150fd50618 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -36,7 +36,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_error.h" #include "util/u_memory.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" @@ -46,9 +46,16 @@ #define PB_VALIDATE_INITIAL_SIZE 1 /* 512 */ +struct pb_validate_entry +{ + struct pb_buffer *buf; + unsigned flags; +}; + + struct pb_validate { - struct pb_buffer **buffers; + struct pb_validate_entry *entries; unsigned used; unsigned size; }; @@ -56,43 +63,50 @@ struct pb_validate enum pipe_error pb_validate_add_buffer(struct pb_validate *vl, - struct pb_buffer *buf) + struct pb_buffer *buf, + unsigned flags) { assert(buf); if(!buf) return PIPE_ERROR; + assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); + assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); + flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; + /* We only need to store one reference for each buffer, so avoid storing - * consecutive references for the same buffer. It might not be the more - * common pasttern, but it is easy to implement. + * consecutive references for the same buffer. It might not be the most + * common pattern, but it is easy to implement. 
*/ - if(vl->used && vl->buffers[vl->used - 1] == buf) { + if(vl->used && vl->entries[vl->used - 1].buf == buf) { + vl->entries[vl->used - 1].flags |= flags; return PIPE_OK; } /* Grow the table */ if(vl->used == vl->size) { unsigned new_size; - struct pb_buffer **new_buffers; + struct pb_validate_entry *new_entries; new_size = vl->size * 2; if(!new_size) return PIPE_ERROR_OUT_OF_MEMORY; - new_buffers = (struct pb_buffer **)REALLOC(vl->buffers, - vl->size*sizeof(struct pb_buffer *), - new_size*sizeof(struct pb_buffer *)); - if(!new_buffers) + new_entries = (struct pb_validate_entry *)REALLOC(vl->entries, + vl->size*sizeof(struct pb_validate_entry), + new_size*sizeof(struct pb_validate_entry)); + if(!new_entries) return PIPE_ERROR_OUT_OF_MEMORY; - memset(new_buffers + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_buffer *)); + memset(new_entries + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_validate_entry)); vl->size = new_size; - vl->buffers = new_buffers; + vl->entries = new_entries; } - assert(!vl->buffers[vl->used]); - pb_reference(&vl->buffers[vl->used], buf); + assert(!vl->entries[vl->used].buf); + pb_reference(&vl->entries[vl->used].buf, buf); + vl->entries[vl->used].flags = flags; ++vl->used; return PIPE_OK; @@ -100,10 +114,36 @@ pb_validate_add_buffer(struct pb_validate *vl, enum pipe_error +pb_validate_foreach(struct pb_validate *vl, + enum pipe_error (*callback)(struct pb_buffer *buf, void *data), + void *data) +{ + unsigned i; + for(i = 0; i < vl->used; ++i) { + enum pipe_error ret; + ret = callback(vl->entries[i].buf, data); + if(ret != PIPE_OK) + return ret; + } + return PIPE_OK; +} + + +enum pipe_error pb_validate_validate(struct pb_validate *vl) { - /* FIXME: go through each buffer, ensure its not mapped, its address is - * available -- requires a new pb_buffer interface */ + unsigned i; + + for(i = 0; i < vl->used; ++i) { + enum pipe_error ret; + ret = pb_validate(vl->entries[i].buf, vl, vl->entries[i].flags); + if(ret != PIPE_OK) { + 
while(i--) + pb_validate(vl->entries[i].buf, NULL, 0); + return ret; + } + } + return PIPE_OK; } @@ -114,8 +154,8 @@ pb_validate_fence(struct pb_validate *vl, { unsigned i; for(i = 0; i < vl->used; ++i) { - buffer_fence(vl->buffers[i], fence); - pb_reference(&vl->buffers[i], NULL); + pb_fence(vl->entries[i].buf, fence); + pb_reference(&vl->entries[i].buf, NULL); } vl->used = 0; } @@ -126,8 +166,8 @@ pb_validate_destroy(struct pb_validate *vl) { unsigned i; for(i = 0; i < vl->used; ++i) - pb_reference(&vl->buffers[i], NULL); - FREE(vl->buffers); + pb_reference(&vl->entries[i].buf, NULL); + FREE(vl->entries); FREE(vl); } @@ -142,8 +182,8 @@ pb_validate_create() return NULL; vl->size = PB_VALIDATE_INITIAL_SIZE; - vl->buffers = (struct pb_buffer **)CALLOC(vl->size, sizeof(struct pb_buffer *)); - if(!vl->buffers) { + vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_validate_entry)); + if(!vl->entries) { FREE(vl); return NULL; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.h b/src/gallium/auxiliary/pipebuffer/pb_validate.h index 3db1d5330b..dfb84df1ce 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.h +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.h @@ -58,7 +58,13 @@ struct pb_validate; enum pipe_error pb_validate_add_buffer(struct pb_validate *vl, - struct pb_buffer *buf); + struct pb_buffer *buf, + unsigned flags); + +enum pipe_error +pb_validate_foreach(struct pb_validate *vl, + enum pipe_error (*callback)(struct pb_buffer *buf, void *data), + void *data); /** * Validate all buffers for hardware access. @@ -71,7 +77,7 @@ pb_validate_validate(struct pb_validate *vl); /** * Fence all buffers and clear the list. * - * Should be called right before issuing commands to the hardware. + * Should be called right after issuing commands to the hardware. 
*/ void pb_validate_fence(struct pb_validate *vl, diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c deleted file mode 100644 index 28d137dbc4..0000000000 --- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c +++ /dev/null @@ -1,170 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \file - * Implementation of client buffer (also designated as "user buffers"), which - * are just state-tracker owned data masqueraded as buffers. 
- * - * \author José Fonseca <jrfonseca@tungstengraphics.com> - */ - - -#include "pipe/p_winsys.h" -#include "util/u_memory.h" - -#include "pb_buffer.h" - - -/** - * User buffers are special buffers that initially reference memory - * held by the user but which may if necessary copy that memory into - * device memory behind the scenes, for submission to hardware. - * - * These are particularly useful when the referenced data is never - * submitted to hardware at all, in the particular case of software - * vertex processing. - */ -struct pb_user_buffer -{ - struct pb_buffer base; - void *data; -}; - - -extern const struct pb_vtbl pb_user_buffer_vtbl; - - -static INLINE struct pb_user_buffer * -pb_user_buffer(struct pb_buffer *buf) -{ - assert(buf); - assert(buf->vtbl == &pb_user_buffer_vtbl); - return (struct pb_user_buffer *)buf; -} - - -static void -pb_user_buffer_destroy(struct pb_buffer *buf) -{ - assert(buf); - FREE(buf); -} - - -static void * -pb_user_buffer_map(struct pb_buffer *buf, - unsigned flags) -{ - return pb_user_buffer(buf)->data; -} - - -static void -pb_user_buffer_unmap(struct pb_buffer *buf) -{ - /* No-op */ -} - - -static void -pb_user_buffer_get_base_buffer(struct pb_buffer *buf, - struct pb_buffer **base_buf, - unsigned *offset) -{ - *base_buf = buf; - *offset = 0; -} - - -const struct pb_vtbl -pb_user_buffer_vtbl = { - pb_user_buffer_destroy, - pb_user_buffer_map, - pb_user_buffer_unmap, - pb_user_buffer_get_base_buffer -}; - - -static struct pipe_buffer * -pb_winsys_user_buffer_create(struct pipe_winsys *winsys, - void *data, - unsigned bytes) -{ - struct pb_user_buffer *buf = CALLOC_STRUCT(pb_user_buffer); - - if(!buf) - return NULL; - - buf->base.base.refcount = 1; - buf->base.base.size = bytes; - buf->base.base.alignment = 0; - buf->base.base.usage = 0; - - buf->base.vtbl = &pb_user_buffer_vtbl; - buf->data = data; - - return &buf->base.base; -} - - -static void * -pb_winsys_buffer_map(struct pipe_winsys *winsys, - struct pipe_buffer *buf, 
- unsigned flags) -{ - (void)winsys; - return pb_map(pb_buffer(buf), flags); -} - - -static void -pb_winsys_buffer_unmap(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - (void)winsys; - pb_unmap(pb_buffer(buf)); -} - - -static void -pb_winsys_buffer_destroy(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - (void)winsys; - pb_destroy(pb_buffer(buf)); -} - - -void -pb_init_winsys(struct pipe_winsys *winsys) -{ - winsys->user_buffer_create = pb_winsys_user_buffer_create; - winsys->buffer_map = pb_winsys_buffer_map; - winsys->buffer_unmap = pb_winsys_buffer_unmap; - winsys->buffer_destroy = pb_winsys_buffer_destroy; -} diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile index 39b8a4dbd7..ab8ea464c6 100644 --- a/src/gallium/auxiliary/rtasm/Makefile +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -7,9 +7,7 @@ C_SOURCES = \ rtasm_cpu.c \ rtasm_execmem.c \ rtasm_x86sse.c \ + rtasm_ppc.c \ rtasm_ppc_spe.c include ../../Makefile.template - -symlinks: - diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript index 8ea25922aa..eb48368acc 100644 --- a/src/gallium/auxiliary/rtasm/SConscript +++ b/src/gallium/auxiliary/rtasm/SConscript @@ -6,6 +6,7 @@ rtasm = env.ConvenienceLibrary( 'rtasm_cpu.c', 'rtasm_execmem.c', 'rtasm_x86sse.c', + 'rtasm_ppc.c', 'rtasm_ppc_spe.c', ]) diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c index 5499018b21..03bdd47238 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "rtasm_cpu.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index f16191cb61..5acc5bcb7b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ 
b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -31,19 +31,20 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_thread.h" #include "util/u_memory.h" #include "rtasm_execmem.h" -#if defined(__linux__) +#if defined(PIPE_OS_LINUX) + /* * Allocate a large block of memory which can hold code then dole it out * in pieces by means of the generic memory manager code. -*/ + */ #include <unistd.h> #include <sys/mman.h> @@ -62,7 +63,7 @@ static void init_heap(void) { if (!exec_heap) - exec_heap = mmInit( 0, EXEC_HEAP_SIZE ); + exec_heap = u_mmInit( 0, EXEC_HEAP_SIZE ); if (!exec_mem) exec_mem = (unsigned char *) mmap(0, EXEC_HEAP_SIZE, @@ -83,7 +84,7 @@ rtasm_exec_malloc(size_t size) if (exec_heap) { size = (size + 31) & ~31; /* next multiple of 32 bytes */ - block = mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */ + block = u_mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */ } if (block) @@ -103,17 +104,17 @@ rtasm_exec_free(void *addr) pipe_mutex_lock(exec_mutex); if (exec_heap) { - struct mem_block *block = mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); + struct mem_block *block = u_mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); if (block) - mmFreeMem(block); + u_mmFreeMem(block); } pipe_mutex_unlock(exec_mutex); } -#else +#else /* PIPE_OS_LINUX */ /* * Just use regular memory. @@ -133,4 +134,4 @@ rtasm_exec_free(void *addr) } -#endif +#endif /* PIPE_OS_LINUX */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c new file mode 100644 index 0000000000..e3586482db --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -0,0 +1,1077 @@ +/************************************************************************** + * + * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * PPC code generation. 
+ * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf
+ * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf
+ *
+ * Other PPC refs:
+ * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2
+ * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html
+ * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
+ *
+ * \author Brian Paul
+ */
+
+
+#include <stdio.h>
+#include "util/u_memory.h"
+#include "util/u_debug.h"
+#include "rtasm_execmem.h"
+#include "rtasm_ppc.h"
+
+
+/**
+ * Initialize a ppc_function: clear all state, allocate the initial
+ * instruction buffer and reserve the general-purpose registers that the
+ * register allocator must never hand out.
+ * If the buffer allocation yields NULL, emit_instruction() drops every
+ * instruction and ppc_get_func() returns NULL.
+ */
+void
+ppc_init_func(struct ppc_function *p)
+{
+   uint i;
+
+   memset(p, 0, sizeof(*p));
+
+   p->num_inst = 0;
+   p->max_inst = 100; /* first guess at buffer size */
+   p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
+   p->reg_used = 0x0;
+   p->fp_used = 0x0;
+   p->vec_used = 0x0;
+
+   p->print = FALSE;
+   p->indent = 0;
+
+   /* Reserve r0..r2 and r12..r31, leaving gp registers 3..11 allocatable.
+    * NOTE(review): this comment used to say "3..12", but r12 is reserved
+    * by the second loop - confirm which of the two was intended.
+    */
+   for (i = 0; i < 3; i++)
+      ppc_reserve_register(p, i);
+   for (i = 12; i < PPC_NUM_REGS; i++)
+      ppc_reserve_register(p, i);
+}
+
+
+/**
+ * Free the instruction buffer (if any) and set p->store to NULL.
+ */
+void
+ppc_release_func(struct ppc_function *p)
+{
+   assert(p->num_inst <= p->max_inst);
+   if (p->store != NULL) {
+      rtasm_exec_free(p->store);
+   }
+   p->store = NULL;
+}
+
+
+/**
+ * Return the number of instructions emitted so far.
+ */
+uint
+ppc_num_instructions(const struct ppc_function *p)
+{
+   return p->num_inst;
+}
+
+
+/**
+ * Return the instruction buffer as a callable function pointer.
+ * The result is NULL when the buffer could not be allocated or grown.
+ */
+void (*ppc_get_func(struct ppc_function *p))(void)
+{
+   return (void (*)(void)) p->store;
+}
+
+
+/**
+ * Print every emitted instruction word in hex, one per line.
+ */
+void
+ppc_dump_func(const struct ppc_function *p)
+{
+   uint i;
+   for (i = 0; i < p->num_inst; i++) {
+      debug_printf("%3u: 0x%08x\n", i, p->store[i]);
+   }
+}
+
+
+/**
+ * Enable/disable printing of each instruction as it is emitted.
+ */
+void
+ppc_print_code(struct ppc_function *p, boolean enable)
+{
+   p->print = enable;
+}
+
+
+/**
+ * Adjust the indentation (in spaces) of printed instructions.
+ * \param spaces  relative change, may be negative
+ */
+void
+ppc_indent(struct ppc_function *p, int spaces)
+{
+   p->indent += spaces;
+}
+
+
+/** Print p->indent spaces to stdout. */
+static void
+indent(const struct ppc_function *p)
+{
+   int i;
+   for (i = 0; i < p->indent; i++) {
+      putchar(' ');
+   }
+}
+
+
+/**
+ * Print a "# comment" line into the instruction listing (only when
+ * printing is enabled).
+ * \param rel_indent  extra indentation applied to this line only
+ */
+void
+ppc_comment(struct ppc_function *p, int rel_indent, const char *s)
+{
+   if (p->print) {
+      p->indent += rel_indent;
+      indent(p);
+      p->indent -= rel_indent;
+      printf("# %s\n", s);
+   }
+}
+
+
+/**
+ * Mark a register as being unavailable.
+ * \return the register index that was passed in
+ */
+int
+ppc_reserve_register(struct ppc_function *p, int reg)
+{
+   assert(reg >= 0);
+   assert(reg < PPC_NUM_REGS);
+   /* unsigned constant: (1 << 31) on a signed int is undefined */
+   p->reg_used |= (1u << reg);
+   return reg;
+}
+
+
+/**
+ * Allocate a general purpose register.
+ * \return register index or -1 if none left.
+ */
+int
+ppc_allocate_register(struct ppc_function *p)
+{
+   unsigned i;
+   for (i = 0; i < PPC_NUM_REGS; i++) {
+      /* the bitmask is 32 bits wide; build the bit from an unsigned one */
+      const uint32_t mask = 1u << i;
+      if ((p->reg_used & mask) == 0) {
+         p->reg_used |= mask;
+         return i;
+      }
+   }
+   printf("OUT OF PPC registers!\n");
+   return -1;
+}
+
+
+/**
+ * Mark the given general purpose register as "unallocated".
+ */
+void
+ppc_release_register(struct ppc_function *p, int reg)
+{
+   assert(reg >= 0);
+   assert(reg < PPC_NUM_REGS);
+   assert(p->reg_used & (1u << reg));
+   p->reg_used &= ~(1u << reg);
+}
+
+
+/**
+ * Allocate a floating point register.
+ * \return register index or -1 if none left.
+ */
+int
+ppc_allocate_fp_register(struct ppc_function *p)
+{
+   unsigned i;
+   for (i = 0; i < PPC_NUM_FP_REGS; i++) {
+      /* the bitmask is 32 bits wide; build the bit from an unsigned one */
+      const uint32_t mask = 1u << i;
+      if ((p->fp_used & mask) == 0) {
+         p->fp_used |= mask;
+         return i;
+      }
+   }
+   printf("OUT OF PPC FP registers!\n");
+   return -1;
+}
+
+
+/**
+ * Mark the given floating point register as "unallocated".
+ */
+void
+ppc_release_fp_register(struct ppc_function *p, int reg)
+{
+   assert(reg >= 0);
+   assert(reg < PPC_NUM_FP_REGS);
+   assert(p->fp_used & (1u << reg));
+   p->fp_used &= ~(1u << reg);
+}
+
+
+/**
+ * Allocate a vector register.
+ * \return register index or -1 if none left.
+ */
+int
+ppc_allocate_vec_register(struct ppc_function *p)
+{
+   unsigned i;
+   for (i = 0; i < PPC_NUM_VEC_REGS; i++) {
+      /* unsigned constant: (1 << 31) on a signed int is undefined */
+      const uint32_t mask = 1u << i;
+      if ((p->vec_used & mask) == 0) {
+         p->vec_used |= mask;
+         return i;
+      }
+   }
+   printf("OUT OF PPC VEC registers!\n");
+   return -1;
+}
+
+
+/**
+ * Mark the given vector register as "unallocated".
+ */
+void
+ppc_release_vec_register(struct ppc_function *p, int reg)
+{
+   assert(reg >= 0);
+   assert(reg < PPC_NUM_VEC_REGS);
+   assert(p->vec_used & (1u << reg));
+   p->vec_used &= ~(1u << reg);
+}
+
+
+/**
+ * Append instruction to instruction buffer.  Grow buffer if out of room.
+ * When growing fails the old buffer is freed, p->store becomes NULL,
+ * p->num_inst is reset to 0 and this and all later instructions are dropped.
+ */
+static void
+emit_instruction(struct ppc_function *p, uint32_t inst_bits)
+{
+   if (!p->store)
+      return;  /* out of memory, drop the instruction */
+
+   if (p->num_inst == p->max_inst) {
+      /* allocate larger buffer */
+      uint32_t *newbuf;
+      p->max_inst *= 2;  /* 2x larger */
+      newbuf = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
+      if (newbuf) {
+         memcpy(newbuf, p->store, p->num_inst * PPC_INST_SIZE);
+      }
+      rtasm_exec_free(p->store);
+      p->store = newbuf;
+      if (!p->store) {
+         /* out of memory */
+         p->num_inst = 0;
+         return;
+      }
+   }
+
+   p->store[p->num_inst++] = inst_bits;
+}
+
+
+/*
+ * Instruction encodings.  Each union overlays the 32-bit instruction word
+ * with bit-fields listed from the most significant field down.
+ * NOTE(review): this presumably relies on the compiler allocating
+ * bit-fields MSB-first, as big-endian PPC ABIs do - confirm for any other
+ * host.
+ */
+
+/** VX-form: opcode 4, three 5-bit operands, 11-bit extended opcode */
+union vx_inst {
+   uint32_t bits;
+   struct {
+      unsigned op:6;
+      unsigned vD:5;
+      unsigned vA:5;
+      unsigned vB:5;
+      unsigned op2:11;
+   } inst;
+};
+
+/**
+ * Emit a VX-form instruction and optionally print it.
+ * \param transpose  if TRUE print the operands as vD, vB, vA
+ */
+static INLINE void
+emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
+        const char *format, boolean transpose)
+{
+   union vx_inst inst;
+   inst.inst.op = 4;
+   inst.inst.vD = vD;
+   inst.inst.vA = vA;
+   inst.inst.vB = vB;
+   inst.inst.op2 = op2;
+   emit_instruction(p, inst.bits);
+   if (p->print) {
+      indent(p);
+      if (transpose)
+         printf(format, vD, vB, vA);
+      else
+         printf(format, vD, vA, vB);
+   }
+}
+
+
+/** VXR-form: like VX but with a record bit and a 10-bit extended opcode */
+union vxr_inst {
+   uint32_t bits;
+   struct {
+      unsigned op:6;
+      unsigned vD:5;
+      unsigned vA:5;
+      unsigned vB:5;
+      unsigned rC:1;
+      unsigned op2:10;
+   } inst;
+};
+
+/** Emit a VXR-form instruction (record bit always 0) and optionally print it */
+static INLINE void
+emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
+         const char *format)
+{
+   union vxr_inst inst;
+   inst.inst.op = 4;
+   inst.inst.vD = vD;
+   inst.inst.vA = vA;
+   inst.inst.vB = vB;
+   inst.inst.rC = 0;
+   inst.inst.op2 = op2;
+   emit_instruction(p, inst.bits);
+   if (p->print) {
+      indent(p);
+      printf(format, vD, vA, vB);
+   }
+}
+
+
+/** VA-form: opcode 4, four 5-bit operands, 6-bit extended opcode */
+union va_inst {
+   uint32_t bits;
+   struct {
+      unsigned op:6;
+      unsigned vD:5;
+      unsigned vA:5;
+      unsigned vB:5;
+      unsigned vC:5;
+      unsigned op2:6;
+   } inst;
+};
+
+/** Emit a VA-form instruction and optionally print it (field order) */
+static INLINE void
+emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC,
+        const char *format)
+{
+   union va_inst inst;
+   inst.inst.op = 4;
+   inst.inst.vD = vD;
+   inst.inst.vA = vA;
+   inst.inst.vB = vB;
+   inst.inst.vC = vC;
+   inst.inst.op2 = op2;
+   emit_instruction(p, inst.bits);
+   if (p->print) {
+      indent(p);
+      printf(format, vD, vA, vB, vC);
+   }
+}
+
+
+/** I-form: 6-bit opcode, 24-bit branch offset, AA and LK bits */
+union i_inst {
+   uint32_t bits;
+   struct {
+      unsigned op:6;
+      unsigned li:24;
+      unsigned aa:1;
+      unsigned lk:1;
+   } inst;
+};
+
+/** Emit an I-form (unconditional branch) instruction; nothing is printed */
+static INLINE void
+emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk)
+{
+   union i_inst inst;
+   inst.inst.op = op;
+   inst.inst.li = li;
+   inst.inst.aa = aa;
+   inst.inst.lk = lk;
+   emit_instruction(p, inst.bits);
+}
+
+
+/** XL-form: branch-conditional-to-register style instructions */
+union xl_inst {
+   uint32_t bits;
+   struct {
+      unsigned op:6;
+      unsigned bo:5;
+      unsigned bi:5;
+      unsigned unused:3;
+      unsigned bh:2;
+      unsigned op2:10;
+      unsigned lk:1;
+   } inst;
+};
+
+/** Emit an XL-form instruction; nothing is printed */
+static INLINE void
+emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh,
+        uint op2, uint lk)
+{
+   union xl_inst inst;
+   inst.inst.op = op;
+   inst.inst.bo = bo;
+   inst.inst.bi = bi;
+   inst.inst.unused = 0x0;
+   inst.inst.bh = bh;
+   inst.inst.op2 = op2;
+   inst.inst.lk = lk;
+   emit_instruction(p, inst.bits);
+}
+
+/** Debug helper: print the individual fields of an XL-form word */
+static INLINE void
+dump_xl(const char *name, uint inst)
+{
+   union xl_inst i;
+
+   i.bits = inst;
+   debug_printf("%s = 0x%08x\n", name, inst);
+   debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op);
+   debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo);
+   debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi);
+   debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused);
+   debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh);
+   debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2);
+   debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk);
+}
+
+
+/** X-form: three 5-bit operands and a 10-bit extended opcode */
+union x_inst {
+   uint32_t bits;
+   struct {
+      unsigned op:6;
+      unsigned vrs:5;
+      unsigned ra:5;
+      unsigned rb:5;
+      unsigned op2:10;
+      unsigned unused:1;
+   } inst;
+};
+
+/** Emit an X-form instruction and optionally print it as vrs, ra, rb */
+static INLINE void
+emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2,
+       const char *format)
+{
+   union x_inst inst;
+   inst.inst.op = op;
+   inst.inst.vrs = vrs;
+   inst.inst.ra = ra;
+   inst.inst.rb = rb;
+   inst.inst.op2 = op2;
+   inst.inst.unused = 0x0;
+   emit_instruction(p, inst.bits);
+   if (p->print) {
+      indent(p);
+      printf(format, vrs, ra, rb);
+   }
+}
+
+
+/** D-form: two 5-bit registers and a 16-bit immediate */
+union d_inst {
+   uint32_t bits;
+   struct {
+      unsigned op:6;
+      unsigned rt:5;
+      unsigned ra:5;
+      unsigned si:16;
+   } inst;
+};
+
+/**
+ * Emit a D-form instruction and optionally print it.
+ * \param si  immediate; must be in -32768..32767 (asserted), only its low
+ *            16 bits are encoded
+ * \param transpose  if TRUE print as rt, si, ra (the "d(ra)" syntax)
+ */
+static INLINE void
+emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si,
+       const char *format, boolean transpose)
+{
+   union d_inst inst;
+   assert(si >= -32768);
+   assert(si <= 32767);
+   inst.inst.op = op;
+   inst.inst.rt = rt;
+   inst.inst.ra = ra;
+   inst.inst.si = (unsigned) (si & 0xffff);
+   emit_instruction(p, inst.bits);
+   if (p->print) {
+      indent(p);
+      if (transpose)
+         printf(format, rt, si, ra);
+      else
+         printf(format, rt, ra, si);
+   }
+}
+
+
+/** A-form: scalar floating point arithmetic */
+union a_inst {
+   uint32_t bits;
+   struct {
+      unsigned op:6;
+      unsigned frt:5;
+      unsigned fra:5;
+      unsigned frb:5;
+      unsigned unused:5;
+      unsigned op2:5;
+      unsigned rc:1;
+   } inst;
+};
+
+/** Emit an A-form instruction and optionally print it as frt, fra, frb */
+static INLINE void
+emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2,
+       uint rc, const char *format)
+{
+   union a_inst inst;
+   inst.inst.op = op;
+   inst.inst.frt = frt;
+   inst.inst.fra = fra;
+   inst.inst.frb = frb;
+   inst.inst.unused = 0x0;
+   inst.inst.op2 = op2;
+   inst.inst.rc = rc;
+   emit_instruction(p, inst.bits);
+   if (p->print) {
+      indent(p);
+      printf(format, frt, fra, frb);
+   }
+}
+
+
+/** XO-form: integer arithmetic with OE and Rc bits */
+union xo_inst {
+   uint32_t bits;
+   struct {
+      unsigned op:6;
+      unsigned rt:5;
+      unsigned ra:5;
+      unsigned rb:5;
+      unsigned oe:1;
+      unsigned op2:9;
+      unsigned rc:1;
+   } inst;
+};
+
+/** Emit an XO-form instruction and optionally print it as rt, ra, rb */
+static INLINE void
+emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe,
+        uint op2, uint rc, const char *format)
+{
+   union xo_inst inst;
+   inst.inst.op = op;
+   inst.inst.rt = rt;
+   inst.inst.ra = ra;
+   inst.inst.rb = rb;
+   inst.inst.oe = oe;
+   inst.inst.op2 = op2;
+   inst.inst.rc = rc;
+   emit_instruction(p, inst.bits);
+   if (p->print) {
+      indent(p);
+      printf(format, rt, ra, rb);
+   }
+}
+
+
+
+
+
+/**
+ ** float vector arithmetic
+ **/
+
+/** vector float add */
+void
+ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+   emit_vx(p, 10, vD, vA, vB, "vaddfp\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector float subtract */
+void
+ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+   emit_vx(p, 74, vD, vA, vB, "vsubfp\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector float min */
+void
+ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+   emit_vx(p, 1098, vD, vA, vB, "vminfp\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector float max */
+void
+ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+   emit_vx(p, 1034, vD, vA, vB, "vmaxfp\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector float mult add: vD = vA * vB + vC */
+void
+ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
+{
+   /* note arg order */
+   emit_va(p, 46, vD, vA, vC, vB, "vmaddfp\tv%u, v%u, v%u, v%u\n");
+}
+
+/** vector float negative mult subtract: vD = vA - vB * vC */
+void
+ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
+{
+   /* note arg order */
+   emit_va(p, 47, vD, vB, vA, vC, "vnmsubfp\tv%u, v%u, v%u, v%u\n");
+}
+
+/** vector float compare greater than */
+void
+ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+   emit_vxr(p, 710, vD, vA, vB, "vcmpgtfpx\tv%u, v%u, v%u\n");
+}
+
+/** vector float compare greater than or equal to */
+void
+ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+   emit_vxr(p, 454, vD, vA, vB, "vcmpgefpx\tv%u, v%u, v%u\n");
+}
+
+/** vector float compare equal */
+void
+ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+   emit_vxr(p, 198, vD, vA, vB, "vcmpeqfpx\tv%u, v%u, v%u\n");
+}
+
+/** vector float 2^x */
+void
+ppc_vexptefp(struct ppc_function *p, uint vD, uint vB)
+{
+   emit_vx(p, 394, vD, 0, vB, "vexptefp\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float log2(x) */
+void
+ppc_vlogefp(struct ppc_function *p, uint vD, uint vB)
+{
+   emit_vx(p, 458, vD, 0, vB, "vlogefp\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float reciprocal */
+void
+ppc_vrefp(struct ppc_function *p, uint vD, uint vB)
+{
+   emit_vx(p, 266, vD, 0, vB, "vrefp\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float reciprocal sqrt estimate */
+void
+ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB)
+{
+   emit_vx(p, 330, vD, 0, vB, "vrsqrtefp\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float round to negative infinity */
+void
+ppc_vrfim(struct ppc_function *p, uint vD, uint vB)
+{
+   emit_vx(p, 714, vD, 0, vB, "vrfim\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float round to positive infinity */
+void
+ppc_vrfip(struct ppc_function *p, uint vD, uint vB)
+{
+   emit_vx(p, 650, vD, 0, vB, "vrfip\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float round to nearest int */
+void
+ppc_vrfin(struct ppc_function *p, uint vD, uint vB)
+{
+   emit_vx(p, 522, vD, 0, vB, "vrfin\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector float round to int toward zero */
+void
+ppc_vrfiz(struct ppc_function *p, uint vD, uint vB)
+{
+   emit_vx(p, 586, vD, 0, vB, "vrfiz\tv%u, 0%u, v%u\n", FALSE);
+}
+
+/** vector store: store vR at mem[rA+rB] */
+void
+ppc_stvx(struct ppc_function *p, uint vR, uint rA, uint rB)
+{
+   emit_x(p, 31, vR, rA, rB, 231, "stvx\tv%u, r%u, r%u\n");
+}
+
+/** vector load: vR = mem[rA+rB]
*/
+void
+ppc_lvx(struct ppc_function *p, uint vR, uint rA, uint rB)
+{
+   emit_x(p, 31, vR, rA, rB, 103, "lvx\tv%u, r%u, r%u\n");
+}
+
+/** load vector element word: vR = mem_word[ra+rb] */
+void
+ppc_lvewx(struct ppc_function *p, uint vR, uint rA, uint rB)
+{
+   emit_x(p, 31, vR, rA, rB, 71, "lvewx\tv%u, r%u, r%u\n");
+}
+
+
+
+
+/**
+ ** vector bitwise operations
+ **/
+
+/** vector and */
+void
+ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+   emit_vx(p, 1028, vD, vA, vB, "vand\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector and complement */
+void
+ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+   emit_vx(p, 1092, vD, vA, vB, "vandc\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector or */
+void
+ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+   emit_vx(p, 1156, vD, vA, vB, "vor\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector nor */
+void
+ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+   emit_vx(p, 1284, vD, vA, vB, "vnor\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** vector xor */
+void
+ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+   emit_vx(p, 1220, vD, vA, vB, "vxor\tv%u, v%u, v%u\n", FALSE);
+}
+
+/** Pseudo-instruction: vector move (emitted as vor vD, vA, vA) */
+void
+ppc_vmove(struct ppc_function *p, uint vD, uint vA)
+{
+   /* silence the generic printing so the annotated form can be printed */
+   boolean print = p->print;
+   p->print = FALSE;
+   ppc_vor(p, vD, vA, vA);
+   if (print) {
+      indent(p);
+      printf("vor\tv%u, v%u, v%u \t# v%u = v%u\n", vD, vA, vA, vD, vA);
+   }
+   p->print = print;
+}
+
+/** Set vector register to {0,0,0,0} (emitted as vxor vr, vr, vr) */
+void
+ppc_vzero(struct ppc_function *p, uint vr)
+{
+   /* silence the generic printing so the annotated form can be printed */
+   boolean print = p->print;
+   p->print = FALSE;
+   ppc_vxor(p, vr, vr, vr);
+   if (print) {
+      indent(p);
+      printf("vxor\tv%u, v%u, v%u \t# v%u = {0,0,0,0}\n", vr, vr, vr, vr);
+   }
+   p->print = print;
+}
+
+
+
+
+/**
+ ** Vector shuffle / select / splat / etc
+ **/
+
+/** vector permute */
+void
+ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
+{
+   emit_va(p, 43, vD, vA, vB, vC, "vperm\tv%u, v%u, v%u, v%u\n");
+}
+
+/** vector select */
+void
+ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
+{
+   emit_va(p, 42, vD, vA, vB, vC, "vsel\tv%u, v%u, v%u, v%u\n");
+}
+
+/** vector splat byte */
+void
+ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm)
+{
+   /* VX-form extended opcode of vspltb is 524; 42 is vsel's VA-form opcode */
+   emit_vx(p, 524, vD, imm, vB, "vspltb\tv%u, v%u, %u\n", TRUE);
+}
+
+/** vector splat half word */
+void
+ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm)
+{
+   emit_vx(p, 588, vD, imm, vB, "vsplthw\tv%u, v%u, %u\n", TRUE);
+}
+
+/** vector splat word */
+void
+ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm)
+{
+   emit_vx(p, 652, vD, imm, vB, "vspltw\tv%u, v%u, %u\n", TRUE);
+}
+
+/**
+ * vector splat signed immediate word
+ * \param imm  5-bit signed immediate, -16..15 inclusive
+ */
+void
+ppc_vspltisw(struct ppc_function *p, uint vD, int imm)
+{
+   boolean print = p->print;
+   assert(imm >= -16);
+   assert(imm <= 15);
+   /* The immediate travels in the 5-bit vA field and vB is unused, so
+    * print the two real operands here instead of emit_vx()'s three.
+    */
+   p->print = FALSE;
+   emit_vx(p, 908, vD, imm, 0, "vspltisw\tv%u, %d, %u\n", FALSE);
+   if (print) {
+      indent(p);
+      printf("vspltisw\tv%u, %d\n", vD, imm);
+   }
+   p->print = print;
+}
+
+/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */
+void
+ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB)
+{
+   emit_vx(p, 388, vD, vA, vB, "vslw\tv%u, v%u, v%u\n", FALSE);
+}
+
+
+
+
+/**
+ ** integer arithmetic
+ **/
+
+/** rt = ra + imm */
+void
+ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+   emit_d(p, 14, rt, ra, imm, "addi\tr%u, r%u, %d\n", FALSE);
+}
+
+/** rt = ra + (imm << 16) */
+void
+ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+   emit_d(p, 15, rt, ra, imm, "addis\tr%u, r%u, %d\n", FALSE);
+}
+
+/** rt = ra + rb */
+void
+ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb)
+{
+   emit_xo(p, 31, rt, ra, rb, 0, 266, 0, "add\tr%u, r%u, r%u\n");
+}
+
+/** rt = ra AND rb */
+void
+ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb)
+{
+   emit_x(p, 31, ra, rt, rb, 28, "and\tr%u, r%u, r%u\n"); /* note argument order */
+}
+
+/** rt = ra AND imm */
+void
+ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+   /* note argument order */
+   emit_d(p, 28, ra, rt, imm, "andi\tr%u, r%u, %d\n", FALSE);
+}
+
+/** rt = ra OR rb */
+void
+ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb)
+{
+   emit_x(p, 31, ra, rt, rb, 444, "or\tr%u, r%u, r%u\n"); /* note argument order */
+}
+
+/** rt = ra OR imm */
+void
+ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+   /* note argument order */
+   emit_d(p, 24, ra, rt, imm, "ori\tr%u, r%u, %d\n", FALSE);
+}
+
+/** rt = ra XOR rb */
+void
+ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb)
+{
+   emit_x(p, 31, ra, rt, rb, 316, "xor\tr%u, r%u, r%u\n"); /* note argument order */
+}
+
+/** rt = ra XOR imm */
+void
+ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm)
+{
+   /* note argument order */
+   emit_d(p, 26, ra, rt, imm, "xori\tr%u, r%u, %d\n", FALSE);
+}
+
+/** pseudo instruction: move: rt = ra */
+void
+ppc_mr(struct ppc_function *p, uint rt, uint ra)
+{
+   ppc_or(p, rt, ra, ra);
+}
+
+/** pseudo instruction: load immediate: rt = imm (emitted as addi rt, 0, imm) */
+void
+ppc_li(struct ppc_function *p, uint rt, int imm)
+{
+   /* silence the generic printing so the annotated form can be printed */
+   boolean print = p->print;
+   p->print = FALSE;
+   ppc_addi(p, rt, 0, imm);
+   if (print) {
+      indent(p);
+      printf("addi\tr%u, r0, %d \t# r%u = %d\n", rt, imm, rt, imm);
+   }
+   p->print = print;
+}
+
+/** rt = imm << 16 */
+void
+ppc_lis(struct ppc_function *p, uint rt, int imm)
+{
+   ppc_addis(p, rt, 0, imm);
+}
+
+/** rt = imm (full 32-bit constant, emitted as a lis / ori pair) */
+void
+ppc_load_int(struct ppc_function *p, uint rt, int imm)
+{
+   /* emit_d() asserts that its immediate lies in -32768..32767, so express
+    * the low half in that range.  Only the low 16 bits are encoded, so the
+    * instruction word is the same as for the unsigned value.
+    */
+   int lo = imm & 0xffff;
+   if (lo > 32767)
+      lo -= 65536;
+   ppc_lis(p, rt, (imm >> 16));   /* rt = high 16 bits of imm, shifted up */
+   ppc_ori(p, rt, rt, lo);        /* rt = rt | (imm & 0xffff) */
+}
+
+
+
+
+/**
+ ** integer load/store
+ **/
+
+/** store rs at memory[(ra)+d],
+ * then update ra = (ra)+d
+ */
+void
+ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d)
+{
+   emit_d(p, 37, rs, ra, d, "stwu\tr%u, %d(r%u)\n", TRUE);
+}
+
+/** store rs at memory[(ra)+d] */
+void
+ppc_stw(struct ppc_function *p, uint rs, uint ra, int d)
+{
+   emit_d(p, 36, rs, ra, d, "stw\tr%u, %d(r%u)\n", TRUE);
+}
+
+/** Load rt = mem[(ra)+d]; then zero set high 32 bits to zero.
*/
+void
+ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d)
+{
+   emit_d(p, 32, rt, ra, d, "lwz\tr%u, %d(r%u)\n", TRUE);
+}
+
+
+
+/**
+ ** Float (non-vector) arithmetic
+ **/
+
+/** add: frt = fra + frb */
+void
+ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb)
+{
+   emit_a(p, 63, frt, fra, frb, 21, 0, "fadd\tf%u, f%u, f%u\n");
+}
+
+/** sub: frt = fra - frb */
+void
+ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb)
+{
+   emit_a(p, 63, frt, fra, frb, 20, 0, "fsub\tf%u, f%u, f%u\n");
+}
+
+/** convert to int: rt = (int) ra */
+void
+ppc_fctiwz(struct ppc_function *p, uint rt, uint fra)
+{
+   emit_x(p, 63, rt, 0, fra, 15, "fctiwz\tr%u, r%u, r%u\n");
+}
+
+/** store frs at mem[(ra)+offset] */
+void
+ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset)
+{
+   /* frs is a floating point register, so print it with the f prefix */
+   emit_d(p, 52, frs, ra, offset, "stfs\tf%u, %d(r%u)\n", TRUE);
+}
+
+/** store frs at mem[(ra)+(rb)] */
+void
+ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb)
+{
+   /* frs is a floating point register, so print it with the f prefix */
+   emit_x(p, 31, frs, ra, rb, 983, "stfiwx\tf%u, r%u, r%u\n");
+}
+
+/** load frt = mem[(ra)+offset] */
+void
+ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset)
+{
+   /* opcode 48 is lfs; the listing must not call it "stfs" */
+   emit_d(p, 48, frt, ra, offset, "lfs\tf%u, %d(r%u)\n", TRUE);
+}
+
+
+
+
+
+/**
+ ** branch instructions
+ **/
+
+/** BLR: Branch to link register (p. 35) */
+void
+ppc_blr(struct ppc_function *p)
+{
+   /* blr is "bclr 20,0,0": XL-form, opcode 19, extended opcode 16, LK=0.
+    * An I-form opcode 18 with LK=1 would be "bl 0" (branch and link to
+    * the instruction itself), not a return through the link register.
+    */
+   emit_xl(p, 19, BRANCH_COND_ALWAYS, 0, BRANCH_HINT_SUB_RETURN, 16, 0);
+   if (p->print) {
+      indent(p);
+      printf("blr\n");
+   }
+}
+
+/** Branch Conditional to Link Register (p.
36) */
+void
+ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg)
+{
+   /* XL-form: opcode 19, BO = condOp, BI = condReg, BH = branchHint,
+    * extended opcode 16, LK = 0
+    */
+   emit_xl(p, 19, condOp, condReg, branchHint, 16, 0);
+
+   /* nothing more to do unless the listing is enabled */
+   if (!p->print)
+      return;
+
+   indent(p);
+   printf("bclr\t%u %u %u\n", condOp, branchHint, condReg);
+}
+
+/**
+ * Pseudo instruction: return from subroutine.
+ * Emitted as an unconditional bclr carrying the "subroutine return" hint.
+ */
+void
+ppc_return(struct ppc_function *p)
+{
+   ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0);
+}
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h
new file mode 100644
index 0000000000..93e5f5187d
--- /dev/null
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h
@@ -0,0 +1,342 @@
+/**************************************************************************
+ *
+ * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved.
+ * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/**
+ * PPC code generation.
+ * \author Brian Paul
+ */
+
+
+#ifndef RTASM_PPC_H
+#define RTASM_PPC_H
+
+
+#include "pipe/p_compiler.h"
+
+
+#define PPC_INST_SIZE 4 /**< 4 bytes / instruction */
+
+#define PPC_NUM_REGS 32
+#define PPC_NUM_FP_REGS 32
+#define PPC_NUM_VEC_REGS 32
+
+/** Stack pointer register */
+#define PPC_REG_SP 1
+
+/** Branch conditions */
+#define BRANCH_COND_ALWAYS 0x14 /* binary 1z1zz (z=ignored) */
+
+/** Branch hints */
+#define BRANCH_HINT_SUB_RETURN 0x0 /* binary 00 */
+
+
+/**
+ * State of one function being generated: the instruction buffer, the
+ * register allocation bitmasks and the listing (print) settings.
+ */
+struct ppc_function
+{
+   uint32_t *store; /**< instruction buffer */
+   uint num_inst;   /**< number of instructions currently in the buffer */
+   uint max_inst;   /**< capacity of the buffer, in instructions */
+   uint32_t reg_used; /**< used/free general-purpose registers bitmask */
+   uint32_t fp_used;  /**< used/free floating point registers bitmask */
+   uint32_t vec_used; /**< used/free vector registers bitmask */
+   int indent;        /**< indentation (spaces) of printed instructions */
+   boolean print;     /**< print each instruction as it is emitted? */
+};
+
+
+
+extern void ppc_init_func(struct ppc_function *p);
+extern void ppc_release_func(struct ppc_function *p);
+extern uint ppc_num_instructions(const struct ppc_function *p);
+extern void (*ppc_get_func( struct ppc_function *p ))( void );
+extern void ppc_dump_func(const struct ppc_function *p);
+
+extern void ppc_print_code(struct ppc_function *p, boolean enable);
+extern void ppc_indent(struct ppc_function *p, int spaces);
+extern void ppc_comment(struct ppc_function *p, int rel_indent, const char *s);
+
+extern int ppc_reserve_register(struct ppc_function *p, int reg);
+extern int ppc_allocate_register(struct ppc_function *p);
+extern void ppc_release_register(struct ppc_function *p, int reg);
+extern int ppc_allocate_fp_register(struct ppc_function *p);
+extern void ppc_release_fp_register(struct ppc_function *p, int reg);
+extern int ppc_allocate_vec_register(struct ppc_function *p);
+extern void ppc_release_vec_register(struct ppc_function *p, int reg);
+
+
+
+/**
+ ** float vector arithmetic
+ **/
+
+/** vector float add */
+extern void
+ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float subtract */
+extern void
+ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float min */
+extern void
+ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float max */
+extern void
+ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float mult add: vD = vA * vB + vC */
+extern void
+ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC);
+
+/** vector float negative mult subtract: vD = vA - vB * vC */
+extern void
+ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC);
+
+/** vector float compare greater than */
+extern void
+ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float compare greater than or equal to */
+extern void
+ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float compare equal */
+extern void
+ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector float 2^x */
+extern void
+ppc_vexptefp(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float log2(x) */
+extern void
+ppc_vlogefp(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float reciprocal */
+extern void
+ppc_vrefp(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float reciprocal sqrt estimate */
+extern void
+ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float round to negative infinity */
+extern void
+ppc_vrfim(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float round to positive infinity */
+extern void
+ppc_vrfip(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float round to nearest int */
+extern void
+ppc_vrfin(struct ppc_function *p, uint vD, uint vB);
+
+/** vector float round to int toward zero */
+extern void
+ppc_vrfiz(struct ppc_function *p, uint vD, uint vB);
+
+
+/** vector store: store vR at mem[rA+rB] (rA, rB are general-purpose regs) */
+extern void
+ppc_stvx(struct ppc_function *p, uint vR, uint rA, uint rB);
+
+/** vector load: vR = mem[rA+rB] (rA, rB are general-purpose regs) */
+extern void
+ppc_lvx(struct ppc_function *p, uint vR, uint rA, uint rB);
+
+/** load vector element word: vR = mem_word[rA+rB] */
+extern void
+ppc_lvewx(struct ppc_function *p, uint vR, uint rA, uint rB);
+
+
+
+/**
+ ** vector bitwise operations
+ **/
+
+
+/** vector and */
+extern void
+ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector and complement */
+extern void
+ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector or */
+extern void
+ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector nor */
+extern void
+ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** vector xor */
+extern void
+ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+/** Pseudo-instruction: vector move */
+extern void
+ppc_vmove(struct ppc_function *p, uint vD, uint vA);
+
+/** Set vector register to {0,0,0,0} */
+extern void
+ppc_vzero(struct ppc_function *p, uint vr);
+
+
+
+/**
+ ** Vector shuffle / select / splat / etc
+ **/
+
+/** vector permute */
+extern void
+ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC);
+
+/** vector select */
+extern void
+ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC);
+
+/** vector splat byte */
+extern void
+ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm);
+
+/** vector splat half word */
+extern void
+ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm);
+
+/** vector splat word */
+extern void
+ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm);
+
+/** vector splat signed immediate word */
+extern void
+ppc_vspltisw(struct ppc_function *p, uint vD, int imm);
+
+/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */
+extern void
+ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB);
+
+
+
+/**
+ ** scalar arithmetic
+ **/
+
+/** rt = ra + rb */
+extern void
+ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb);
+
+/** rt = ra + imm */
+extern void
+ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm);
+
+/** rt = ra + (imm << 16) */
+extern void
+ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm);
+
+/** rt = ra AND rb */
+extern void
+ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb);
+
+/** rt = ra AND imm */
+extern void
+ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm);
+
+/** rt = ra OR rb */
+extern void
+ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb);
+
+/** rt = ra OR imm */
+extern void
+ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm);
+
+/** rt = ra XOR rb */
+extern void
+ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb);
+
+/** rt = ra XOR imm */
+extern void
+ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm);
+
+/** pseudo instruction: move: rt = ra */
+extern void
+ppc_mr(struct ppc_function *p, uint rt, uint ra);
+
+/** pseudo instruction: load immediate: rt = imm */
+extern void
+ppc_li(struct ppc_function *p, uint rt, int imm);
+
+/** rt = imm << 16 */
+extern void
+ppc_lis(struct ppc_function *p, uint rt, int imm);
+
+/** rt = imm */
+extern void
+ppc_load_int(struct ppc_function *p, uint rt, int imm);
+
+
+
+/**
+ ** scalar load/store
+ **/
+
+/** store rs at memory[(ra)+d], then update ra = (ra)+d */
+extern void
+ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d);
+
+/** store rs at memory[(ra)+d] */
+extern void
+ppc_stw(struct ppc_function *p, uint rs, uint ra, int d);
+
+/** load rt = mem[(ra)+d] */
+extern void
+ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d);
+
+
+
+/**
+ ** Float (non-vector) arithmetic
+ **/
+
+/** add: frt = fra + frb */
+extern void
+ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb);
+
+/** sub: frt = fra - frb */
+extern void
+ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb);
+
+/** convert to int: rt = (int) fra */
+extern void
+ppc_fctiwz(struct ppc_function *p, uint rt, uint fra);
+
+/** store frs at mem[(ra)+offset] */
+extern void
+ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset);
+
+/** store frs at mem[(ra)+(rb)] */
+extern void
+ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb);
+
+/** load frt = mem[(ra)+offset] */
+extern void
+ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset);
+
+
+
+/**
+ ** branch instructions
+ **/
+
+/** BLR: Branch to link register */
+extern void
+ppc_blr(struct ppc_function *p);
+
+/** Branch Conditional to Link Register */
+extern void
+ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg);
+
+/** Pseudo instruction: return from subroutine */
+extern void
+ppc_return(struct ppc_function *p);
+
+
+#endif /* RTASM_PPC_H */
diff --git
a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index a04cc6c4ff..53a0e722cf 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -27,12 +27,16 @@ * Real-time assembly generation interface for Cell B.E. SPEs. * * \author Ian Romanick <idr@us.ibm.com> + * \author Brian Paul */ + +#include <stdio.h> #include "pipe/p_compiler.h" #include "util/u_memory.h" #include "rtasm_ppc_spe.h" + #ifdef GALLIUM_CELL /** * SPE instruction types @@ -143,21 +147,91 @@ union spe_inst_RI18 { /*@}*/ -static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB) +static void +indent(const struct spe_function *p) +{ + int i; + for (i = 0; i < p->indent; i++) { + putchar(' '); + } +} + + +static const char * +rem_prefix(const char *longname) +{ + return longname + 4; +} + + +static const char * +reg_name(int reg) +{ + switch (reg) { + case SPE_REG_SP: + return "$sp"; + case SPE_REG_RA: + return "$lr"; + default: + { + /* cycle through four buffers to handle multiple calls per printf */ + static char buf[4][10]; + static int b = 0; + b = (b + 1) % 4; + sprintf(buf[b], "$%d", reg); + return buf[b]; + } + } +} + + +static void +emit_instruction(struct spe_function *p, uint32_t inst_bits) +{ + if (!p->store) + return; /* out of memory, drop the instruction */ + + if (p->num_inst == p->max_inst) { + /* allocate larger buffer */ + uint32_t *newbuf; + p->max_inst *= 2; /* 2x larger */ + newbuf = align_malloc(p->max_inst * SPE_INST_SIZE, 16); + if (newbuf) { + memcpy(newbuf, p->store, p->num_inst * SPE_INST_SIZE); + } + align_free(p->store); + p->store = newbuf; + if (!p->store) { + /* out of memory */ + p->num_inst = 0; + return; + } + } + + p->store[p->num_inst++] = inst_bits; +} + + + +static void emit_RR(struct spe_function *p, unsigned op, int rT, + int rA, int rB, const char *name) { union spe_inst_RR inst; inst.inst.op = op; inst.inst.rB = rB; inst.inst.rA 
= rA; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, %s, %s\n", + rem_prefix(name), reg_name(rT), reg_name(rA), reg_name(rB)); + } } -static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB, unsigned rC) +static void emit_RRR(struct spe_function *p, unsigned op, int rT, + int rA, int rB, int rC, const char *name) { union spe_inst_RRR inst; inst.inst.op = op; @@ -165,155 +239,212 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rB = rB; inst.inst.rA = rA; inst.inst.rC = rC; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, %s, %s, %s\n", rem_prefix(name), reg_name(rT), + reg_name(rA), reg_name(rB), reg_name(rC)); + } } -static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) +static void emit_RI7(struct spe_function *p, unsigned op, int rT, + int rA, int imm, const char *name) { union spe_inst_RI7 inst; inst.inst.op = op; inst.inst.i7 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), reg_name(rA), imm); + } } -static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) +static void emit_RI8(struct spe_function *p, unsigned op, int rT, + int rA, int imm, const char *name) { union spe_inst_RI8 inst; inst.inst.op = op; inst.inst.i8 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), 
reg_name(rA), imm); + } } -static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) +static void emit_RI10(struct spe_function *p, unsigned op, int rT, + int rA, int imm, const char *name) { union spe_inst_RI10 inst; inst.inst.op = op; inst.inst.i10 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), reg_name(rA), imm); + } } -static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, - int imm) +/** As above, but do range checking on signed immediate value */ +static void emit_RI10s(struct spe_function *p, unsigned op, int rT, + int rA, int imm, const char *name) +{ + assert(imm <= 511); + assert(imm >= -512); + emit_RI10(p, op, rT, rA, imm, name); +} + + +static void emit_RI16(struct spe_function *p, unsigned op, int rT, + int imm, const char *name) { union spe_inst_RI16 inst; inst.inst.op = op; inst.inst.i16 = imm; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); + } } -static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, - int imm) +static void emit_RI18(struct spe_function *p, unsigned op, int rT, + int imm, const char *name) { union spe_inst_RI18 inst; inst.inst.op = op; inst.inst.i18 = imm; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); + } } - +#define EMIT(_name, _op) \ +void _name (struct spe_function *p) \ +{ \ + emit_RR(p, _op, 0, 0, 0, __FUNCTION__); \ +} #define EMIT_(_name, _op) \ -void _name (struct spe_function *p, unsigned rT) \ 
+void _name (struct spe_function *p, int rT) \ { \ - emit_RR(p, _op, rT, 0, 0); \ + emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \ } #define EMIT_R(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA) \ +void _name (struct spe_function *p, int rT, int rA) \ { \ - emit_RR(p, _op, rT, rA, 0); \ + emit_RR(p, _op, rT, rA, 0, __FUNCTION__); \ } #define EMIT_RR(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \ +void _name (struct spe_function *p, int rT, int rA, int rB) \ { \ - emit_RR(p, _op, rT, rA, rB); \ + emit_RR(p, _op, rT, rA, rB, __FUNCTION__); \ } #define EMIT_RRR(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \ +void _name (struct spe_function *p, int rT, int rA, int rB, int rC) \ { \ - emit_RRR(p, _op, rT, rA, rB, rC); \ + emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__); \ } #define EMIT_RI7(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +void _name (struct spe_function *p, int rT, int rA, int imm) \ { \ - emit_RI7(p, _op, rT, rA, imm); \ + emit_RI7(p, _op, rT, rA, imm, __FUNCTION__); \ } #define EMIT_RI8(_name, _op, bias) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +void _name (struct spe_function *p, int rT, int rA, int imm) \ { \ - emit_RI8(p, _op, rT, rA, bias - imm); \ + emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__); \ } #define EMIT_RI10(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +void _name (struct spe_function *p, int rT, int rA, int imm) \ { \ - emit_RI10(p, _op, rT, rA, imm); \ + emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \ +} + +#define EMIT_RI10s(_name, _op) \ +void _name (struct spe_function *p, int rT, int rA, int imm) \ +{ \ + emit_RI10s(p, _op, rT, rA, imm, __FUNCTION__); \ } #define EMIT_RI16(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, int imm) \ +void _name (struct spe_function *p, 
int rT, int imm) \ { \ - emit_RI16(p, _op, rT, imm); \ + emit_RI16(p, _op, rT, imm, __FUNCTION__); \ } #define EMIT_RI18(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, int imm) \ +void _name (struct spe_function *p, int rT, int imm) \ { \ - emit_RI18(p, _op, rT, imm); \ + emit_RI18(p, _op, rT, imm, __FUNCTION__); \ } #define EMIT_I16(_name, _op) \ void _name (struct spe_function *p, int imm) \ { \ - emit_RI16(p, _op, 0, imm); \ + emit_RI16(p, _op, 0, imm, __FUNCTION__); \ } #include "rtasm_ppc_spe.h" + /** * Initialize an spe_function. - * \param code_size size of instruction buffer to allocate, in bytes. + * \param code_size initial size of instruction buffer to allocate, in bytes. + * If zero, use a default. */ void spe_init_func(struct spe_function *p, unsigned code_size) { - p->store = align_malloc(code_size, 16); + uint i; + + if (!code_size) + code_size = 64; + p->num_inst = 0; p->max_inst = code_size / SPE_INST_SIZE; + p->store = align_malloc(code_size, 16); + + p->set_count = 0; + memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0])); /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. */ - p->regs[0] = ~7; - p->regs[1] = (1U << (80 - 64)) - 1; + p->regs[0] = p->regs[1] = p->regs[2] = 1; + for (i = 80; i <= 127; i++) { + p->regs[i] = 1; + } + + p->print = FALSE; + p->indent = 0; } @@ -327,20 +458,23 @@ void spe_release_func(struct spe_function *p) } +/** Return current code size in bytes. */ +unsigned spe_code_size(const struct spe_function *p) +{ + return p->num_inst * SPE_INST_SIZE; +} + + /** - * Alloate a SPE register. + * Allocate a SPE register. * \return register index or -1 if none left. 
*/ int spe_allocate_available_register(struct spe_function *p) { unsigned i; for (i = 0; i < SPE_NUM_REGS; i++) { - const uint64_t mask = (1ULL << (i % 64)); - const unsigned idx = i / 64; - - assert(idx < 2); - if ((p->regs[idx] & mask) != 0) { - p->regs[idx] &= ~mask; + if (p->regs[i] == 0) { + p->regs[i] = 1; return i; } } @@ -354,31 +488,161 @@ int spe_allocate_available_register(struct spe_function *p) */ int spe_allocate_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 64; - const unsigned bit = reg % 64; - assert(reg < SPE_NUM_REGS); - assert((p->regs[idx] & (1ULL << bit)) != 0); - - p->regs[idx] &= ~(1ULL << bit); + assert(p->regs[reg] == 0); + p->regs[reg] = 1; return reg; } /** - * Mark the given SPE register as "unallocated". + * Mark the given SPE register as "unallocated". Note that this should + * only be used on registers allocated in the current register set; an + * assertion will fail if an attempt is made to deallocate a register + * allocated in an earlier register set. */ void spe_release_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 64; - const unsigned bit = reg % 64; + assert(reg >= 0); + assert(reg < SPE_NUM_REGS); + assert(p->regs[reg] == 1); - assert(idx < 2); + p->regs[reg] = 0; +} - assert(reg < SPE_NUM_REGS); - assert((p->regs[idx] & (1ULL << bit)) == 0); +/** + * Start a new set of registers. This can be called if + * it will be difficult later to determine exactly what + * registers were actually allocated during a code generation + * sequence, and you really just want to deallocate all of them. + */ +void spe_allocate_register_set(struct spe_function *p) +{ + uint i; + + /* Keep track of the set count. If it ever wraps around to 0, + * we're in trouble. + */ + p->set_count++; + assert(p->set_count > 0); + + /* Increment the allocation count of all registers currently + * allocated. 
Then any registers that are allocated in this set + * will be the only ones with a count of 1; they'll all be released + * when the register set is released. + */ + for (i = 0; i < SPE_NUM_REGS; i++) { + if (p->regs[i] > 0) + p->regs[i]++; + } +} + +void spe_release_register_set(struct spe_function *p) +{ + uint i; + + /* If the set count drops below zero, we're in trouble. */ + assert(p->set_count > 0); + p->set_count--; - p->regs[idx] |= (1ULL << bit); + /* Drop the allocation level of all registers. Any allocated + * during this register set will drop to 0 and then become + * available. + */ + for (i = 0; i < SPE_NUM_REGS; i++) { + if (p->regs[i] > 0) + p->regs[i]--; + } +} + + +unsigned +spe_get_registers_used(const struct spe_function *p, ubyte used[]) +{ + unsigned i, num = 0; + /* only count registers in the range available to callers */ + for (i = 2; i < 80; i++) { + if (p->regs[i]) { + used[num++] = i; + } + } + return num; +} + + +void +spe_print_code(struct spe_function *p, boolean enable) +{ + p->print = enable; +} + + +void +spe_indent(struct spe_function *p, int spaces) +{ + p->indent += spaces; +} + + +void +spe_comment(struct spe_function *p, int rel_indent, const char *s) +{ + if (p->print) { + p->indent += rel_indent; + indent(p); + p->indent -= rel_indent; + printf("# %s\n", s); + } +} + + +/** + * Load quad word. + * NOTE: offset is in bytes and the least significant 4 bits must be zero! + */ +void spe_lqd(struct spe_function *p, int rT, int rA, int offset) +{ + const boolean pSave = p->print; + + /* offset must be a multiple of 16 */ + assert(offset % 16 == 0); + /* offset must fit in 10-bit signed int field, after shifting */ + assert((offset >> 4) <= 511); + assert((offset >> 4) >= -512); + + p->print = FALSE; + emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd"); + p->print = pSave; + + if (p->print) { + indent(p); + printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); + } +} + + +/** + * Store quad word. 
+ * NOTE: offset is in bytes and the least significant 4 bits must be zero! + */ +void spe_stqd(struct spe_function *p, int rT, int rA, int offset) +{ + const boolean pSave = p->print; + + /* offset must be a multiple of 16 */ + assert(offset % 16 == 0); + /* offset must fit in 10-bit signed int field, after shifting */ + assert((offset >> 4) <= 511); + assert((offset >> 4) >= -512); + + p->print = FALSE; + emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd"); + p->print = pSave; + + if (p->print) { + indent(p); + printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); + } } @@ -390,53 +654,53 @@ void spe_release_register(struct spe_function *p, int reg) */ /** Branch Indirect to address in rA */ -void spe_bi(struct spe_function *p, unsigned rA, int d, int e) +void spe_bi(struct spe_function *p, int rA, int d, int e) { - emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Interupt Return */ -void spe_iret(struct spe_function *p, unsigned rA, int d, int e) +void spe_iret(struct spe_function *p, int rA, int d, int e) { - emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect and set link on external data */ -void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, +void spe_bisled(struct spe_function *p, int rT, int rA, int d, int e) { - emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect and set link. Save PC in rT, jump to rA. */ -void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, +void spe_bisl(struct spe_function *p, int rT, int rA, int d, int e) { - emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if zero word. If rT.word[0]==0, jump to rA. 
*/ -void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +void spe_biz(struct spe_function *p, int rT, int rA, int d, int e) { - emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */ -void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +void spe_binz(struct spe_function *p, int rT, int rA, int d, int e) { - emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */ -void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e) { - emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. 
*/ -void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e) { - emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } @@ -454,7 +718,6 @@ hbrr; #if 0 stop; EMIT_RR (spe_stopd, 0x140); -EMIT_ (spe_lnop, 0x001); EMIT_ (spe_nop, 0x201); sync; EMIT_ (spe_dsync, 0x003); @@ -471,7 +734,7 @@ EMIT_R (spe_mtspr, 0x10c); void -spe_load_float(struct spe_function *p, unsigned rT, float x) +spe_load_float(struct spe_function *p, int rT, float x) { if (x == 0.0f) { spe_il(p, rT, 0x0); @@ -498,45 +761,307 @@ spe_load_float(struct spe_function *p, unsigned rT, float x) void -spe_load_int(struct spe_function *p, unsigned rT, int i) +spe_load_int(struct spe_function *p, int rT, int i) { if (-32768 <= i && i <= 32767) { spe_il(p, rT, i); } else { spe_ilhu(p, rT, i >> 16); - spe_iohl(p, rT, i & 0xffff); + if (i & 0xffff) + spe_iohl(p, rT, i & 0xffff); } } +void spe_load_uint(struct spe_function *p, int rT, uint ui) +{ + /* If the whole value is in the lower 18 bits, use ila, which + * doesn't sign-extend. Otherwise, if the two halfwords of + * the constant are identical, use ilh. Otherwise, if every byte of + * the desired value is 0x00 or 0xff, we can use Form Select Mask for + * Bytes Immediate (fsmbi) to load the value in a single instruction. + * Otherwise, in the general case, we have to use ilhu followed by iohl. 
+ */ + if ((ui & 0x0003ffff) == ui) { + spe_ila(p, rT, ui); + } + else if ((ui >> 16) == (ui & 0xffff)) { + spe_ilh(p, rT, ui & 0xffff); + } + else if ( + ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) && + ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) && + ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) && + ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000) + ) { + uint mask = 0; + /* fsmbi duplicates each bit in the given mask eight times, + * using a 16-bit value to initialize a 16-byte quadword. + * Each 4-bit nybble of the mask corresponds to a full word + * of the result; look at the value and figure out the mask + * (replicated for each word in the quadword), and then + * form the "select mask" to get the value. + */ + if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111; + if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222; + if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444; + if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888; + spe_fsmbi(p, rT, mask); + } + else { + /* The general case: this usually uses two instructions, but + * may use only one if the low-order 16 bits of each word are 0. + */ + spe_ilhu(p, rT, ui >> 16); + if (ui & 0xffff) + spe_iohl(p, rT, ui & 0xffff); + } +} +/** + * This function is constructed identically to spe_xor_uint() below. + * Changes to one should be made in the other. + */ void -spe_splat(struct spe_function *p, unsigned rT, unsigned rA) +spe_and_uint(struct spe_function *p, int rT, int rA, uint ui) { - spe_ila(p, rT, 66051); - spe_shufb(p, rT, rA, rA, rT); + /* If we can, emit a single instruction, either And Byte Immediate + * (which uses the same constant across each byte), And Halfword Immediate + * (which sign-extends a 10-bit immediate to 16 bits and uses that + * across each halfword), or And Word Immediate (which sign-extends + * a 10-bit immediate to 32 bits). + * + * Otherwise, we'll need to use a temporary register. 
+ */ + uint tmp; + + /* If the upper 23 bits are all 0s or all 1s, sign extension + * will work and we can use And Word Immediate + */ + tmp = ui & 0xfffffe00; + if (tmp == 0xfffffe00 || tmp == 0) { + spe_andi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric along halfword boundaries and + * the upper 7 bits of each halfword are all 0s or 1s, we + * can use And Halfword Immediate + */ + tmp = ui & 0xfe00fe00; + if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { + spe_andhi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric in each byte, then we can use + * the And Byte Immediate instruction. + */ + tmp = ui & 0x000000ff; + if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { + spe_andbi(p, rT, rA, tmp); + return; + } + + /* Otherwise, we'll have to use a temporary register. */ + int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_and(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); } +/** + * This function is constructed identically to spe_and_uint() above. + * Changes to one should be made in the other. + */ void -spe_complement(struct spe_function *p, unsigned rT) +spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui) { - spe_nor(p, rT, rT, rT); + /* If we can, emit a single instruction, either Exclusive Or Byte + * Immediate (which uses the same constant across each byte), Exclusive + * Or Halfword Immediate (which sign-extends a 10-bit immediate to + * 16 bits and uses that across each halfword), or Exclusive Or Word + * Immediate (which sign-extends a 10-bit immediate to 32 bits). + * + * Otherwise, we'll need to use a temporary register. 
+ */ + uint tmp; + + /* If the upper 23 bits are all 0s or all 1s, sign extension + * will work and we can use Exclusive Or Word Immediate + */ + tmp = ui & 0xfffffe00; + if (tmp == 0xfffffe00 || tmp == 0) { + spe_xori(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric along halfword boundaries and + * the upper 7 bits of each halfword are all 0s or 1s, we + * can use Exclusive Or Halfword Immediate + */ + tmp = ui & 0xfe00fe00; + if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { + spe_xorhi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric in each byte, then we can use + * the Exclusive Or Byte Immediate instruction. + */ + tmp = ui & 0x000000ff; + if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { + spe_xorbi(p, rT, rA, tmp); + return; + } + + /* Otherwise, we'll have to use a temporary register. */ + int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_xor(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); +} + +void +spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui) +{ + /* If the comparison value is 9 bits or less, it fits inside a + * Compare Equal Word Immediate instruction. + */ + if ((ui & 0x000001ff) == ui) { + spe_ceqi(p, rT, rA, ui); + } + /* Otherwise, we're going to have to load a word first. */ + else { + int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_ceq(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); + } +} + +void +spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui) +{ + /* If the comparison value is 10 bits or less, it fits inside a + * Compare Logical Greater Than Word Immediate instruction. + */ + if ((ui & 0x000003ff) == ui) { + spe_clgti(p, rT, rA, ui); + } + /* Otherwise, we're going to have to load a word first. 
*/ + else { + int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_clgt(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); + } +} + +void +spe_splat(struct spe_function *p, int rT, int rA) +{ + /* Use a temporary, just in case rT == rA */ + int tmp_reg = spe_allocate_available_register(p); + /* Duplicate bytes 0, 1, 2, and 3 across the whole register */ + spe_ila(p, tmp_reg, 0x00010203); + spe_shufb(p, rT, rA, rA, tmp_reg); + spe_release_register(p, tmp_reg); +} + + +void +spe_complement(struct spe_function *p, int rT, int rA) +{ + spe_nor(p, rT, rA, rA); } void -spe_move(struct spe_function *p, unsigned rT, unsigned rA) +spe_move(struct spe_function *p, int rT, int rA) { - spe_ori(p, rT, rA, 0); + /* Use different instructions depending on the instruction address + * to take advantage of the dual pipelines. + */ + if (p->num_inst & 1) + spe_shlqbyi(p, rT, rA, 0); /* odd pipe */ + else + spe_ori(p, rT, rA, 0); /* even pipe */ } void -spe_zero(struct spe_function *p, unsigned rT) +spe_zero(struct spe_function *p, int rT) { spe_xor(p, rT, rT, rT); } +void +spe_splat_word(struct spe_function *p, int rT, int rA, int word) +{ + assert(word >= 0); + assert(word <= 3); + + if (word == 0) { + int tmp1 = rT; + spe_ila(p, tmp1, 66051); + spe_shufb(p, rT, rA, rA, tmp1); + } + else { + /* XXX review this, we may not need the rotqbyi instruction */ + int tmp1 = rT; + int tmp2 = spe_allocate_available_register(p); + + spe_ila(p, tmp1, 66051); + spe_rotqbyi(p, tmp2, rA, 4 * word); + spe_shufb(p, rT, tmp2, tmp2, tmp1); + + spe_release_register(p, tmp2); + } +} + +/** + * For each 32-bit float element of rA and rB, choose the smaller of the + * two, compositing them into the rT register. + * + * The Float Compare Greater Than (fcgt) instruction will put 1s into + * compare_reg where rA > rB, and 0s where rA <= rB. 
+ * + * Then the Select Bits (selb) instruction will take bits from rA where + * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA + * where rA <= rB and from rB where rA > rB, which is exactly the + * "min" operation. + * + * The compare_reg could in many cases be the same as rT, unless + * rT == rA || rT == rB. But since this is common in constructions + * like "x = min(x, a)", we always allocate a new register to be safe. + */ +void +spe_float_min(struct spe_function *p, int rT, int rA, int rB) +{ + int compare_reg = spe_allocate_available_register(p); + spe_fcgt(p, compare_reg, rA, rB); + spe_selb(p, rT, rA, rB, compare_reg); + spe_release_register(p, compare_reg); +} + +/** + * For each 32-bit float element of rA and rB, choose the greater of the + * two, compositing them into the rT register. + * + * The logic is similar to that of spe_float_min() above; the only + * difference is that the registers on spe_selb() have been reversed, + * so that the larger of the two is selected instead of the smaller. 
+ */ +void +spe_float_max(struct spe_function *p, int rT, int rA, int rB) +{ + int compare_reg = spe_allocate_available_register(p); + spe_fcgt(p, compare_reg, rA, rB); + spe_selb(p, rT, rB, rA, compare_reg); + spe_release_register(p, compare_reg); +} + #endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index d95e5aace3..65d9c77415 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -28,6 +28,7 @@ * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf * * \author Ian Romanick <idr@us.ibm.com> + * \author Brian Paul */ #ifndef RTASM_PPC_SPE_H @@ -39,10 +40,10 @@ /** number of general-purpose SIMD registers */ #define SPE_NUM_REGS 128 -/** Return Address register */ +/** Return Address register (aka $lr / Link Register) */ #define SPE_REG_RA 0 -/** Stack Pointer register */ +/** Stack Pointer register (aka $sp) */ #define SPE_REG_SP 1 @@ -52,308 +53,371 @@ struct spe_function uint num_inst; uint max_inst; - /** - * Mask of used / unused registers - * - * Each set bit corresponds to an available register. Each cleared bit - * corresponds to an allocated register. + /** + * The "set count" is the number of nested register sets that are + * currently open. In the unlikely case that this counter overflows, + * register allocation will start to be confused, which is critical + * enough that we check for it. + */ + unsigned char set_count; + + /** + * Flags for used and unused registers. Each byte corresponds to a + * register; a 0 in that byte means that the register is available. + * A value of 1 means that the register was allocated in the current + * register set. Any other value N means that the register was allocated + * N - 1 register sets ago. 
* * \sa * spe_allocate_register, spe_allocate_available_register, - * spe_release_register + * spe_allocate_register_set, spe_release_register_set, spe_release_register, */ - uint64_t regs[SPE_NUM_REGS / 64]; + unsigned char regs[SPE_NUM_REGS]; + + boolean print; /**< print/dump instructions as they're emitted? */ + int indent; /**< number of spaces to indent */ }; -extern void spe_init_func(struct spe_function *p, unsigned code_size); + +extern void spe_init_func(struct spe_function *p, uint code_size); extern void spe_release_func(struct spe_function *p); +extern uint spe_code_size(const struct spe_function *p); extern int spe_allocate_available_register(struct spe_function *p); extern int spe_allocate_register(struct spe_function *p, int reg); extern void spe_release_register(struct spe_function *p, int reg); +extern void spe_allocate_register_set(struct spe_function *p); +extern void spe_release_register_set(struct spe_function *p); + +extern uint spe_get_registers_used(const struct spe_function *p, ubyte used[]); + +extern void spe_print_code(struct spe_function *p, boolean enable); +extern void spe_indent(struct spe_function *p, int spaces); +extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); + #endif /* RTASM_PPC_SPE_H */ -#ifndef EMIT_ -#define EMIT_(name, _op) \ - extern void _name (struct spe_function *p, unsigned rT) +#ifndef EMIT +#define EMIT(_name, _op) \ + extern void _name (struct spe_function *p); +#define EMIT_(_name, _op) \ + extern void _name (struct spe_function *p, int rT); #define EMIT_R(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA) + extern void _name (struct spe_function *p, int rT, int rA); #define EMIT_RR(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - unsigned rB) + extern void _name (struct spe_function *p, int rT, int rA, int rB); #define EMIT_RRR(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned 
rA, \ - unsigned rB, unsigned rC) + extern void _name (struct spe_function *p, int rT, int rA, int rB, int rC); #define EMIT_RI7(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + extern void _name (struct spe_function *p, int rT, int rA, int imm); #define EMIT_RI8(_name, _op, bias) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + extern void _name (struct spe_function *p, int rT, int rA, int imm); #define EMIT_RI10(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + extern void _name (struct spe_function *p, int rT, int rA, int imm); +#define EMIT_RI10s(_name, _op) \ + extern void _name (struct spe_function *p, int rT, int rA, int imm); #define EMIT_RI16(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm) + extern void _name (struct spe_function *p, int rT, int imm); #define EMIT_RI18(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm) + extern void _name (struct spe_function *p, int rT, int imm); #define EMIT_I16(_name, _op) \ - extern void _name (struct spe_function *p, int imm) + extern void _name (struct spe_function *p, int imm); #define UNDEF_EMIT_MACROS -#endif /* EMIT_ */ +#endif /* EMIT */ /* Memory load / store instructions */ -EMIT_RI10(spe_lqd, 0x034); -EMIT_RR (spe_lqx, 0x1c4); -EMIT_RI16(spe_lqa, 0x061); -EMIT_RI16(spe_lqr, 0x067); -EMIT_RI10(spe_stqd, 0x024); -EMIT_RR (spe_stqx, 0x144); -EMIT_RI16(spe_stqa, 0x041); -EMIT_RI16(spe_stqr, 0x047); -EMIT_RI7 (spe_cbd, 0x1f4); -EMIT_RR (spe_cbx, 0x1d4); -EMIT_RI7 (spe_chd, 0x1f5); -EMIT_RI7 (spe_chx, 0x1d5); -EMIT_RI7 (spe_cwd, 0x1f6); -EMIT_RI7 (spe_cwx, 0x1d6); -EMIT_RI7 (spe_cdd, 0x1f7); -EMIT_RI7 (spe_cdx, 0x1d7); +EMIT_RR (spe_lqx, 0x1c4) +EMIT_RI16(spe_lqa, 0x061) +EMIT_RI16(spe_lqr, 0x067) +EMIT_RR (spe_stqx, 0x144) +EMIT_RI16(spe_stqa, 0x041) +EMIT_RI16(spe_stqr, 0x047) +EMIT_RI7 (spe_cbd, 0x1f4) +EMIT_RR 
(spe_cbx, 0x1d4) +EMIT_RI7 (spe_chd, 0x1f5) +EMIT_RI7 (spe_chx, 0x1d5) +EMIT_RI7 (spe_cwd, 0x1f6) +EMIT_RI7 (spe_cwx, 0x1d6) +EMIT_RI7 (spe_cdd, 0x1f7) +EMIT_RI7 (spe_cdx, 0x1d7) /* Constant formation instructions */ -EMIT_RI16(spe_ilh, 0x083); -EMIT_RI16(spe_ilhu, 0x082); -EMIT_RI16(spe_il, 0x081); -EMIT_RI18(spe_ila, 0x021); -EMIT_RI16(spe_iohl, 0x0c1); -EMIT_RI16(spe_fsmbi, 0x065); +EMIT_RI16(spe_ilh, 0x083) +EMIT_RI16(spe_ilhu, 0x082) +EMIT_RI16(spe_il, 0x081) +EMIT_RI18(spe_ila, 0x021) +EMIT_RI16(spe_iohl, 0x0c1) +EMIT_RI16(spe_fsmbi, 0x065) /* Integer and logical instructions */ -EMIT_RR (spe_ah, 0x0c8); -EMIT_RI10(spe_ahi, 0x01d); -EMIT_RR (spe_a, 0x0c0); -EMIT_RI10(spe_ai, 0x01c); -EMIT_RR (spe_sfh, 0x048); -EMIT_RI10(spe_sfhi, 0x00d); -EMIT_RR (spe_sf, 0x040); -EMIT_RI10(spe_sfi, 0x00c); -EMIT_RR (spe_addx, 0x340); -EMIT_RR (spe_cg, 0x0c2); -EMIT_RR (spe_cgx, 0x342); -EMIT_RR (spe_sfx, 0x341); -EMIT_RR (spe_bg, 0x042); -EMIT_RR (spe_bgx, 0x343); -EMIT_RR (spe_mpy, 0x3c4); -EMIT_RR (spe_mpyu, 0x3cc); -EMIT_RI10(spe_mpyi, 0x074); -EMIT_RI10(spe_mpyui, 0x075); -EMIT_RRR (spe_mpya, 0x00c); -EMIT_RR (spe_mpyh, 0x3c5); -EMIT_RR (spe_mpys, 0x3c7); -EMIT_RR (spe_mpyhh, 0x3c6); -EMIT_RR (spe_mpyhha, 0x346); -EMIT_RR (spe_mpyhhu, 0x3ce); -EMIT_RR (spe_mpyhhau, 0x34e); -EMIT_R (spe_clz, 0x2a5); -EMIT_R (spe_cntb, 0x2b4); -EMIT_R (spe_fsmb, 0x1b6); -EMIT_R (spe_fsmh, 0x1b5); -EMIT_R (spe_fsm, 0x1b4); -EMIT_R (spe_gbb, 0x1b2); -EMIT_R (spe_gbh, 0x1b1); -EMIT_R (spe_gb, 0x1b0); -EMIT_RR (spe_avgb, 0x0d3); -EMIT_RR (spe_absdb, 0x053); -EMIT_RR (spe_sumb, 0x253); -EMIT_R (spe_xsbh, 0x2b6); -EMIT_R (spe_xshw, 0x2ae); -EMIT_R (spe_xswd, 0x2a6); -EMIT_RR (spe_and, 0x0c1); -EMIT_RR (spe_andc, 0x2c1); -EMIT_RI10(spe_andbi, 0x016); -EMIT_RI10(spe_andhi, 0x015); -EMIT_RI10(spe_andi, 0x014); -EMIT_RR (spe_or, 0x041); -EMIT_RR (spe_orc, 0x2c9); -EMIT_RI10(spe_orbi, 0x006); -EMIT_RI10(spe_orhi, 0x005); -EMIT_RI10(spe_ori, 0x004); -EMIT_R (spe_orx, 0x1f0); -EMIT_RR (spe_xor, 0x241); 
-EMIT_RI10(spe_xorbi, 0x026); -EMIT_RI10(spe_xorhi, 0x025); -EMIT_RI10(spe_xori, 0x024); -EMIT_RR (spe_nand, 0x0c9); -EMIT_RR (spe_nor, 0x049); -EMIT_RR (spe_eqv, 0x249); -EMIT_RRR (spe_selb, 0x008); -EMIT_RRR (spe_shufb, 0x00b); +EMIT_RR (spe_ah, 0x0c8) +EMIT_RI10(spe_ahi, 0x01d) +EMIT_RR (spe_a, 0x0c0) +EMIT_RI10s(spe_ai, 0x01c) +EMIT_RR (spe_sfh, 0x048) +EMIT_RI10(spe_sfhi, 0x00d) +EMIT_RR (spe_sf, 0x040) +EMIT_RI10(spe_sfi, 0x00c) +EMIT_RR (spe_addx, 0x340) +EMIT_RR (spe_cg, 0x0c2) +EMIT_RR (spe_cgx, 0x342) +EMIT_RR (spe_sfx, 0x341) +EMIT_RR (spe_bg, 0x042) +EMIT_RR (spe_bgx, 0x343) +EMIT_RR (spe_mpy, 0x3c4) +EMIT_RR (spe_mpyu, 0x3cc) +EMIT_RI10(spe_mpyi, 0x074) +EMIT_RI10(spe_mpyui, 0x075) +EMIT_RRR (spe_mpya, 0x00c) +EMIT_RR (spe_mpyh, 0x3c5) +EMIT_RR (spe_mpys, 0x3c7) +EMIT_RR (spe_mpyhh, 0x3c6) +EMIT_RR (spe_mpyhha, 0x346) +EMIT_RR (spe_mpyhhu, 0x3ce) +EMIT_RR (spe_mpyhhau, 0x34e) +EMIT_R (spe_clz, 0x2a5) +EMIT_R (spe_cntb, 0x2b4) +EMIT_R (spe_fsmb, 0x1b6) +EMIT_R (spe_fsmh, 0x1b5) +EMIT_R (spe_fsm, 0x1b4) +EMIT_R (spe_gbb, 0x1b2) +EMIT_R (spe_gbh, 0x1b1) +EMIT_R (spe_gb, 0x1b0) +EMIT_RR (spe_avgb, 0x0d3) +EMIT_RR (spe_absdb, 0x053) +EMIT_RR (spe_sumb, 0x253) +EMIT_R (spe_xsbh, 0x2b6) +EMIT_R (spe_xshw, 0x2ae) +EMIT_R (spe_xswd, 0x2a6) +EMIT_RR (spe_and, 0x0c1) +EMIT_RR (spe_andc, 0x2c1) +EMIT_RI10s(spe_andbi, 0x016) +EMIT_RI10s(spe_andhi, 0x015) +EMIT_RI10s(spe_andi, 0x014) +EMIT_RR (spe_or, 0x041) +EMIT_RR (spe_orc, 0x2c9) +EMIT_RI10s(spe_orbi, 0x006) +EMIT_RI10s(spe_orhi, 0x005) +EMIT_RI10s(spe_ori, 0x004) +EMIT_R (spe_orx, 0x1f0) +EMIT_RR (spe_xor, 0x241) +EMIT_RI10s(spe_xorbi, 0x046) +EMIT_RI10s(spe_xorhi, 0x045) +EMIT_RI10s(spe_xori, 0x044) +EMIT_RR (spe_nand, 0x0c9) +EMIT_RR (spe_nor, 0x049) +EMIT_RR (spe_eqv, 0x249) +EMIT_RRR (spe_selb, 0x008) +EMIT_RRR (spe_shufb, 0x00b) /* Shift and rotate instructions */ -EMIT_RR (spe_shlh, 0x05f); -EMIT_RI7 (spe_shlhi, 0x07f); -EMIT_RR (spe_shl, 0x05b); -EMIT_RI7 (spe_shli, 0x07b); -EMIT_RR (spe_shlqbi, 0x1db); 
-EMIT_RI7 (spe_shlqbii, 0x1fb); -EMIT_RR (spe_shlqby, 0x1df); -EMIT_RI7 (spe_shlqbyi, 0x1ff); -EMIT_RR (spe_shlqbybi, 0x1cf); -EMIT_RR (spe_roth, 0x05c); -EMIT_RI7 (spe_rothi, 0x07c); -EMIT_RR (spe_rot, 0x058); -EMIT_RI7 (spe_roti, 0x078); -EMIT_RR (spe_rotqby, 0x1dc); -EMIT_RI7 (spe_rotqbyi, 0x1fc); -EMIT_RR (spe_rotqbybi, 0x1cc); -EMIT_RR (spe_rotqbi, 0x1d8); -EMIT_RI7 (spe_rotqbii, 0x1f8); -EMIT_RR (spe_rothm, 0x05d); -EMIT_RI7 (spe_rothmi, 0x07d); -EMIT_RR (spe_rotm, 0x059); -EMIT_RI7 (spe_rotmi, 0x079); -EMIT_RR (spe_rotqmby, 0x1dd); -EMIT_RI7 (spe_rotqmbyi, 0x1fd); -EMIT_RR (spe_rotqmbybi, 0x1cd); -EMIT_RR (spe_rotqmbi, 0x1c9); -EMIT_RI7 (spe_rotqmbii, 0x1f9); -EMIT_RR (spe_rotmah, 0x05e); -EMIT_RI7 (spe_rotmahi, 0x07e); -EMIT_RR (spe_rotma, 0x05a); -EMIT_RI7 (spe_rotmai, 0x07a); +EMIT_RR (spe_shlh, 0x05f) +EMIT_RI7 (spe_shlhi, 0x07f) +EMIT_RR (spe_shl, 0x05b) +EMIT_RI7 (spe_shli, 0x07b) +EMIT_RR (spe_shlqbi, 0x1db) +EMIT_RI7 (spe_shlqbii, 0x1fb) +EMIT_RR (spe_shlqby, 0x1df) +EMIT_RI7 (spe_shlqbyi, 0x1ff) +EMIT_RR (spe_shlqbybi, 0x1cf) +EMIT_RR (spe_roth, 0x05c) +EMIT_RI7 (spe_rothi, 0x07c) +EMIT_RR (spe_rot, 0x058) +EMIT_RI7 (spe_roti, 0x078) +EMIT_RR (spe_rotqby, 0x1dc) +EMIT_RI7 (spe_rotqbyi, 0x1fc) +EMIT_RR (spe_rotqbybi, 0x1cc) +EMIT_RR (spe_rotqbi, 0x1d8) +EMIT_RI7 (spe_rotqbii, 0x1f8) +EMIT_RR (spe_rothm, 0x05d) +EMIT_RI7 (spe_rothmi, 0x07d) +EMIT_RR (spe_rotm, 0x059) +EMIT_RI7 (spe_rotmi, 0x079) +EMIT_RR (spe_rotqmby, 0x1dd) +EMIT_RI7 (spe_rotqmbyi, 0x1fd) +EMIT_RR (spe_rotqmbybi, 0x1cd) +EMIT_RR (spe_rotqmbi, 0x1c9) +EMIT_RI7 (spe_rotqmbii, 0x1f9) +EMIT_RR (spe_rotmah, 0x05e) +EMIT_RI7 (spe_rotmahi, 0x07e) +EMIT_RR (spe_rotma, 0x05a) +EMIT_RI7 (spe_rotmai, 0x07a) /* Compare, branch, and halt instructions */ -EMIT_RR (spe_heq, 0x3d8); -EMIT_RI10(spe_heqi, 0x07f); -EMIT_RR (spe_hgt, 0x258); -EMIT_RI10(spe_hgti, 0x04f); -EMIT_RR (spe_hlgt, 0x2d8); -EMIT_RI10(spe_hlgti, 0x05f); -EMIT_RR (spe_ceqb, 0x3d0); -EMIT_RI10(spe_ceqbi, 0x07e); -EMIT_RR (spe_ceqh, 
0x3c8); -EMIT_RI10(spe_ceqhi, 0x07d); -EMIT_RR (spe_ceq, 0x3c0); -EMIT_RI10(spe_ceqi, 0x07c); -EMIT_RR (spe_cgtb, 0x250); -EMIT_RI10(spe_cgtbi, 0x04e); -EMIT_RR (spe_cgth, 0x248); -EMIT_RI10(spe_cgthi, 0x04d); -EMIT_RR (spe_cgt, 0x240); -EMIT_RI10(spe_cgti, 0x04c); -EMIT_RR (spe_clgtb, 0x2d0); -EMIT_RI10(spe_clgtbi, 0x05e); -EMIT_RR (spe_clgth, 0x2c8); -EMIT_RI10(spe_clgthi, 0x05d); -EMIT_RR (spe_clgt, 0x2c0); -EMIT_RI10(spe_clgti, 0x05c); -EMIT_I16 (spe_br, 0x064); -EMIT_I16 (spe_bra, 0x060); -EMIT_RI16(spe_brsl, 0x066); -EMIT_RI16(spe_brasl, 0x062); -EMIT_RI16(spe_brnz, 0x042); -EMIT_RI16(spe_brz, 0x040); -EMIT_RI16(spe_brhnz, 0x046); -EMIT_RI16(spe_brhz, 0x044); - -extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e); -extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e); -extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, +EMIT_RR (spe_heq, 0x3d8) +EMIT_RI10(spe_heqi, 0x07f) +EMIT_RR (spe_hgt, 0x258) +EMIT_RI10(spe_hgti, 0x04f) +EMIT_RR (spe_hlgt, 0x2d8) +EMIT_RI10(spe_hlgti, 0x05f) +EMIT_RR (spe_ceqb, 0x3d0) +EMIT_RI10(spe_ceqbi, 0x07e) +EMIT_RR (spe_ceqh, 0x3c8) +EMIT_RI10(spe_ceqhi, 0x07d) +EMIT_RR (spe_ceq, 0x3c0) +EMIT_RI10(spe_ceqi, 0x07c) +EMIT_RR (spe_cgtb, 0x250) +EMIT_RI10(spe_cgtbi, 0x04e) +EMIT_RR (spe_cgth, 0x248) +EMIT_RI10(spe_cgthi, 0x04d) +EMIT_RR (spe_cgt, 0x240) +EMIT_RI10(spe_cgti, 0x04c) +EMIT_RR (spe_clgtb, 0x2d0) +EMIT_RI10(spe_clgtbi, 0x05e) +EMIT_RR (spe_clgth, 0x2c8) +EMIT_RI10(spe_clgthi, 0x05d) +EMIT_RR (spe_clgt, 0x2c0) +EMIT_RI10(spe_clgti, 0x05c) +EMIT_I16 (spe_br, 0x064) +EMIT_I16 (spe_bra, 0x060) +EMIT_RI16(spe_brsl, 0x066) +EMIT_RI16(spe_brasl, 0x062) +EMIT_RI16(spe_brnz, 0x042) +EMIT_RI16(spe_brz, 0x040) +EMIT_RI16(spe_brhnz, 0x046) +EMIT_RI16(spe_brhz, 0x044) + +/* Control instructions + */ +EMIT (spe_lnop, 0x001) + +extern void +spe_lqd(struct spe_function *p, int rT, int rA, int offset); + +extern void +spe_stqd(struct spe_function *p, int rT, int rA, int offset); + 
+extern void spe_bi(struct spe_function *p, int rA, int d, int e); +extern void spe_iret(struct spe_function *p, int rA, int d, int e); +extern void spe_bisled(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_bisl(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_biz(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_binz(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e); /** Load/splat immediate float into rT. */ extern void -spe_load_float(struct spe_function *p, unsigned rT, float x); +spe_load_float(struct spe_function *p, int rT, float x); /** Load/splat immediate int into rT. */ extern void -spe_load_int(struct spe_function *p, unsigned rT, int i); +spe_load_int(struct spe_function *p, int rT, int i); + +/** Load/splat immediate unsigned int into rT. */ +extern void +spe_load_uint(struct spe_function *p, int rT, uint ui); + +/** And immediate value into rT. */ +extern void +spe_and_uint(struct spe_function *p, int rT, int rA, uint ui); + +/** Xor immediate value into rT. */ +extern void +spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui); + +/** Compare equal with immediate value. */ +extern void +spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui); + +/** Compare greater with immediate value. */ +extern void +spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui); /** Replicate word 0 of rA across rT. 
*/ extern void -spe_splat(struct spe_function *p, unsigned rT, unsigned rA); +spe_splat(struct spe_function *p, int rT, int rA); -/** Complement/invert all bits in rT. */ +/** rT = complement_all_bits(rA). */ extern void -spe_complement(struct spe_function *p, unsigned rT); +spe_complement(struct spe_function *p, int rT, int rA); /** rT = rA. */ extern void -spe_move(struct spe_function *p, unsigned rT, unsigned rA); +spe_move(struct spe_function *p, int rT, int rA); /** rT = {0,0,0,0}. */ extern void -spe_zero(struct spe_function *p, unsigned rT); +spe_zero(struct spe_function *p, int rT); + +/** rT = splat(rA, word) */ +extern void +spe_splat_word(struct spe_function *p, int rT, int rA, int word); + +/** rT = float min(rA, rB) */ +extern void +spe_float_min(struct spe_function *p, int rT, int rA, int rB); + +/** rT = float max(rA, rB) */ +extern void +spe_float_max(struct spe_function *p, int rT, int rA, int rB); /* Floating-point instructions */ -EMIT_RR (spe_fa, 0x2c4); -EMIT_RR (spe_dfa, 0x2cc); -EMIT_RR (spe_fs, 0x2c5); -EMIT_RR (spe_dfs, 0x2cd); -EMIT_RR (spe_fm, 0x2c6); -EMIT_RR (spe_dfm, 0x2ce); -EMIT_RRR (spe_fma, 0x00e); -EMIT_RR (spe_dfma, 0x35c); -EMIT_RRR (spe_fnms, 0x00d); -EMIT_RR (spe_dfnms, 0x35e); -EMIT_RRR (spe_fms, 0x00f); -EMIT_RR (spe_dfms, 0x35d); -EMIT_RR (spe_dfnma, 0x35f); -EMIT_R (spe_frest, 0x1b8); -EMIT_R (spe_frsqest, 0x1b9); -EMIT_RR (spe_fi, 0x3d4); -EMIT_RI8 (spe_csflt, 0x1da, 155); -EMIT_RI8 (spe_cflts, 0x1d8, 173); -EMIT_RI8 (spe_cuflt, 0x1db, 155); -EMIT_RI8 (spe_cfltu, 0x1d9, 173); -EMIT_R (spe_frds, 0x3b9); -EMIT_R (spe_fesd, 0x3b8); -EMIT_RR (spe_dfceq, 0x3c3); -EMIT_RR (spe_dfcmeq, 0x3cb); -EMIT_RR (spe_dfcgt, 0x2c3); -EMIT_RR (spe_dfcmgt, 0x2cb); -EMIT_RI7 (spe_dftsv, 0x3bf); -EMIT_RR (spe_fceq, 0x3c2); -EMIT_RR (spe_fcmeq, 0x3ca); -EMIT_RR (spe_fcgt, 0x2c2); -EMIT_RR (spe_fcmgt, 0x2ca); -EMIT_R (spe_fscrwr, 0x3ba); -EMIT_ (spe_fscrrd, 0x398); +EMIT_RR (spe_fa, 0x2c4) +EMIT_RR (spe_dfa, 0x2cc) +EMIT_RR (spe_fs, 0x2c5) 
+EMIT_RR (spe_dfs, 0x2cd) +EMIT_RR (spe_fm, 0x2c6) +EMIT_RR (spe_dfm, 0x2ce) +EMIT_RRR (spe_fma, 0x00e) +EMIT_RR (spe_dfma, 0x35c) +EMIT_RRR (spe_fnms, 0x00d) +EMIT_RR (spe_dfnms, 0x35e) +EMIT_RRR (spe_fms, 0x00f) +EMIT_RR (spe_dfms, 0x35d) +EMIT_RR (spe_dfnma, 0x35f) +EMIT_R (spe_frest, 0x1b8) +EMIT_R (spe_frsqest, 0x1b9) +EMIT_RR (spe_fi, 0x3d4) +EMIT_RI8 (spe_csflt, 0x1da, 155) +EMIT_RI8 (spe_cflts, 0x1d8, 173) +EMIT_RI8 (spe_cuflt, 0x1db, 155) +EMIT_RI8 (spe_cfltu, 0x1d9, 173) +EMIT_R (spe_frds, 0x3b9) +EMIT_R (spe_fesd, 0x3b8) +EMIT_RR (spe_dfceq, 0x3c3) +EMIT_RR (spe_dfcmeq, 0x3cb) +EMIT_RR (spe_dfcgt, 0x2c3) +EMIT_RR (spe_dfcmgt, 0x2cb) +EMIT_RI7 (spe_dftsv, 0x3bf) +EMIT_RR (spe_fceq, 0x3c2) +EMIT_RR (spe_fcmeq, 0x3ca) +EMIT_RR (spe_fcgt, 0x2c2) +EMIT_RR (spe_fcmgt, 0x2ca) +EMIT_R (spe_fscrwr, 0x3ba) +EMIT_ (spe_fscrrd, 0x398) /* Channel instructions */ -EMIT_R (spe_rdch, 0x00d); -EMIT_R (spe_rdchcnt, 0x00f); -EMIT_R (spe_wrch, 0x10d); +EMIT_R (spe_rdch, 0x00d) +EMIT_R (spe_rdchcnt, 0x00f) +EMIT_R (spe_wrch, 0x10d) #ifdef UNDEF_EMIT_MACROS +#undef EMIT #undef EMIT_ #undef EMIT_R #undef EMIT_RR @@ -361,6 +425,7 @@ EMIT_R (spe_wrch, 0x10d); #undef EMIT_RI7 #undef EMIT_RI8 #undef EMIT_RI10 +#undef EMIT_RI10s #undef EMIT_RI16 #undef EMIT_RI18 #undef EMIT_I16 diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index ad9d8f8ced..57fcf6de2a 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -26,7 +26,7 @@ #if defined(PIPE_ARCH_X86) #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_pointer.h" #include "rtasm_execmem.h" @@ -240,7 +240,8 @@ static void emit_modrm( struct x86_function *p, /* Oh-oh we've stumbled into the SIB thing. */ if (regmem.file == file_REG32 && - regmem.idx == reg_SP) { + regmem.idx == reg_SP && + regmem.mod != mod_REG) { emit_1ub(p, 0x24); /* simplistic! 
*/ } @@ -439,25 +440,70 @@ void x86_call( struct x86_function *p, struct x86_reg reg) } -/* michal: - * Temporary. As I need immediate operands, and dont want to mess with the codegen, - * I load the immediate into general purpose register and use it. - */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) { DUMP_RI( dst, imm ); + assert(dst.file == file_REG32); assert(dst.mod == mod_REG); emit_1ub(p, 0xb8 + dst.idx); emit_1i(p, imm); } -void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ) +/** + * Immediate group 1 instructions. + */ +static INLINE void +x86_group1_imm( struct x86_function *p, + unsigned op, struct x86_reg dst, int imm ) { - DUMP_RI( dst, imm ); + assert(dst.file == file_REG32); assert(dst.mod == mod_REG); - emit_1ub(p, 0x80); - emit_modrm_noreg(p, 0, dst); - emit_1ub(p, imm); + if(-0x80 <= imm && imm < 0x80) { + emit_1ub(p, 0x83); + emit_modrm_noreg(p, op, dst); + emit_1b(p, (char)imm); + } + else { + emit_1ub(p, 0x81); + emit_modrm_noreg(p, op, dst); + emit_1i(p, imm); + } +} + +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 0, dst, imm); +} + +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 1, dst, imm); +} + +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 4, dst, imm); +} + +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 5, dst, imm); +} + +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 6, dst, imm); +} + +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 7, dst, imm); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 
af79f07dd3..1b5eaaca85 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -152,12 +152,13 @@ void x86_jmp( struct x86_function *p, int label ); /* void x86_call( struct x86_function *p, void (*label)() ); */ void x86_call( struct x86_function *p, struct x86_reg reg); -/* michal: - * Temporary. As I need immediate operands, and dont want to mess with the codegen, - * I load the immediate into general purpose register and use it. - */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); -void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ); +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ); /* Macro for sse_shufps() and sse2_pshufd(): diff --git a/src/gallium/auxiliary/sct/Makefile b/src/gallium/auxiliary/sct/Makefile index 516d1756cf..a7d111b689 100644 --- a/src/gallium/auxiliary/sct/Makefile +++ b/src/gallium/auxiliary/sct/Makefile @@ -7,6 +7,3 @@ C_SOURCES = \ sct.c include ../../Makefile.template - -symlinks: - diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index c7155a9316..b4900e8dba 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -11,6 +11,7 @@ C_SOURCES = \ tgsi_info.c \ tgsi_iterate.c \ tgsi_parse.c \ + tgsi_ppc.c \ tgsi_scan.c \ tgsi_sse2.c \ tgsi_text.c \ @@ -18,6 +19,3 @@ C_SOURCES = \ tgsi_util.c include ../../Makefile.template - -symlinks: - diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 45bf3f6d57..8200cce42f 100644 --- 
a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -12,6 +12,7 @@ tgsi = env.ConvenienceLibrary( 'tgsi_parse.c', 'tgsi_sanity.c', 'tgsi_scan.c', + 'tgsi_ppc.c', 'tgsi_sse2.c', 'tgsi_text.c', 'tgsi_transform.c', diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index ed8fc5ac25..a1891a140a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_build.h" #include "tgsi_parse.h" @@ -114,7 +114,7 @@ tgsi_default_declaration( void ) struct tgsi_declaration declaration; declaration.Type = TGSI_TOKEN_TYPE_DECLARATION; - declaration.Size = 1; + declaration.NrTokens = 1; declaration.File = TGSI_FILE_NULL; declaration.UsageMask = TGSI_WRITEMASK_XYZW; declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT; @@ -160,9 +160,9 @@ declaration_grow( struct tgsi_declaration *declaration, struct tgsi_header *header ) { - assert( declaration->Size < 0xFF ); + assert( declaration->NrTokens < 0xFF ); - declaration->Size++; + declaration->NrTokens++; header_bodysize_grow( header ); } @@ -308,7 +308,7 @@ tgsi_default_immediate( void ) struct tgsi_immediate immediate; immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE; - immediate.Size = 1; + immediate.NrTokens = 1; immediate.DataType = TGSI_IMM_FLOAT32; immediate.Padding = 0; immediate.Extended = 0; @@ -345,9 +345,9 @@ immediate_grow( struct tgsi_immediate *immediate, struct tgsi_header *header ) { - assert( immediate->Size < 0xFF ); + assert( immediate->NrTokens < 0xFF ); - immediate->Size++; + immediate->NrTokens++; header_bodysize_grow( header ); } @@ -384,7 +384,7 @@ tgsi_build_full_immediate( *immediate = tgsi_build_immediate( header ); - for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) { + for( i = 0; i < 
full_imm->Immediate.NrTokens - 1; i++ ) { struct tgsi_immediate_float32 *if32; if( maxsize <= size ) @@ -411,7 +411,7 @@ tgsi_default_instruction( void ) struct tgsi_instruction instruction; instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION; - instruction.Size = 1; + instruction.NrTokens = 1; instruction.Opcode = TGSI_OPCODE_MOV; instruction.Saturate = TGSI_SAT_NONE; instruction.NumDstRegs = 1; @@ -453,9 +453,9 @@ instruction_grow( struct tgsi_instruction *instruction, struct tgsi_header *header ) { - assert (instruction->Size < 0xFF); + assert (instruction->NrTokens < 0xFF); - instruction->Size++; + instruction->NrTokens++; header_bodysize_grow( header ); } @@ -801,10 +801,14 @@ tgsi_default_instruction_ext_nv( void ) return instruction_ext_nv; } -union token_u32 + +/** test for inequality of 32-bit values pointed to by a and b */ +static INLINE boolean +compare32(const void *a, const void *b) { - unsigned u32; -}; + return *((uint32_t *) a) != *((uint32_t *) b); +} + unsigned tgsi_compare_instruction_ext_nv( @@ -813,7 +817,7 @@ tgsi_compare_instruction_ext_nv( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_instruction_ext_nv @@ -872,7 +876,7 @@ tgsi_compare_instruction_ext_label( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_instruction_ext_label @@ -913,7 +917,7 @@ tgsi_compare_instruction_ext_texture( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_instruction_ext_texture @@ -1035,7 +1039,7 @@ tgsi_compare_src_register_ext_swz( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct 
tgsi_src_register_ext_swz @@ -1103,7 +1107,7 @@ tgsi_compare_src_register_ext_mod( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_src_register_ext_mod @@ -1249,7 +1253,7 @@ tgsi_compare_dst_register_ext_concode( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_dst_register_ext_concode @@ -1307,7 +1311,7 @@ tgsi_compare_dst_register_ext_modulate( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_dst_register_ext_modulate diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 2ed8c2bf07..d57cb9139f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_string.h" #include "tgsi_dump.h" #include "tgsi_info.h" @@ -285,7 +285,7 @@ iter_immediate( ENM( imm->Immediate.DataType, immediate_type_names ); TXT( " { " ); - for (i = 0; i < imm->Immediate.Size - 1; i++) { + for (i = 0; i < imm->Immediate.NrTokens - 1; i++) { switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: FLT( imm->u.ImmediateFloat32[i].Float ); @@ -294,7 +294,7 @@ iter_immediate( assert( 0 ); } - if (i < imm->Immediate.Size - 2) + if (i < imm->Immediate.NrTokens - 2) TXT( ", " ); } TXT( " }" ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index be25cb45a0..3dc61c48ca 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -25,7 +25,7 @@ * 
**************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_string.h" #include "tgsi_dump_c.h" #include "tgsi_build.h" @@ -283,7 +283,7 @@ dump_immediate_verbose( UIX( imm->Immediate.Padding ); } - for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) { EOL(); switch( imm->Immediate.DataType ) { case TGSI_IMM_FLOAT32: @@ -646,7 +646,6 @@ tgsi_dump_c( struct tgsi_full_declaration fd; uint ignored = flags & TGSI_DUMP_C_IGNORED; uint deflt = flags & TGSI_DUMP_C_DEFAULT; - uint instno = 0; tgsi_parse_init( &parse, tokens ); @@ -676,7 +675,7 @@ tgsi_dump_c( ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES ); if( ignored ) { TXT( "\nSize : " ); - UID( parse.FullToken.Token.Size ); + UID( parse.FullToken.Token.NrTokens ); if( deflt || parse.FullToken.Token.Extended ) { TXT( "\nExtended : " ); UID( parse.FullToken.Token.Extended ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index f98b66dc0b..94589cf79f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -133,7 +133,7 @@ tgsi_exec_machine_bind_shader( struct tgsi_exec_machine *mach, const struct tgsi_token *tokens, uint numSamplers, - struct tgsi_sampler *samplers) + struct tgsi_sampler **samplers) { uint k; struct tgsi_parse_context parse; @@ -202,7 +202,7 @@ tgsi_exec_machine_bind_shader( case TGSI_TOKEN_TYPE_IMMEDIATE: { - uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; assert( size % 4 == 0 ); assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); @@ -320,6 +320,7 @@ micro_add( dst->f[3] = src0->f[3] + src1->f[3]; } +#if 0 static void micro_iadd( union tgsi_exec_channel *dst, @@ -331,6 +332,7 @@ micro_iadd( dst->i[2] = src0->i[2] + src1->i[2]; dst->i[3] = src0->i[3] + src1->i[3]; } +#endif static 
void micro_and( @@ -408,6 +410,7 @@ micro_div( } } +#if 0 static void micro_udiv( union tgsi_exec_channel *dst, @@ -419,6 +422,7 @@ micro_udiv( dst->u[2] = src0->u[2] / src1->u[2]; dst->u[3] = src0->u[3] / src1->u[3]; } +#endif static void micro_eq( @@ -434,6 +438,7 @@ micro_eq( dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; } +#if 0 static void micro_ieq( union tgsi_exec_channel *dst, @@ -447,6 +452,7 @@ micro_ieq( dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; } +#endif static void micro_exp2( @@ -466,6 +472,7 @@ micro_exp2( #endif } +#if 0 static void micro_f2ut( union tgsi_exec_channel *dst, @@ -476,6 +483,7 @@ micro_f2ut( dst->u[2] = (uint) src->f[2]; dst->u[3] = (uint) src->f[3]; } +#endif static void micro_flr( @@ -570,6 +578,7 @@ micro_lt( dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; } +#if 0 static void micro_ilt( union tgsi_exec_channel *dst, @@ -583,7 +592,9 @@ micro_ilt( dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; } +#endif +#if 0 static void micro_ult( union tgsi_exec_channel *dst, @@ -597,6 +608,7 @@ micro_ult( dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; } +#endif static void micro_max( @@ -610,6 +622,7 @@ micro_max( dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; } +#if 0 static void micro_imax( union tgsi_exec_channel *dst, @@ -621,7 +634,9 @@ micro_imax( dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; } +#endif +#if 0 static void micro_umax( union tgsi_exec_channel *dst, @@ -633,6 +648,7 @@ micro_umax( dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; dst->u[3] = src0->u[3] > src1->u[3] ? 
src0->u[3] : src1->u[3]; } +#endif static void micro_min( @@ -646,6 +662,7 @@ micro_min( dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; } +#if 0 static void micro_imin( union tgsi_exec_channel *dst, @@ -657,7 +674,9 @@ micro_imin( dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; } +#endif +#if 0 static void micro_umin( union tgsi_exec_channel *dst, @@ -669,7 +688,9 @@ micro_umin( dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; } +#endif +#if 0 static void micro_umod( union tgsi_exec_channel *dst, @@ -681,6 +702,7 @@ micro_umod( dst->u[2] = src0->u[2] % src1->u[2]; dst->u[3] = src0->u[3] % src1->u[3]; } +#endif static void micro_mul( @@ -694,6 +716,7 @@ micro_mul( dst->f[3] = src0->f[3] * src1->f[3]; } +#if 0 static void micro_imul( union tgsi_exec_channel *dst, @@ -705,7 +728,9 @@ micro_imul( dst->i[2] = src0->i[2] * src1->i[2]; dst->i[3] = src0->i[3] * src1->i[3]; } +#endif +#if 0 static void micro_imul64( union tgsi_exec_channel *dst0, @@ -722,7 +747,9 @@ micro_imul64( dst0->i[2] = 0; dst0->i[3] = 0; } +#endif +#if 0 static void micro_umul64( union tgsi_exec_channel *dst0, @@ -739,7 +766,10 @@ micro_umul64( dst0->u[2] = 0; dst0->u[3] = 0; } +#endif + +#if 0 static void micro_movc( union tgsi_exec_channel *dst, @@ -752,6 +782,7 @@ micro_movc( dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; dst->u[3] = src0->u[3] ? 
src1->u[3] : src2->u[3]; } +#endif static void micro_neg( @@ -764,6 +795,7 @@ micro_neg( dst->f[3] = -src->f[3]; } +#if 0 static void micro_ineg( union tgsi_exec_channel *dst, @@ -774,6 +806,7 @@ micro_ineg( dst->i[2] = -src->i[2]; dst->i[3] = -src->i[3]; } +#endif static void micro_not( @@ -874,6 +907,7 @@ micro_trunc( dst->f[3] = (float) (int) src0->f[3]; } +#if 0 static void micro_ushr( union tgsi_exec_channel *dst, @@ -885,6 +919,7 @@ micro_ushr( dst->u[2] = src0->u[2] >> src1->u[2]; dst->u[3] = src0->u[3] >> src1->u[3]; } +#endif static void micro_sin( @@ -919,6 +954,7 @@ micro_sub( dst->f[3] = src0->f[3] - src1->f[3]; } +#if 0 static void micro_u2f( union tgsi_exec_channel *dst, @@ -929,6 +965,7 @@ micro_u2f( dst->f[2] = (float) src->u[2]; dst->f[3] = (float) src->u[3]; } +#endif static void micro_xor( @@ -1045,11 +1082,28 @@ fetch_source( union tgsi_exec_channel index; uint swizzle; + /* We start with a direct index into a register file. + * + * file[1], + * where: + * file = SrcRegister.File + * [1] = SrcRegister.Index + */ index.i[0] = index.i[1] = index.i[2] = index.i[3] = reg->SrcRegister.Index; + /* There is an extra source register that indirectly subscripts + * a register file. The direct index now becomes an offset + * that is being added to the indirect register. + * + * file[ind[2].x+1], + * where: + * ind = SrcRegisterInd.File + * [2] = SrcRegisterInd.Index + * .x = SrcRegisterInd.SwizzleX + */ if (reg->SrcRegister.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; @@ -1086,19 +1140,31 @@ fetch_source( } } - if( reg->SrcRegister.Dimension ) { - switch( reg->SrcRegister.File ) { + /* There is an extra source register that is a second + * subscript to a register file. Effectively it means that + * the register file is actually a 2D array of registers. 
+ * + * file[1][3] == file[1*sizeof(file[1])+3], + * where: + * [3] = SrcRegisterDim.Index + */ + if (reg->SrcRegister.Dimension) { + /* The size of the first-order array depends on the register file type. + * We need to multiply the index to the first array to get an effective, + * "flat" index that points to the beginning of the second-order array. + */ + switch (reg->SrcRegister.File) { case TGSI_FILE_INPUT: - index.i[0] *= 17; - index.i[1] *= 17; - index.i[2] *= 17; - index.i[3] *= 17; + index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; break; case TGSI_FILE_CONSTANT: - index.i[0] *= 4096; - index.i[1] *= 4096; - index.i[2] *= 4096; - index.i[3] *= 4096; + index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; break; default: assert( 0 ); @@ -1109,6 +1175,17 @@ fetch_source( index.i[2] += reg->SrcRegisterDim.Index; index.i[3] += reg->SrcRegisterDim.Index; + /* Again, the second subscript index can be addressed indirectly + * identically to the first one. + * Nothing stops us from indirectly addressing the indirect register, + * but there is no need for that, so we won't exercise it. + * + * file[1][ind[4].y+3], + * where: + * ind = SrcRegisterDimInd.File + * [4] = SrcRegisterDimInd.Index + * .y = SrcRegisterDimInd.SwizzleX + */ if (reg->SrcRegisterDim.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; @@ -1141,6 +1218,11 @@ fetch_source( index.i[i] = 0; } } + + /* If by any chance there was a need for a 3D array of register + * files, we would have to check whether SrcRegisterDim is followed + * by a dimension register and continue the saga. 
+ */ } swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); @@ -1490,7 +1572,7 @@ exec_kilp(struct tgsi_exec_machine *mach, /* - * Fetch a texel using STR texture coordinates. + * Fetch a four texture samples using STR texture coordinates. */ static void fetch_texel( struct tgsi_sampler *sampler, @@ -1524,7 +1606,7 @@ exec_tex(struct tgsi_exec_machine *mach, boolean projected) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; - union tgsi_exec_channel r[8]; + union tgsi_exec_channel r[4]; uint chan_index; float lodBias; @@ -1532,6 +1614,7 @@ exec_tex(struct tgsi_exec_machine *mach, switch (inst->InstructionExtTexture.Texture) { case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_SHADOW1D: FETCH(&r[0], 0, CHAN_X); @@ -1547,13 +1630,15 @@ exec_tex(struct tgsi_exec_machine *mach, else lodBias = 0.0; - fetch_texel(&mach->Samplers[unit], + fetch_texel(mach->Samplers[unit], &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_RECT: + case TGSI_TEXTURE_SHADOW2D: + case TGSI_TEXTURE_SHADOWRECT: FETCH(&r[0], 0, CHAN_X); FETCH(&r[1], 0, CHAN_Y); @@ -1573,7 +1658,7 @@ exec_tex(struct tgsi_exec_machine *mach, else lodBias = 0.0; - fetch_texel(&mach->Samplers[unit], + fetch_texel(mach->Samplers[unit], &r[0], &r[1], &r[2], lodBias, /* inputs */ &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; @@ -1599,7 +1684,7 @@ exec_tex(struct tgsi_exec_machine *mach, else lodBias = 0.0; - fetch_texel(&mach->Samplers[unit], + fetch_texel(mach->Samplers[unit], &r[0], &r[1], &r[2], lodBias, &r[0], &r[1], &r[2], &r[3]); break; @@ -1709,6 +1794,7 @@ exec_declaration( break; default: + eval = NULL; assert( 0 ); } @@ -1751,7 +1837,7 @@ exec_instruction( case TGSI_OPCODE_ARL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_trunc( &r[0], &r[0] ); + micro_flr( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; @@ -1806,6 +1892,7 @@ 
exec_instruction( case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ FETCH( &r[0], 0, CHAN_X ); + micro_abs( &r[0], &r[0] ); micro_sqrt( &r[0], &r[0] ); micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index fc40a25e09..4ffd4efbff 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -68,17 +68,12 @@ struct tgsi_interp_coef float dady[NUM_CHANNELS]; }; - -struct softpipe_tile_cache; /**< Opaque to TGSI */ - /** * Information for sampling textures, which must be implemented * by code outside the TGSI executor. */ struct tgsi_sampler { - const struct pipe_sampler_state *state; - struct pipe_texture *texture; /** Get samples for four fragments in a quad */ void (*get_samples)(struct tgsi_sampler *sampler, const float s[QUAD_SIZE], @@ -86,8 +81,6 @@ struct tgsi_sampler const float p[QUAD_SIZE], float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]); - void *pipe; /*XXX temporary*/ - struct softpipe_tile_cache *cache; }; /** @@ -178,6 +171,16 @@ struct tgsi_exec_labels #define TGSI_EXEC_MAX_LOOP_NESTING 20 #define TGSI_EXEC_MAX_CALL_NESTING 20 +/* The maximum number of input attributes per vertex. For 2D + * input register files, this is the stride between two 1D + * arrays. + */ +#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17 + +/* The maximum number of constant vectors per constant buffer. + */ +#define TGSI_EXEC_MAX_CONST_BUFFER 4096 + /** * Run-time virtual machine state for executing TGSI shader. 
*/ @@ -195,7 +198,7 @@ struct tgsi_exec_machine struct tgsi_exec_vector *Temps; struct tgsi_exec_vector *Addrs; - struct tgsi_sampler *Samplers; + struct tgsi_sampler **Samplers; float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; unsigned ImmLimit; @@ -258,7 +261,7 @@ tgsi_exec_machine_bind_shader( struct tgsi_exec_machine *mach, const struct tgsi_token *tokens, uint numSamplers, - struct tgsi_sampler *samplers); + struct tgsi_sampler **samplers); uint tgsi_exec_machine_run( diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 68c7a6b7f5..2b8a6f0fb1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_info.h" static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/tgsi_iterate.c index 5371a88b96..d88c2558d8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_iterate.c +++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_iterate.h" boolean diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 3757486ba9..22006edf3d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_build.h" @@ -88,16 +88,33 @@ tgsi_parse_end_of_tokens( 1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize; } + +/** + * This function is used to avoid and work-around type 
punning/aliasing + * warnings. The warnings seem harmless on x86 but on PPC they cause + * real failures. + */ +static INLINE void +copy_token(void *dst, const void *src) +{ + memcpy(dst, src, 4); +} + + +/** + * Get next 4-byte token, return it at address specified by 'token' + */ static void next_token( struct tgsi_parse_context *ctx, void *token ) { assert( !tgsi_parse_end_of_tokens( ctx ) ); - - *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++]; + copy_token(token, &ctx->Tokens[ctx->Position]); + ctx->Position++; } + void tgsi_parse_token( struct tgsi_parse_context *ctx ) @@ -116,7 +133,7 @@ tgsi_parse_token( struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration; *decl = tgsi_default_full_declaration(); - decl->Declaration = *(struct tgsi_declaration *) &token; + copy_token(&decl->Declaration, &token); next_token( ctx, &decl->DeclarationRange ); @@ -132,15 +149,14 @@ tgsi_parse_token( struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; *imm = tgsi_default_full_immediate(); - imm->Immediate = *(struct tgsi_immediate *) &token; - + copy_token(&imm->Immediate, &token); assert( !imm->Immediate.Extended ); switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: imm->u.Pointer = MALLOC( - sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) ); - for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.NrTokens - 1) ); + for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) { next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] ); } break; @@ -158,8 +174,7 @@ tgsi_parse_token( unsigned extended; *inst = tgsi_default_full_instruction(); - inst->Instruction = *(struct tgsi_instruction *) &token; - + copy_token(&inst->Instruction, &token); extended = inst->Instruction.Extended; while( extended ) { @@ -169,18 +184,15 @@ tgsi_parse_token( switch( token.Type ) { case TGSI_INSTRUCTION_EXT_TYPE_NV: - inst->InstructionExtNv = - *(struct 
tgsi_instruction_ext_nv *) &token; + copy_token(&inst->InstructionExtNv, &token); break; case TGSI_INSTRUCTION_EXT_TYPE_LABEL: - inst->InstructionExtLabel = - *(struct tgsi_instruction_ext_label *) &token; + copy_token(&inst->InstructionExtLabel, &token); break; case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE: - inst->InstructionExtTexture = - *(struct tgsi_instruction_ext_texture *) &token; + copy_token(&inst->InstructionExtTexture, &token); break; default: @@ -212,13 +224,13 @@ tgsi_parse_token( switch( token.Type ) { case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE: - inst->FullDstRegisters[i].DstRegisterExtConcode = - *(struct tgsi_dst_register_ext_concode *) &token; + copy_token(&inst->FullDstRegisters[i].DstRegisterExtConcode, + &token); break; case TGSI_DST_REGISTER_EXT_TYPE_MODULATE: - inst->FullDstRegisters[i].DstRegisterExtModulate = - *(struct tgsi_dst_register_ext_modulate *) &token; + copy_token(&inst->FullDstRegisters[i].DstRegisterExtModulate, + &token); break; default: @@ -245,13 +257,13 @@ tgsi_parse_token( switch( token.Type ) { case TGSI_SRC_REGISTER_EXT_TYPE_SWZ: - inst->FullSrcRegisters[i].SrcRegisterExtSwz = - *(struct tgsi_src_register_ext_swz *) &token; + copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtSwz, + &token); break; case TGSI_SRC_REGISTER_EXT_TYPE_MOD: - inst->FullSrcRegisters[i].SrcRegisterExtMod = - *(struct tgsi_src_register_ext_mod *) &token; + copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtMod, + &token); break; default: diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c new file mode 100644 index 0000000000..0c64ae5713 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -0,0 +1,1363 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI to PowerPC code generation. 
+ */
+
+#include "pipe/p_config.h"
+
+#if defined(PIPE_ARCH_PPC)
+
+#include "util/u_debug.h"
+#include "pipe/p_shader_tokens.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_sse.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_util.h"
+#include "tgsi_dump.h"
+#include "tgsi_exec.h"
+#include "tgsi_ppc.h"
+#include "rtasm/rtasm_ppc.h"
+
+
+/**
+ * Since it's pretty much impossible to form PPC vector immediates, load
+ * them from memory here:
+ *
+ * load_constant_vec() searches this table for an exact match of the value
+ * it is asked to load, so any new constant needed by the code generator
+ * has to be added here.
+ */
+const float ppc_builtin_constants[] ALIGN16_ATTRIB = {
+   1.0f, -128.0f, 128.0, 0.0
+};
+
+
+/* Loop CHAN over every channel index, 0 .. NUM_CHANNELS-1. */
+#define FOR_EACH_CHANNEL( CHAN )\
+   for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)
+
+/* Non-zero if channel CHAN is set in the write mask of INST's first dst reg. */
+#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+   ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN)))
+
+/* 'if' header that guards the following statement on the test above. */
+#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\
+   if (IS_DST0_CHANNEL_ENABLED( INST, CHAN ))
+
+/* Loop header: run the following statement once per write-enabled channel. */
+#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\
+   FOR_EACH_CHANNEL( CHAN )\
+      IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )
+
+/* Channel indices */
+#define CHAN_X 0
+#define CHAN_Y 1
+#define CHAN_Z 2
+#define CHAN_W 3
+
+
+/**
+ * How many TGSI temps should be implemented with real PPC vector registers
+ * rather than memory.
+ */
+#define MAX_PPC_TEMPS 3
+
+
+/**
+ * Context/state used during code gen.
+ */
+struct gen_context
+{
+   struct ppc_function *f;
+   int inputs_reg;    /**< GP register pointing to input params */
+   int outputs_reg;   /**< GP register pointing to output params */
+   int temps_reg;     /**< GP register pointing to temporary "registers" */
+   int immed_reg;     /**< GP register pointing to immediates buffer */
+   int const_reg;     /**< GP register pointing to constants buffer */
+   int builtins_reg;  /**< GP register pointing to built-in constants */
+
+   int offset_reg;    /**< used to reduce redundant li instructions */
+   int offset_value;  /**< offset last loaded into offset_reg by 'li' */
+
+   int one_vec;       /**< vector register with {1.0, 1.0, 1.0, 1.0} */
+   int bit31_vec;     /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */
+
+   /**
+    * Map TGSI temps to PPC vector temps.
+    * We have 32 PPC vector regs.  Each mapped TGSI temp takes four of them
+    * (one per channel), i.e. 4 * MAX_PPC_TEMPS registers in total.
+    * XXX currently only do this for TGSI temps [0..MAX_PPC_TEMPS-1].
+    */
+   int temps_map[MAX_PPC_TEMPS][4];
+
+   /**
+    * Cache of src registers.
+    * This is used to avoid redundant load instructions.
+    * Entries are added by get_src_vec() and dropped by release_src_vecs().
+    */
+   struct {
+      struct tgsi_full_src_register src;
+      uint chan;
+      uint vec;
+   } regs[12];  /* 3 src regs, 4 channels */
+   uint num_regs;   /* number of valid entries in regs[] */
+};
+
+
+/**
+ * Initialize code generation context.
+ *
+ * Reserves GP registers 3..8 for the generated function's parameters,
+ * marks the lazily set-up registers (one_vec, bit31_vec, offset_reg) as
+ * not yet allocated with -1, and allocates four vector registers (one per
+ * channel) for each of the first MAX_PPC_TEMPS TGSI temporaries.
+ */
+static void
+init_gen_context(struct gen_context *gen, struct ppc_function *func)
+{
+   uint i;
+
+   memset(gen, 0, sizeof(*gen));
+   gen->f = func;
+   gen->inputs_reg = ppc_reserve_register(func, 3);   /* first function param */
+   gen->outputs_reg = ppc_reserve_register(func, 4);  /* second function param */
+   gen->temps_reg = ppc_reserve_register(func, 5);    /* ... */
+   gen->immed_reg = ppc_reserve_register(func, 6);
+   gen->const_reg = ppc_reserve_register(func, 7);
+   gen->builtins_reg = ppc_reserve_register(func, 8);
+   gen->one_vec = -1;
+   gen->bit31_vec = -1;
+   gen->offset_reg = -1;
+   /* impossible offset: forces the first emit_li_offset() to emit an 'li' */
+   gen->offset_value = -9999999;
+   for (i = 0; i < MAX_PPC_TEMPS; i++) {
+      gen->temps_map[i][0] = ppc_allocate_vec_register(gen->f);
+      gen->temps_map[i][1] = ppc_allocate_vec_register(gen->f);
+      gen->temps_map[i][2] = ppc_allocate_vec_register(gen->f);
+      gen->temps_map[i][3] = ppc_allocate_vec_register(gen->f);
+   }
+}
+
+
+/**
+ * Is the given TGSI register stored as a real PPC vector register?
+ * True only for TGSI_FILE_TEMPORARY registers with index < MAX_PPC_TEMPS.
+ */
+static boolean
+is_ppc_vec_temporary(const struct tgsi_full_src_register *reg)
+{
+   return (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
+           reg->SrcRegister.Index < MAX_PPC_TEMPS);
+}
+
+
+/**
+ * Is the given TGSI register stored as a real PPC vector register?
+ * This is the destination-register variant: true only for
+ * TGSI_FILE_TEMPORARY registers with index < MAX_PPC_TEMPS.
+ */
+static boolean
+is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg)
+{
+   return (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
+           reg->DstRegister.Index < MAX_PPC_TEMPS);
+}
+
+
+
+/**
+ * All PPC vector load/store instructions form an effective address
+ * by adding the contents of two registers.  For example:
+ *    lvx v2,r8,r9   # v2 = memory[r8 + r9]
+ *    stvx v2,r8,r9  # memory[r8 + r9] = v2;
+ * So our lvx/stvx instructions are typically preceded by an 'li' instruction
+ * to load r9 (above) with an immediate (an offset).
+ * This code emits that 'li' instruction, but only if the offset value is
+ * different than the previous 'li'.
+ * This optimization seems to save about 10% in the instruction count.
+ * Note that the 'li' must be emitted unconditionally at the start of each
+ * basic block (such as a loop body); call reset_li_offset() there.
+ *
+ * \param gen     code generation context
+ * \param offset  byte offset to place in the offset register
+ * \return index of the GP register that holds 'offset'
+ */
+static int
+emit_li_offset(struct gen_context *gen, int offset)
+{
+   /* -1 (set by init_gen_context) means "not allocated yet".  Test the
+    * sentinel the same way as the other lazily allocated registers
+    * (one_vec, bit31_vec) so that a valid register index is never
+    * mistaken for "unallocated".
+    */
+   if (gen->offset_reg < 0) {
+      /* allocate a GP register for storing load/store offset */
+      gen->offset_reg = ppc_allocate_register(gen->f);
+   }
+
+   /* Emit new 'li' if offset is changing.  A negative cached value is the
+    * "unknown" marker written by reset_li_offset(), and is never trusted.
+    */
+   if (gen->offset_value < 0 || gen->offset_value != offset) {
+      gen->offset_value = offset;
+      ppc_li(gen->f, gen->offset_reg, offset);
+   }
+
+   return gen->offset_reg;
+}
+
+
+/**
+ * Forces subsequent emit_li_offset() calls to emit an 'li'.
+ * To be called at the top of basic blocks.
+ */
+static void
+reset_li_offset(struct gen_context *gen)
+{
+   /* any negative value works; emit_li_offset() treats it as "unknown" */
+   gen->offset_value = -9999999;
+}
+
+
+
+/**
+ * Load the given vector register with {value, value, value, value}.
+ * The value must be in the ppc_builtin_constants[] array.
+ * We wouldn't need this if there was a simple way to load PPC vector
+ * registers with immediate values!
+ *
+ * \param gen      code generation context
+ * \param dst_vec  index of the PPC vector register to fill
+ * \param value    float to replicate; compared for exact equality against
+ *                 the entries of the built-in constants table
+ */
+static void
+load_constant_vec(struct gen_context *gen, int dst_vec, float value)
+{
+   const uint num_builtins = Elements(ppc_builtin_constants);
+   uint slot = 0;
+   int byte_offset;
+   int index_reg;
+
+   /* find the first table entry equal to 'value' */
+   while (slot < num_builtins && ppc_builtin_constants[slot] != value)
+      slot++;
+
+   if (slot >= num_builtins) {
+      /* not in the table: nothing is emitted */
+      assert(0 && "Need to add new constant to ppc_builtin_constants array");
+      return;
+   }
+
+   /* each table entry is one 4-byte float */
+   byte_offset = slot * 4;
+   index_reg = emit_li_offset(gen, byte_offset);
+
+   /* Load 4-byte word into vector register.
+    * The vector slot depends on the effective address we load from.
+    * We know that our builtins start at a 16-byte boundary so we
+    * know that 'slot % 4' tells us which vector slot will have the
+    * loaded word.  The other vector slots will be undefined.
+    */
+   ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, index_reg);
+
+   /* splat word[slot % 4] across the vector reg */
+   ppc_vspltw(gen->f, dst_vec, dst_vec, slot % 4);
+}
+
+
+/**
+ * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}.
+ * The register is allocated and loaded on first use, then reused.
+ */
+static int
+gen_one_vec(struct gen_context *gen)
+{
+   if (gen->one_vec >= 0)
+      return gen->one_vec;   /* already set up */
+
+   gen->one_vec = ppc_allocate_vec_register(gen->f);
+   load_constant_vec(gen, gen->one_vec, 1.0f);
+   return gen->one_vec;
+}
+
+/**
+ * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}.
+ * The register is allocated and initialized on first use, then reused.
+ */
+static int
+gen_get_bit31_vec(struct gen_context *gen)
+{
+   if (gen->bit31_vec < 0) {
+      const int vec = ppc_allocate_vec_register(gen->f);
+
+      /* every word = -1 (all bits set) ... */
+      ppc_vspltisw(gen->f, vec, -1);
+      /* ... then shift each word left by itself; only the low five bits of
+       * the count are used, i.e. 31, leaving just the top bit set.
+       */
+      ppc_vslw(gen->f, vec, vec, vec);
+
+      gen->bit31_vec = vec;
+   }
+   return gen->bit31_vec;
+}
+
+
+/**
+ * Register fetch.  Return PPC vector register with result.
+ *
+ * \param gen         code generation context
+ * \param reg         the TGSI source operand to fetch
+ * \param chan_index  which channel (CHAN_X..CHAN_W) of the operand to fetch
+ * \return index of the PPC vector register holding the swizzled,
+ *         sign-adjusted value.  For a PPC-resident temporary read with a
+ *         plain X/Y/Z/W swizzle and no sign change this is the temp's own
+ *         register from temps_map[]; otherwise it is a newly allocated
+ *         register.
+ */
+static int
+emit_fetch(struct gen_context *gen,
+           const struct tgsi_full_src_register *reg,
+           const unsigned chan_index)
+{
+   uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index);
+   int dst_vec = -1;
+
+   switch (swizzle) {
+   case TGSI_EXTSWIZZLE_X:
+   case TGSI_EXTSWIZZLE_Y:
+   case TGSI_EXTSWIZZLE_Z:
+   case TGSI_EXTSWIZZLE_W:
+      switch (reg->SrcRegister.File) {
+      case TGSI_FILE_INPUT:
+         {
+            /* each register is 4 channels of one 16-byte vector each */
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            int offset_reg = emit_li_offset(gen, offset);
+            dst_vec = ppc_allocate_vec_register(gen->f);
+            ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg);
+         }
+         break;
+      case TGSI_FILE_TEMPORARY:
+         if (is_ppc_vec_temporary(reg)) {
+            /* use PPC vec register */
+            dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle];
+         }
+         else {
+            /* use memory-based temp register "file" */
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16;
+            int offset_reg = emit_li_offset(gen, offset);
+            dst_vec = ppc_allocate_vec_register(gen->f);
+            ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg);
+         }
+         break;
+      case TGSI_FILE_IMMEDIATE:
+         {
+            /* immediates are stored as four 4-byte floats per register */
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
+            int offset_reg = emit_li_offset(gen, offset);
+            dst_vec = ppc_allocate_vec_register(gen->f);
+            /* Load 4-byte word into vector register.
+             * The vector slot depends on the effective address we load from.
+             * We know that our immediates start at a 16-byte boundary so we
+             * know that 'swizzle' tells us which vector slot will have the
+             * loaded word.  The other vector slots will be undefined.
+             */
+            ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg);
+            /* splat word[swizzle] across the vector reg */
+            ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
+         }
+         break;
+      case TGSI_FILE_CONSTANT:
+         {
+            /* constants are stored as four 4-byte floats per register */
+            int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4;
+            int offset_reg = emit_li_offset(gen, offset);
+            dst_vec = ppc_allocate_vec_register(gen->f);
+            /* Load 4-byte word into vector register.
+             * The vector slot depends on the effective address we load from.
+             * We know that our constants start at a 16-byte boundary so we
+             * know that 'swizzle' tells us which vector slot will have the
+             * loaded word.  The other vector slots will be undefined.
+             */
+            ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg);
+            /* splat word[swizzle] across the vector reg */
+            ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle);
+         }
+         break;
+      default:
+         assert( 0 );
+      }
+      break;
+   case TGSI_EXTSWIZZLE_ZERO:
+      /* A register must be allocated before it can be zeroed; without this
+       * dst_vec would still be -1 here and the assertion below would fire.
+       */
+      dst_vec = ppc_allocate_vec_register(gen->f);
+      ppc_vzero(gen->f, dst_vec);
+      break;
+   case TGSI_EXTSWIZZLE_ONE:
+      {
+         /* copy the shared {1,1,1,1} register so the caller may free or
+          * overwrite the result without disturbing it
+          */
+         int one_vec = gen_one_vec(gen);
+         dst_vec = ppc_allocate_vec_register(gen->f);
+         ppc_vmove(gen->f, dst_vec, one_vec);
+      }
+      break;
+   default:
+      assert( 0 );
+   }
+
+   assert(dst_vec >= 0);
+
+   {
+      uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index);
+      if (sign_op != TGSI_UTIL_SIGN_KEEP) {
+         int bit31_vec = gen_get_bit31_vec(gen);
+         int dst_vec2;
+
+         if (is_ppc_vec_temporary(reg)) {
+            /* need to use a new temp: dst_vec may be the TGSI temp's own
+             * register, which must not be modified by a read
+             */
+            /* NOTE(review): release_src_vecs() skips every cached operand
+             * whose source is a PPC-resident temp, so the register
+             * allocated here looks like it is never released -- confirm.
+             */
+            dst_vec2 = ppc_allocate_vec_register(gen->f);
+         }
+         else {
+            /* dst_vec is already a private register; modify it in place */
+            dst_vec2 = dst_vec;
+         }
+
+         switch (sign_op) {
+         case TGSI_UTIL_SIGN_CLEAR:
+            /* vec = vec & ~bit31 */
+            ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec);
+            break;
+         case TGSI_UTIL_SIGN_SET:
+            /* vec = vec | bit31 */
+            ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec);
+            break;
+         case TGSI_UTIL_SIGN_TOGGLE:
+            /* vec = vec ^ bit31 */
+            ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec);
+            break;
+         default:
+            assert(0);
+         }
+         return dst_vec2;
+      }
+   }
+
+   return dst_vec;
+}
+
+
+
+/**
+ * Test if two TGSI src registers refer to the same memory location.
+ * We use this to avoid redundant register loads.
+ *
+ * Two operands match when register file, register index, the extended
+ * swizzle selected for the given channel and the sign mode are all equal.
+ *
+ * \return TRUE if (a, chan_a) and (b, chan_b) fetch the same value
+ */
+static boolean
+equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a,
+               const struct tgsi_full_src_register *b, uint chan_b)
+{
+   int swz_a, swz_b;
+   int sign_a, sign_b;
+   if (a->SrcRegister.File != b->SrcRegister.File)
+      return FALSE;
+   if (a->SrcRegister.Index != b->SrcRegister.Index)
+      return FALSE;
+   swz_a = tgsi_util_get_full_src_register_extswizzle(a, chan_a);
+   swz_b = tgsi_util_get_full_src_register_extswizzle(b, chan_b);
+   if (swz_a != swz_b)
+      return FALSE;
+   sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a);
+   sign_b = tgsi_util_get_full_src_register_sign_mode(b, chan_b);
+   if (sign_a != sign_b)
+      return FALSE;
+   return TRUE;
+}
+
+
+/**
+ * Given a TGSI src register and channel index, return the PPC vector
+ * register containing the value.  We use a cache to prevent re-loading
+ * the same register multiple times.
+ * \param gen      code generation context (holds the operand cache)
+ * \param inst     the instruction whose source operand is wanted
+ * \param src_reg  index into inst->FullSrcRegisters[]
+ * \param chan     channel of the operand to fetch
+ * \return index of PPC vector register with the desired src operand
+ */
+static int
+get_src_vec(struct gen_context *gen,
+            struct tgsi_full_instruction *inst, int src_reg, uint chan)
+{
+   const struct tgsi_full_src_register *src =
+      &inst->FullSrcRegisters[src_reg];
+   int vec;
+   uint i;
+
+   /* check the cache */
+   for (i = 0; i < gen->num_regs; i++) {
+      if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) {
+         /* cache hit */
+         /* the field is unsigned; cast so the check can actually fail */
+         assert((int) gen->regs[i].vec >= 0);
+         return gen->regs[i].vec;
+      }
+   }
+
+   /* Cache miss.  Make sure there is a free slot *before* writing to it,
+    * rather than detecting the overrun after the fact.
+    */
+   assert(gen->num_regs < Elements(gen->regs));
+
+   /* allocate new vec reg and emit fetch/load code */
+   vec = emit_fetch(gen, src, chan);
+   assert(vec >= 0);
+
+   gen->regs[gen->num_regs].src = *src;
+   gen->regs[gen->num_regs].chan = chan;
+   gen->regs[gen->num_regs].vec = vec;
+   gen->num_regs++;
+
+   return vec;
+}
+
+
+/**
+ * Clear the src operand cache.  To be called at the end of each emit function.
+ */ +static void +release_src_vecs(struct gen_context *gen) +{ + uint i; + for (i = 0; i < gen->num_regs; i++) { + const const struct tgsi_full_src_register src = gen->regs[i].src; + if (!is_ppc_vec_temporary(&src)) { + ppc_release_vec_register(gen->f, gen->regs[i].vec); + } + } + gen->num_regs = 0; +} + + + +static int +get_dst_vec(struct gen_context *gen, + const struct tgsi_full_instruction *inst, + unsigned chan_index) +{ + const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; + + if (is_ppc_vec_temporary_dst(reg)) { + int vec = gen->temps_map[reg->DstRegister.Index][chan_index]; + return vec; + } + else { + return ppc_allocate_vec_register(gen->f); + } +} + + +/** + * Register store. Store 'src_vec' at location indicated by 'reg'. + * \param free_vec Should the src_vec be released when done? + */ +static void +emit_store(struct gen_context *gen, + int src_vec, + const struct tgsi_full_instruction *inst, + unsigned chan_index, + boolean free_vec) +{ + const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; + + switch (reg->DstRegister.File) { + case TGSI_FILE_OUTPUT: + { + int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + int offset_reg = emit_li_offset(gen, offset); + ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); + } + break; + case TGSI_FILE_TEMPORARY: + if (is_ppc_vec_temporary_dst(reg)) { + if (!free_vec) { + int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index]; + if (dst_vec != src_vec) + ppc_vmove(gen->f, dst_vec, src_vec); + } + free_vec = FALSE; + } + else { + int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + int offset_reg = emit_li_offset(gen, offset); + ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); + } + break; +#if 0 + case TGSI_FILE_ADDRESS: + emit_addrs( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; +#endif + default: + assert( 0 ); + } + +#if 0 + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case 
TGSI_SAT_ZERO_ONE: + /* assert( 0 ); */ + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } +#endif + + if (free_vec) + ppc_release_vec_register(gen->f, src_vec); +} + + +static void +emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0, v1; + uint chan_index; + + v0 = get_src_vec(gen, inst, 0, CHAN_X); + v1 = ppc_allocate_vec_register(gen->f); + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_RSQ: + /* v1 = 1.0 / sqrt(v0) */ + ppc_vrsqrtefp(gen->f, v1, v0); + break; + case TGSI_OPCODE_RCP: + /* v1 = 1.0 / v0 */ + ppc_vrefp(gen->f, v1, v0); + break; + default: + assert(0); + } + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + emit_store(gen, v1, inst, chan_index, FALSE); + } + + release_src_vecs(gen); + ppc_release_vec_register(gen->f, v1); +} + + +static void +emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + uint chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */ + int v1 = get_dst_vec(gen, inst, chan_index); + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + /* turn off the most significant bit of each vector float word */ + { + int bit31_vec = gen_get_bit31_vec(gen); + ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */ + } + break; + case TGSI_OPCODE_FLOOR: + ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ + break; + case TGSI_OPCODE_FRAC: + ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */ + ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */ + break; + case TGSI_OPCODE_EXPBASE2: + ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */ + break; + case TGSI_OPCODE_LOGBASE2: + /* XXX this may be broken! 
*/ + ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */ + break; + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + if (v0 != v1) + ppc_vmove(gen->f, v1, v0); + break; + default: + assert(0); + } + emit_store(gen, v1, inst, chan_index, TRUE); /* store v0 */ + } + + release_src_vecs(gen); +} + + +static void +emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int zero_vec = -1; + uint chan; + + if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { + zero_vec = ppc_allocate_vec_register(gen->f); + ppc_vzero(gen->f, zero_vec); + } + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { + /* fetch src operands */ + int v0 = get_src_vec(gen, inst, 0, chan); + int v1 = get_src_vec(gen, inst, 1, chan); + int v2 = get_dst_vec(gen, inst, chan); + + /* emit binop */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + ppc_vaddfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_SUB: + ppc_vsubfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_MUL: + ppc_vmaddfp(gen->f, v2, v0, v1, zero_vec); + break; + case TGSI_OPCODE_MIN: + ppc_vminfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_MAX: + ppc_vmaxfp(gen->f, v2, v0, v1); + break; + default: + assert(0); + } + + /* store v2 */ + emit_store(gen, v2, inst, chan, TRUE); + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) + ppc_release_vec_register(gen->f, zero_vec); + + release_src_vecs(gen); +} + + +static void +emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + uint chan; + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { + /* fetch src operands */ + int v0 = get_src_vec(gen, inst, 0, chan); + int v1 = get_src_vec(gen, inst, 1, chan); + int v2 = get_src_vec(gen, inst, 2, chan); + int v3 = get_dst_vec(gen, inst, chan); + + /* emit ALU */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MAD: + ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */ + break; + case TGSI_OPCODE_LRP: + ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ + ppc_vmaddfp(gen->f, v3, v0, 
v3, v2); /* v3 = v0 * v3 + v2 */ + break; + default: + assert(0); + } + + /* store v3 */ + emit_store(gen, v3, inst, chan, TRUE); + } + + release_src_vecs(gen); +} + + +/** + * Vector comparisons, resulting in 1.0 or 0.0 values. + */ +static void +emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + uint chan; + int one_vec = gen_one_vec(gen); + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { + /* fetch src operands */ + int v0 = get_src_vec(gen, inst, 0, chan); + int v1 = get_src_vec(gen, inst, 1, chan); + int v2 = get_dst_vec(gen, inst, chan); + boolean complement = FALSE; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_SNE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SEQ: + ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */ + break; + + case TGSI_OPCODE_SGE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SLT: + ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */ + break; + + case TGSI_OPCODE_SLE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SGT: + ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? 
~0 : 0 */ + break; + default: + assert(0); + } + + /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ + + if (complement) + ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */ + else + ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ + + /* store v2 */ + emit_store(gen, v2, inst, chan, TRUE); + } + + release_src_vecs(gen); +} + + +static void +emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0, v1, v2; + uint chan_index; + + v2 = ppc_allocate_vec_register(gen->f); + + ppc_vzero(gen->f, v2); /* v2 = {0, 0, 0, 0} */ + + v0 = get_src_vec(gen, inst, 0, CHAN_X); /* v0 = src0.XXXX */ + v1 = get_src_vec(gen, inst, 1, CHAN_X); /* v1 = src1.XXXX */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + v0 = get_src_vec(gen, inst, 0, CHAN_Y); /* v0 = src0.YYYY */ + v1 = get_src_vec(gen, inst, 1, CHAN_Y); /* v1 = src1.YYYY */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + v0 = get_src_vec(gen, inst, 0, CHAN_Z); /* v0 = src0.ZZZZ */ + v1 = get_src_vec(gen, inst, 1, CHAN_Z); /* v1 = src1.ZZZZ */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { + v0 = get_src_vec(gen, inst, 0, CHAN_W); /* v0 = src0.WWWW */ + v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + } + else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { + v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ + } + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + emit_store(gen, v2, inst, chan_index, FALSE); /* store v2, free v2 later */ + } + + release_src_vecs(gen); + + ppc_release_vec_register(gen->f, v2); +} + + +/** Approximation for vr = pow(va, vb) */ +static void +ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) +{ + /* pow(a,b) ~= exp2(log2(a) * b) */ + int t_vec = ppc_allocate_vec_register(f); + int zero_vec = 
ppc_allocate_vec_register(f); + + ppc_vzero(f, zero_vec); + + ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ + ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb + zero */ + ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */ + + ppc_release_vec_register(f, t_vec); + ppc_release_vec_register(f, zero_vec); +} + + +static void +emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int one_vec = gen_one_vec(gen); + + /* Compute X */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + emit_store(gen, one_vec, inst, CHAN_X, FALSE); + } + + /* Compute Y, Z */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int x_vec; + int zero_vec = ppc_allocate_vec_register(gen->f); + + x_vec = get_src_vec(gen, inst, 0, CHAN_X); /* x_vec = src[0].x */ + + ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ + ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + emit_store(gen, x_vec, inst, CHAN_Y, FALSE); + } + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int y_vec, w_vec; + int z_vec = ppc_allocate_vec_register(gen->f); + int pow_vec = ppc_allocate_vec_register(gen->f); + int pos_vec = ppc_allocate_vec_register(gen->f); + int p128_vec = ppc_allocate_vec_register(gen->f); + int n128_vec = ppc_allocate_vec_register(gen->f); + + y_vec = get_src_vec(gen, inst, 0, CHAN_Y); /* y_vec = src[0].y */ + ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ + + w_vec = get_src_vec(gen, inst, 0, CHAN_W); /* w_vec = src[0].w */ + + /* clamp W to [-128, 128] */ + load_constant_vec(gen, p128_vec, 128.0f); + load_constant_vec(gen, n128_vec, -128.0f); + ppc_vmaxfp(gen->f, w_vec, w_vec, n128_vec); /* w = max(w, -128) */ + ppc_vminfp(gen->f, w_vec, w_vec, p128_vec); /* w = min(w, 128) */ + + /* if temp.x > 0 + * z = pow(tmp.y, tmp.w) + * else + * z = 0.0 + */ + ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */ + ppc_vcmpgtfpx(gen->f, 
pos_vec, x_vec, zero_vec); /* pos = x > 0 */ + ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */ + + emit_store(gen, z_vec, inst, CHAN_Z, FALSE); + + ppc_release_vec_register(gen->f, z_vec); + ppc_release_vec_register(gen->f, pow_vec); + ppc_release_vec_register(gen->f, pos_vec); + ppc_release_vec_register(gen->f, p128_vec); + ppc_release_vec_register(gen->f, n128_vec); + } + + ppc_release_vec_register(gen->f, zero_vec); + } + + /* Compute W */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { + emit_store(gen, one_vec, inst, CHAN_W, FALSE); + } + + release_src_vecs(gen); +} + + +static void +emit_exp(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + const int one_vec = gen_one_vec(gen); + int src_vec; + + /* get src arg */ + src_vec = get_src_vec(gen, inst, 0, CHAN_X); + + /* Compute X = 2^floor(src) */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + int dst_vec = get_dst_vec(gen, inst, CHAN_X); + int tmp_vec = ppc_allocate_vec_register(gen->f); + ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ + ppc_vexptefp(gen->f, dst_vec, tmp_vec); /* dst = 2 ^ tmp */ + emit_store(gen, dst_vec, inst, CHAN_X, TRUE); + ppc_release_vec_register(gen->f, tmp_vec); + } + + /* Compute Y = src - floor(src) */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + int dst_vec = get_dst_vec(gen, inst, CHAN_Y); + int tmp_vec = ppc_allocate_vec_register(gen->f); + ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ + ppc_vsubfp(gen->f, dst_vec, src_vec, tmp_vec); /* dst = src - tmp */ + emit_store(gen, dst_vec, inst, CHAN_Y, TRUE); + ppc_release_vec_register(gen->f, tmp_vec); + } + + /* Compute Z = RoughApprox2ToX(src) */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int dst_vec = get_dst_vec(gen, inst, CHAN_Z); + ppc_vexptefp(gen->f, dst_vec, src_vec); /* dst = 2 ^ src */ + emit_store(gen, dst_vec, inst, CHAN_Z, TRUE); + } + + /* Compute W = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { + emit_store(gen, one_vec, inst, CHAN_W, 
FALSE); + } + + release_src_vecs(gen); +} + + +/** + * Emit code for TGSI LOG. With a = abs(src0.x): + * dst.x = floor(log2(a)) + * dst.y = a / 2^floor(log2(a)) + * dst.z = rough approximation of log2(a) + * dst.w = 1.0 + */ +static void +emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + const int bit31_vec = gen_get_bit31_vec(gen); + const int one_vec = gen_one_vec(gen); + int src_vec, abs_vec; + + /* get src arg */ + src_vec = get_src_vec(gen, inst, 0, CHAN_X); + + /* compute abs(src) */ + abs_vec = ppc_allocate_vec_register(gen->f); + ppc_vandc(gen->f, abs_vec, src_vec, bit31_vec); /* abs = src & ~bit31 */ + + /* X and Y both derive from floor(log2(abs)), so this section must be + * emitted when either channel is written, not only when both are. + */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + + /* compute tmp = floor(log2(abs)) */ + int tmp_vec = ppc_allocate_vec_register(gen->f); + ppc_vlogefp(gen->f, tmp_vec, abs_vec); /* tmp = log2(abs) */ + ppc_vrfim(gen->f, tmp_vec, tmp_vec); /* tmp = floor(tmp); */ + + /* Compute X = tmp */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + emit_store(gen, tmp_vec, inst, CHAN_X, FALSE); + } + + /* Compute Y = abs / 2^tmp */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + const int zero_vec = ppc_allocate_vec_register(gen->f); + ppc_vzero(gen->f, zero_vec); + ppc_vexptefp(gen->f, tmp_vec, tmp_vec); /* tmp = 2 ^ tmp */ + ppc_vrefp(gen->f, tmp_vec, tmp_vec); /* tmp = 1 / tmp */ + /* tmp = abs * tmp + zero */ + ppc_vmaddfp(gen->f, tmp_vec, abs_vec, tmp_vec, zero_vec); + emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE); + ppc_release_vec_register(gen->f, zero_vec); + } + + ppc_release_vec_register(gen->f, tmp_vec); + } + + /* Compute Z = RoughApproxLog2(abs) */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int dst_vec = get_dst_vec(gen, inst, CHAN_Z); + ppc_vlogefp(gen->f, dst_vec, abs_vec); /* dst = log2(abs) */ + emit_store(gen, dst_vec, inst, CHAN_Z, TRUE); + } + + /* Compute W = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { + emit_store(gen, one_vec, inst, CHAN_W, FALSE); + } + + ppc_release_vec_register(gen->f, abs_vec); + release_src_vecs(gen); +} + + +static void +emit_pow(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int s0_vec = 
get_src_vec(gen, inst, 0, CHAN_X); + int s1_vec = get_src_vec(gen, inst, 1, CHAN_X); + int pow_vec = ppc_allocate_vec_register(gen->f); + int chan; + + ppc_vec_pow(gen->f, pow_vec, s0_vec, s1_vec); + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { + emit_store(gen, pow_vec, inst, chan, FALSE); + } + + ppc_release_vec_register(gen->f, pow_vec); + + release_src_vecs(gen); +} + + +/** + * Emit code for TGSI XPD (cross product of src0.xyz and src1.xyz): + * dst.x = y0 * z1 - z0 * y1 + * dst.y = z0 * x1 - x0 * z1 + * dst.z = x0 * y1 - y0 * x1 + * Only the source channels needed by the enabled dst channels are fetched. + * + * NOTE(review): each result is meant to be tmp - a * b. Confirm that + * ppc_vnmsubfp() with the operand order used here computes that; if it + * follows ppc_vmaddfp()'s (vD, vA, vB, vC) => vA * vB + vC convention, + * tmp would have to be passed as the last operand instead. + */ +static void +emit_xpd(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int x0_vec, y0_vec, z0_vec; + int x1_vec, y1_vec, z1_vec; + int zero_vec, tmp_vec; + + zero_vec = ppc_allocate_vec_register(gen->f); + ppc_vzero(gen->f, zero_vec); + + /* single scratch register, reused for every enabled channel */ + tmp_vec = ppc_allocate_vec_register(gen->f); + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + x0_vec = get_src_vec(gen, inst, 0, CHAN_X); + x1_vec = get_src_vec(gen, inst, 1, CHAN_X); + } + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + y0_vec = get_src_vec(gen, inst, 0, CHAN_Y); + y1_vec = get_src_vec(gen, inst, 1, CHAN_Y); + } + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + z0_vec = get_src_vec(gen, inst, 0, CHAN_Z); + z1_vec = get_src_vec(gen, inst, 1, CHAN_Z); + } + + IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) { + /* tmp = y0 * z1 */ + ppc_vmaddfp(gen->f, tmp_vec, y0_vec, z1_vec, zero_vec); + /* tmp = tmp - z0 * y1*/ + ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, z0_vec, y1_vec); + emit_store(gen, tmp_vec, inst, CHAN_X, FALSE); + } + IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) { + /* tmp = z0 * x1 */ + ppc_vmaddfp(gen->f, tmp_vec, z0_vec, x1_vec, zero_vec); + /* tmp = tmp - x0 * z1 */ + ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, x0_vec, z1_vec); + emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE); + } + IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) { + /* tmp = x0 * y1 */ + ppc_vmaddfp(gen->f, tmp_vec, x0_vec, y1_vec, zero_vec); + /* tmp = tmp - y0 * x1 */ + 
ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, y0_vec, x1_vec); + emit_store(gen, tmp_vec, inst, CHAN_Z, FALSE); + } + /* W is undefined */ + + ppc_release_vec_register(gen->f, tmp_vec); + ppc_release_vec_register(gen->f, zero_vec); + release_src_vecs(gen); +} + +static int +emit_instruction(struct gen_context *gen, + struct tgsi_full_instruction *inst) +{ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_FLOOR: + case TGSI_OPCODE_FRAC: + case TGSI_OPCODE_EXPBASE2: + case TGSI_OPCODE_LOGBASE2: + emit_unaryop(gen, inst); + break; + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_RCP: + emit_scalar_unaryop(gen, inst); + break; + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MAX: + emit_binop(gen, inst); + break; + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SGE: + emit_inequality(gen, inst); + break; + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_LRP: + emit_triop(gen, inst); + break; + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + emit_dotprod(gen, inst); + break; + case TGSI_OPCODE_LIT: + emit_lit(gen, inst); + break; + case TGSI_OPCODE_LOG: + emit_log(gen, inst); + break; + case TGSI_OPCODE_EXP: + emit_exp(gen, inst); + break; + case TGSI_OPCODE_POW: + emit_pow(gen, inst); + break; + case TGSI_OPCODE_XPD: + emit_xpd(gen, inst); + break; + case TGSI_OPCODE_END: + /* normal end */ + return 1; + default: + return 0; + } + return 1; +} + + +static void +emit_declaration( + struct ppc_function *func, + struct tgsi_full_declaration *decl ) +{ + if( decl->Declaration.File == TGSI_FILE_INPUT ) { +#if 0 + unsigned first, last, mask; + unsigned i, j; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < 
NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + emit_coef_a0( func, 0, i, j ); + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_coef_a0( func, 4, i, j ); + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 4 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); + emit_coef_a0( func, 5, i, j ); + emit_rcp( func, 4, 4 ); /* 1.0 / w */ + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 5 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ + emit_inputs( func, 0, i, j ); + break; + + default: + assert( 0 ); + break; + } + } + } + } +#endif + } +} + + + +static void +emit_prologue(struct ppc_function *func) +{ + /* XXX set up stack frame */ +} + + +static void +emit_epilogue(struct ppc_function *func) +{ + ppc_comment(func, -4, "Epilogue:"); + ppc_return(func); + /* XXX restore prev stack frame */ +#if 0 + debug_printf("PPC: Emitted %u instructions\n", func->num_inst); +#endif +} + + + +/** + * Translate a TGSI vertex/fragment shader to PPC code. 
+ * + * \param tokens the TGSI input shader + * \param func the output PPC code/function + * \param immediates buffer to place immediates, later passed to PPC func + * \return TRUE for success, FALSE if translation failed + */ +/* NOTE: the do_swizzles parameter is not referenced anywhere in this function's body. */ +boolean +tgsi_emit_ppc(const struct tgsi_token *tokens, + struct ppc_function *func, + float (*immediates)[4], + boolean do_swizzles ) +{ + /* cached across calls; -1 means GALLIUM_NOPPC has not been queried yet */ + static int use_ppc_asm = -1; + struct tgsi_parse_context parse; + /*boolean instruction_phase = FALSE;*/ + unsigned ok = 1; + uint num_immediates = 0; + struct gen_context gen; + uint ic = 0; + + if (use_ppc_asm < 0) { + /* If GALLIUM_NOPPC is set, don't use PPC codegen */ + use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE); + } + if (!use_ppc_asm) + return FALSE; + + /* debug dump of the incoming shader, compiled out by default */ + if (0) { + debug_printf("\n********* TGSI->PPC ********\n"); + tgsi_dump(tokens, 0); + } + + util_init_math(); + + init_gen_context(&gen, func); + + emit_prologue(func); + + tgsi_parse_init( &parse, tokens ); + + /* walk the token stream; the loop stops at the first token that fails to translate */ + while (!tgsi_parse_end_of_tokens(&parse) && ok) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* declarations are only passed on for fragment shaders */ + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + emit_declaration(func, &parse.FullToken.FullDeclaration ); + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + /* when func->print is set, dump the TGSI instruction prefixed with "# " */ + if (func->print) { + _debug_printf("# "); + ic++; + tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); + } + + ok = emit_instruction(&gen, &parse.FullToken.FullInstruction); + + if (!ok) { + debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n", + parse.FullToken.FullInstruction.Instruction.Opcode, + parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? 
+ "vertex shader" : "fragment shader"); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* copy this immediate's components (at most 4) into the next immediates[] float[4] slot; no splatting is done here */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for (i = 0; i < size; i++) { + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } + num_immediates++; + } + break; + + default: + /* unknown token type: fail the translation */ + ok = 0; + assert( 0 ); + } + } + + /* the epilogue is emitted even when translation failed */ + emit_epilogue(func); + + tgsi_parse_free( &parse ); + + if (ppc_num_instructions(func) == 0) { + /* ran out of memory for instructions */ + ok = FALSE; + } + + if (!ok) + debug_printf("TGSI->PPC translation failed\n"); + + return ok; +} + +#endif /* PIPE_ARCH_PPC */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.h b/src/gallium/auxiliary/tgsi/tgsi_ppc.h new file mode 100644 index 0000000000..829ec075e7 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.h @@ -0,0 +1,51 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_PPC_H +#define TGSI_PPC_H + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_token; +struct ppc_function; + +extern const float ppc_builtin_constants[]; + + +boolean +tgsi_emit_ppc(const struct tgsi_token *tokens, + struct ppc_function *function, + float (*immediates)[4], + boolean do_swizzles); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_PPC_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index bc7b941b78..76e773da91 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_sanity.h" #include "tgsi_info.h" #include "tgsi_iterate.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 1239f6c076..c535788819 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -43,6 +43,9 @@ /** + * Scan the given TGSI shader to collect information such as number of + * registers used, special instructions used, etc. 
+ * \return info the result of the scan */ void tgsi_scan_shader(const struct tgsi_token *tokens, @@ -115,7 +118,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, { const struct tgsi_full_declaration *fulldecl = &parse.FullToken.FullDeclaration; - uint file = fulldecl->Declaration.File; + const uint file = fulldecl->Declaration.File; uint reg; for (reg = fulldecl->DeclarationRange.First; reg <= fulldecl->DeclarationRange.Last; @@ -131,8 +134,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_inputs++; } - - if (file == TGSI_FILE_OUTPUT) { + else if (file == TGSI_FILE_OUTPUT) { info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_outputs++; @@ -149,7 +151,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens, break; case TGSI_TOKEN_TYPE_IMMEDIATE: - info->immediate_count++; + { + uint reg = info->immediate_count++; + uint file = TGSI_FILE_IMMEDIATE; + + info->file_mask[file] |= (1 << reg); + info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], (int)reg); + } break; default: diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index ff869c8312..d70bcd03c5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -25,9 +25,16 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_X86) + +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" +#if defined(PIPE_ARCH_SSE) +#include "util/u_sse.h" +#endif #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" @@ -35,8 +42,6 @@ #include "rtasm/rtasm_x86sse.h" -#ifdef PIPE_ARCH_X86 - /* for 1/sqrt() * * This costs about 100fps (close to 10%) in gears: @@ -509,10 
+514,31 @@ emit_coef_dady( * Function call helpers. */ +/** + * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be + * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee + * that the stack pointer is 16 byte aligned, as expected. + */ static void -emit_push_gp( - struct x86_function *func ) +emit_func_call_dst( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst, + void (PIPE_CDECL *code)() ) { + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + unsigned i, n; + unsigned xmm_mask; + + /* Bitmask of the xmm registers to save */ + xmm_mask = (1 << xmm_save) - 1; + xmm_mask &= ~(1 << xmm_dst); + + sse_movaps( + func, + get_temp( TEMP_R0, 0 ), + make_xmm( xmm_dst ) ); + x86_push( func, x86_make_reg( file_REG32, reg_AX) ); @@ -522,12 +548,49 @@ emit_push_gp( x86_push( func, x86_make_reg( file_REG32, reg_DX) ); -} + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) + ++n; + + x86_sub_imm( + func, + x86_make_reg( file_REG32, reg_SP ), + n*16); + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) { + sse_movups( + func, + x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ), + make_xmm( i ) ); + ++n; + } + + x86_lea( + func, + ecx, + get_temp( TEMP_R0, 0 ) ); + + x86_push( func, ecx ); + x86_mov_reg_imm( func, ecx, (unsigned long) code ); + x86_call( func, ecx ); + x86_pop(func, ecx ); + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) { + sse_movups( + func, + make_xmm( i ), + x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) ); + ++n; + } + + x86_add_imm( + func, + x86_make_reg( file_REG32, reg_SP ), + n*16); -static void -x86_pop_gp( - struct x86_function *func ) -{ /* Restore GP registers in a reverse order. 
*/ x86_pop( @@ -539,39 +602,6 @@ x86_pop_gp( x86_pop( func, x86_make_reg( file_REG32, reg_AX) ); -} - -static void -emit_func_call_dst( - struct x86_function *func, - unsigned xmm_dst, - void (PIPE_CDECL *code)() ) -{ - sse_movaps( - func, - get_temp( TEMP_R0, 0 ), - make_xmm( xmm_dst ) ); - - emit_push_gp( - func ); - - { - struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - - x86_lea( - func, - ecx, - get_temp( TEMP_R0, 0 ) ); - - x86_push( func, ecx ); - x86_mov_reg_imm( func, ecx, (unsigned long) code ); - x86_call( func, ecx ); - x86_pop(func, ecx ); - } - - - x86_pop_gp( - func ); sse_movaps( func, @@ -582,6 +612,7 @@ emit_func_call_dst( static void emit_func_call_dst_src( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst, unsigned xmm_src, void (PIPE_CDECL *code)() ) @@ -593,10 +624,119 @@ emit_func_call_dst_src( emit_func_call_dst( func, + xmm_save, xmm_dst, code ); } + +#if defined(PIPE_ARCH_SSE) + +/* + * Fast SSE2 implementation of special math functions. 
+ */ + +#define POLY0(x, c0) _mm_set1_ps(c0) +#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), _mm_set1_ps(c0)) +#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), _mm_set1_ps(c0)) +#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), x), _mm_set1_ps(c0)) +#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, c3, c4), x), _mm_set1_ps(c0)) +#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1, c2, c3, c4, c5), x), _mm_set1_ps(c0)) + +#define EXP_POLY_DEGREE 3 +#define LOG_POLY_DEGREE 5 + +/** + * See http://www.devmaster.net/forums/showthread.php?p=43580 + */ +static INLINE __m128 +exp2f4(__m128 x) +{ + __m128i ipart; + __m128 fpart, expipart, expfpart; + + x = _mm_min_ps(x, _mm_set1_ps( 129.00000f)); + x = _mm_max_ps(x, _mm_set1_ps(-126.99999f)); + + /* ipart = int(x - 0.5) */ + ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f))); + + /* fpart = x - ipart */ + fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart)); + + /* expipart = (float) (1 << ipart) */ + expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23)); + + /* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */ +#if EXP_POLY_DEGREE == 5 + expfpart = POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f); +#elif EXP_POLY_DEGREE == 4 + expfpart = POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f); +#elif EXP_POLY_DEGREE == 3 + expfpart = POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f); +#elif EXP_POLY_DEGREE == 2 + expfpart = POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f); +#else +#error +#endif + + return _mm_mul_ps(expipart, expfpart); +} + + +/** + * See http://www.devmaster.net/forums/showthread.php?p=43580 + */ +static INLINE __m128 +log2f4(__m128 x) +{ + __m128i expmask = _mm_set1_epi32(0x7f800000); + __m128i mantmask = _mm_set1_epi32(0x007fffff); 
+ __m128 one = _mm_set1_ps(1.0f); + + __m128i i = _mm_castps_si128(x); + + /* exp = (float) exponent(x) */ + __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127))); + + /* mant = (float) mantissa(x) */ + __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one); + + __m128 logmant; + + /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ + * These coefficients can be generate with + * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html + */ +#if LOG_POLY_DEGREE == 6 + logmant = POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f, -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f); +#elif LOG_POLY_DEGREE == 5 + logmant = POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f); +#elif LOG_POLY_DEGREE == 4 + logmant = POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f); +#elif LOG_POLY_DEGREE == 3 + logmant = POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f); +#else +#error +#endif + + /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ + logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one)); + + return _mm_add_ps(logmant, exp); +} + + +static INLINE __m128 +powf4(__m128 x, __m128 y) +{ + return exp2f4(_mm_mul_ps(log2f4(x), y)); +} + +#endif /* PIPE_ARCH_SSE */ + + + /** * Low-level instruction translators. 
*/ @@ -639,38 +779,42 @@ cos4f( static void emit_cos( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, cos4f ); } static void PIPE_CDECL +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE) +__attribute__((force_align_arg_pointer)) +#endif ex24f( float *store ) { -#if FAST_MATH +#if defined(PIPE_ARCH_SSE) + _mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) )); +#else store[0] = util_fast_exp2( store[0] ); store[1] = util_fast_exp2( store[1] ); store[2] = util_fast_exp2( store[2] ); store[3] = util_fast_exp2( store[3] ); -#else - store[0] = powf( 2.0f, store[0] ); - store[1] = powf( 2.0f, store[1] ); - store[2] = powf( 2.0f, store[2] ); - store[3] = powf( 2.0f, store[3] ); #endif } static void emit_ex2( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, ex24f ); } @@ -710,10 +854,12 @@ flr4f( static void emit_flr( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, flr4f ); } @@ -731,31 +877,42 @@ frc4f( static void emit_frc( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, frc4f ); } static void PIPE_CDECL +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE) +__attribute__((force_align_arg_pointer)) +#endif lg24f( float *store ) { +#if defined(PIPE_ARCH_SSE) + _mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) )); +#else store[0] = util_fast_log2( store[0] ); store[1] = util_fast_log2( store[1] ); store[2] = util_fast_log2( store[2] ); store[3] = util_fast_log2( store[3] ); +#endif } static void emit_lg2( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, lg24f ); } @@ -797,30 +954,32 @@ emit_neg( } static void PIPE_CDECL +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE) +__attribute__((force_align_arg_pointer)) +#endif pow4f( float 
*store ) { -#if FAST_MATH +#if defined(PIPE_ARCH_SSE) + _mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) )); +#else store[0] = util_fast_pow( store[0], store[4] ); store[1] = util_fast_pow( store[1], store[5] ); store[2] = util_fast_pow( store[2], store[6] ); store[3] = util_fast_pow( store[3], store[7] ); -#else - store[0] = powf( store[0], store[4] ); - store[1] = powf( store[1], store[5] ); - store[2] = powf( store[2], store[6] ); - store[3] = powf( store[3], store[7] ); #endif } static void emit_pow( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst, unsigned xmm_src ) { emit_func_call_dst_src( func, + xmm_save, xmm_dst, xmm_src, pow4f ); @@ -855,10 +1014,12 @@ rnd4f( static void emit_rnd( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, rnd4f ); } @@ -935,10 +1096,12 @@ sgn4f( static void emit_sgn( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, sgn4f ); } @@ -955,10 +1118,12 @@ sin4f( static void emit_sin (struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst) { emit_func_call_dst( func, + xmm_save, xmm_dst, sin4f ); } @@ -1378,7 +1543,7 @@ emit_instruction( get_temp( TGSI_EXEC_TEMP_MINUS_128_I, TGSI_EXEC_TEMP_MINUS_128_C ) ); - emit_pow( func, 1, 2 ); + emit_pow( func, 3, 1, 2 ); FETCH( func, *inst, 0, 0, CHAN_X ); sse_xorps( func, @@ -1410,6 +1575,7 @@ emit_instruction( case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ FETCH( func, *inst, 0, 0, CHAN_X ); + emit_abs( func, 0 ); emit_rsqrt( func, 1, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 1, 0, chan_index ); @@ -1424,11 +1590,11 @@ emit_instruction( if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { emit_MOV( func, 1, 0 ); - emit_flr( func, 1 ); + emit_flr( func, 2, 1 ); /* dst.x = ex2(floor(src.x)) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X 
)) { emit_MOV( func, 2, 1 ); - emit_ex2( func, 2 ); + emit_ex2( func, 3, 2 ); STORE( func, *inst, 2, 0, CHAN_X ); } /* dst.y = src.x - floor(src.x) */ @@ -1440,7 +1606,7 @@ emit_instruction( } /* dst.z = ex2(src.x) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { - emit_ex2( func, 0 ); + emit_ex2( func, 3, 0 ); STORE( func, *inst, 0, 0, CHAN_Z ); } } @@ -1458,21 +1624,21 @@ emit_instruction( FETCH( func, *inst, 0, 0, CHAN_X ); emit_abs( func, 0 ); emit_MOV( func, 1, 0 ); - emit_lg2( func, 1 ); + emit_lg2( func, 2, 1 ); /* dst.z = lg2(abs(src.x)) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { STORE( func, *inst, 1, 0, CHAN_Z ); } if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { - emit_flr( func, 1 ); + emit_flr( func, 2, 1 ); /* dst.x = floor(lg2(abs(src.x))) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { STORE( func, *inst, 1, 0, CHAN_X ); } /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { - emit_ex2( func, 1 ); + emit_ex2( func, 2, 1 ); emit_rcp( func, 1, 1 ); emit_mul( func, 0, 1 ); STORE( func, *inst, 0, 0, CHAN_Y ); @@ -1647,7 +1813,18 @@ emit_instruction( case TGSI_OPCODE_DOT2ADD: /* TGSI_OPCODE_DP2A */ - return 0; + FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */ + FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */ + emit_mul( func, 0, 1 ); /* xmm0 = xmm0 * xmm1 */ + FETCH( func, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */ + FETCH( func, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */ + emit_mul( func, 1, 2 ); /* xmm1 = xmm1 * xmm2 */ + emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */ + FETCH( func, *inst, 1, 2, CHAN_X ); /* xmm1 = src[2].x */ + emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */ + } break; case TGSI_OPCODE_INDEX: @@ -1662,7 +1839,7 @@ emit_instruction( /* TGSI_OPCODE_FRC */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, 
chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_frc( func, 0 ); + emit_frc( func, 0, 0 ); STORE( func, *inst, 0, 0, chan_index ); } break; @@ -1675,7 +1852,7 @@ emit_instruction( /* TGSI_OPCODE_FLR */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_flr( func, 0 ); + emit_flr( func, 0, 0 ); STORE( func, *inst, 0, 0, chan_index ); } break; @@ -1683,7 +1860,7 @@ emit_instruction( case TGSI_OPCODE_ROUND: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_rnd( func, 0 ); + emit_rnd( func, 0, 0 ); STORE( func, *inst, 0, 0, chan_index ); } break; @@ -1691,7 +1868,7 @@ emit_instruction( case TGSI_OPCODE_EXPBASE2: /* TGSI_OPCODE_EX2 */ FETCH( func, *inst, 0, 0, CHAN_X ); - emit_ex2( func, 0 ); + emit_ex2( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1700,7 +1877,7 @@ emit_instruction( case TGSI_OPCODE_LOGBASE2: /* TGSI_OPCODE_LG2 */ FETCH( func, *inst, 0, 0, CHAN_X ); - emit_lg2( func, 0 ); + emit_lg2( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1710,7 +1887,7 @@ emit_instruction( /* TGSI_OPCODE_POW */ FETCH( func, *inst, 0, 0, CHAN_X ); FETCH( func, *inst, 1, 1, CHAN_X ); - emit_pow( func, 0, 1 ); + emit_pow( func, 0, 0, 1 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1801,7 +1978,7 @@ emit_instruction( case TGSI_OPCODE_COS: FETCH( func, *inst, 0, 0, CHAN_X ); - emit_cos( func, 0 ); + emit_cos( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1860,7 +2037,7 @@ emit_instruction( case TGSI_OPCODE_SIN: FETCH( func, *inst, 0, 0, CHAN_X ); - emit_sin( func, 0 ); + emit_sin( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1927,7 +2104,7 @@ 
emit_instruction( case TGSI_OPCODE_ARR: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_rnd( func, 0 ); + emit_rnd( func, 0, 0 ); emit_f2it( func, 0 ); STORE( func, *inst, 0, 0, chan_index ); } @@ -1952,7 +2129,7 @@ emit_instruction( /* TGSI_OPCODE_SGN */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_sgn( func, 0 ); + emit_sgn( func, 0, 0 ); STORE( func, *inst, 0, 0, chan_index ); } break; @@ -1964,12 +2141,12 @@ emit_instruction( case TGSI_OPCODE_SCS: IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { FETCH( func, *inst, 0, 0, CHAN_X ); - emit_cos( func, 0 ); + emit_cos( func, 0, 0 ); STORE( func, *inst, 0, 0, CHAN_X ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { FETCH( func, *inst, 0, 0, CHAN_X ); - emit_sin( func, 0 ); + emit_sin( func, 0, 0 ); STORE( func, *inst, 0, 0, CHAN_Y ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { @@ -1995,7 +2172,39 @@ emit_instruction( break; case TGSI_OPCODE_NRM: - return 0; + /* fall-through */ + case TGSI_OPCODE_NRM4: + /* 3 or 4-component normalization */ + { + uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 
3 : 4; + /* note: cannot use xmm regs 2/3 here (see emit_rsqrt() above) */ + FETCH( func, *inst, 4, 0, CHAN_X ); /* xmm4 = src[0].x */ + FETCH( func, *inst, 5, 0, CHAN_Y ); /* xmm5 = src[0].y */ + FETCH( func, *inst, 6, 0, CHAN_Z ); /* xmm6 = src[0].z */ + if (dims == 4) { + FETCH( func, *inst, 7, 0, CHAN_W ); /* xmm7 = src[0].w */ + } + emit_MOV( func, 0, 4 ); /* xmm0 = xmm3 */ + emit_mul( func, 0, 4 ); /* xmm0 *= xmm3 */ + emit_MOV( func, 1, 5 ); /* xmm1 = xmm4 */ + emit_mul( func, 1, 5 ); /* xmm1 *= xmm4 */ + emit_add( func, 0, 1 ); /* xmm0 += xmm1 */ + emit_MOV( func, 1, 6 ); /* xmm1 = xmm5 */ + emit_mul( func, 1, 6 ); /* xmm1 *= xmm5 */ + emit_add( func, 0, 1 ); /* xmm0 += xmm1 */ + if (dims == 4) { + emit_MOV( func, 1, 7 ); /* xmm1 = xmm7 */ + emit_mul( func, 1, 7 ); /* xmm1 *= xmm7 */ + emit_add( func, 0, 0 ); /* xmm0 += xmm1 */ + } + emit_rsqrt( func, 1, 0 ); /* xmm1 = 1/sqrt(xmm0) */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + if (chan_index < dims) { + emit_mul( func, 4+chan_index, 1); /* xmm[4+ch] *= xmm1 */ + STORE( func, *inst, 4+chan_index, 0, chan_index ); + } + } + } break; case TGSI_OPCODE_DIV: @@ -2003,7 +2212,16 @@ emit_instruction( break; case TGSI_OPCODE_DP2: - return 0; + FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */ + FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */ + emit_mul( func, 0, 1 ); /* xmm0 = xmm0 * xmm1 */ + FETCH( func, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */ + FETCH( func, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */ + emit_mul( func, 1, 2 ); /* xmm1 = xmm1 * xmm2 */ + emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */ + } break; case TGSI_OPCODE_TXL: @@ -2454,7 +2672,7 @@ tgsi_emit_sse2( case TGSI_TOKEN_TYPE_IMMEDIATE: /* simply copy the immediate values into the next immediates[] slot */ { - const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + const uint size 
= parse.FullToken.FullImmediate.Immediate.NrTokens - 1; uint i; assert(size <= 4); assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 9454563361..58fe07c11d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_text.h" #include "tgsi_build.h" #include "tgsi_info.h" @@ -1023,7 +1023,7 @@ static boolean parse_immediate( struct translate_ctx *ctx ) ctx->cur++; imm = tgsi_default_full_immediate(); - imm.Immediate.Size += 4; + imm.Immediate.NrTokens += 4; imm.Immediate.DataType = TGSI_IMM_FLOAT32; imm.u.Pointer = values; diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c index ea87da31e5..062c1be938 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.c +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -31,7 +31,7 @@ * Authors: Brian Paul */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi_transform.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 50101a9bb0..71f8a6ca40 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_build.h" diff --git a/src/gallium/auxiliary/translate/Makefile b/src/gallium/auxiliary/translate/Makefile index ad2a5b705e..3c82f8ae03 100644 --- a/src/gallium/auxiliary/translate/Makefile +++ b/src/gallium/auxiliary/translate/Makefile @@ -10,6 +10,3 @@ C_SOURCES = \ translate_cache.c include ../../Makefile.template - -symlinks: - diff --git 
a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 5c227c1eb5..160df8dfa7 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -4,7 +4,7 @@ include $(TOP)/configs/current LIBNAME = util C_SOURCES = \ - p_debug.c \ + u_debug.c \ u_blit.c \ u_cache.c \ u_draw_quad.c \ @@ -12,6 +12,8 @@ C_SOURCES = \ u_handle_table.c \ u_hash_table.c \ u_hash.c \ + u_keymap.c \ + u_linear.c \ u_math.c \ u_mm.c \ u_rect.c \ @@ -21,9 +23,7 @@ C_SOURCES = \ u_stream_wd.c \ u_tile.c \ u_time.c \ - u_timed_winsys.c + u_timed_winsys.c \ + u_simple_screen.c include ../../Makefile.template - -symlinks: - diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 1ef06631bf..5d336ea082 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -3,25 +3,29 @@ Import('*') util = env.ConvenienceLibrary( target = 'util', source = [ - 'p_debug.c', - 'p_debug_mem.c', - 'p_debug_prof.c', 'u_blit.c', 'u_cache.c', + 'u_debug.c', + 'u_debug_memory.c', + 'u_debug_profile.c', + 'u_debug_stack.c', 'u_draw_quad.c', 'u_gen_mipmap.c', 'u_handle_table.c', 'u_hash.c', 'u_hash_table.c', + 'u_keymap.c', 'u_math.c', 'u_mm.c', 'u_rect.c', 'u_simple_shaders.c', 'u_snprintf.c', - 'u_stream_stdc.c', - 'u_stream_wd.c', + 'u_stream_stdc.c', + 'u_stream_wd.c', 'u_tile.c', 'u_time.c', + 'u_timed_winsys.c', + 'u_simple_screen.c', ]) auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 2cef3338b5..efc3a874cc 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -34,10 +34,9 @@ #include "pipe/p_context.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" #include "util/u_blit.h" @@ -415,7 +414,7 @@ util_blit_pixels(struct blit_state *ctx, memset(&fb, 0, 
sizeof(fb)); fb.width = dst->width; fb.height = dst->height; - fb.num_cbufs = 1; + fb.nr_cbufs = 1; fb.cbufs[0] = dst; cso_set_framebuffer(ctx->cso, &fb); @@ -526,7 +525,7 @@ util_blit_pixels_tex(struct blit_state *ctx, memset(&fb, 0, sizeof(fb)); fb.width = dst->width; fb.height = dst->height; - fb.num_cbufs = 1; + fb.nr_cbufs = 1; fb.cbufs[0] = dst; cso_set_framebuffer(ctx->cso, &fb); diff --git a/src/gallium/auxiliary/util/u_cache.c b/src/gallium/auxiliary/util/u_cache.c index 0a1a64259f..41cd38171f 100644 --- a/src/gallium/auxiliary/util/u_cache.c +++ b/src/gallium/auxiliary/util/u_cache.c @@ -36,7 +36,7 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_math.h" #include "util/u_memory.h" diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/u_debug.c index 125f3daf00..e05c419b2f 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/u_debug.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright (c) 2008 VMware, Inc. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -36,12 +37,20 @@ #include <windows.h> #include <winddi.h> +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) + +#include <stdio.h> +#include <stdlib.h> +#include <windows.h> +#include <types.h> + #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers #endif #include <windows.h> +#include <stdio.h> #else @@ -51,7 +60,7 @@ #endif #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_format.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" @@ -98,7 +107,41 @@ void _debug_vprintf(const char *format, va_list ap) OutputDebugStringA(buf); buf[0] = '\0'; } -#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + + if(GetConsoleWindow() && !IsDebuggerPresent()) { + vfprintf(stderr, format, ap); + fflush(stderr); + } + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) + wchar_t *wide_format; + long wide_str_len; + char buf[512]; + int ret; +#if (_WIN32_WCE < 600) + ret = vsprintf(buf, format, ap); + if(ret < 0){ + sprintf(buf, "Cant handle debug print!"); + ret = 25; + } +#else + ret = vsprintf_s(buf, 512, format, ap); + if(ret < 0){ + sprintf_s(buf, 512, "Cant handle debug print!"); + ret = 25; + } +#endif + buf[ret] = '\0'; + /* Format is ascii - needs to be converted to wchar_t for printing */ + wide_str_len = MultiByteToWideChar(CP_ACP, 0, (const char *) buf, -1, NULL, 0); + wide_format = (wchar_t *) malloc((wide_str_len+1) * sizeof(wchar_t)); + if (wide_format) { + MultiByteToWideChar(CP_ACP, 0, (const char *) buf, -1, + wide_format, wide_str_len); + NKDbgPrintfW(wide_format, wide_format); + free(wide_format); + } +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) /* TODO */ #else /* !PIPE_SUBSYSTEM_WINDOWS */ #ifdef DEBUG @@ -308,6 +351,13 @@ debug_get_flags_option(const char *name, str = _debug_get_option(name); if(!str) result = 
dfault; + else if (!util_strcmp(str, "help")) { + result = dfault; + while (flags->name) { + debug_printf("%s: help for %s: %s [0x%lx]\n", __FUNCTION__, name, flags->name, flags->value); + flags++; + } + } else { result = 0; while( flags->name ) { @@ -317,7 +367,12 @@ debug_get_flags_option(const char *name, } } - debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result); + if (str) { + debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str); + } + else { + debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result); + } return result; } @@ -358,6 +413,32 @@ debug_dump_enum(const struct debug_named_value *names, const char * +debug_dump_enum_noprefix(const struct debug_named_value *names, + const char *prefix, + unsigned long value) +{ + static char rest[64]; + + while(names->name) { + if(names->value == value) { + const char *name = names->name; + while (*name == *prefix) { + name++; + prefix++; + } + return name; + } + ++names; + } + + + + util_snprintf(rest, sizeof(rest), "0x%08lx", value); + return rest; +} + + +const char * debug_dump_flags(const struct debug_named_value *names, unsigned long value) { @@ -486,16 +567,24 @@ static const struct debug_named_value pipe_format_names[] = { DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SSCALED), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SSCALED), DEBUG_NAMED_VALUE(PIPE_FORMAT_L8_SRGB), - DEBUG_NAMED_VALUE(PIPE_FORMAT_A8_L8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8L8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8R8G8B8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_X8R8G8B8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8A8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8X8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_X8UB8UG8SR8S_NORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_B6UG5SR5S_NORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGBA), 
DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_RGBA), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT5_RGBA), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_SRGBA), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_SRGBA), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT5_SRGBA), #endif DEBUG_NAMED_VALUE_END }; @@ -561,34 +650,37 @@ void debug_dump_image(const char *prefix, void debug_dump_surface(const char *prefix, struct pipe_surface *surface) { - unsigned surface_usage; + struct pipe_texture *texture; + struct pipe_screen *screen; + struct pipe_transfer *transfer; void *data; if (!surface) - goto error1; + return; + + texture = surface->texture; + screen = texture->screen; - /* XXX: force mappable surface */ - surface_usage = surface->usage; - surface->usage |= PIPE_BUFFER_USAGE_CPU_READ; + transfer = screen->get_tex_transfer(screen, texture, surface->face, + surface->level, surface->zslice, + PIPE_TRANSFER_READ, 0, 0, surface->width, + surface->height); - data = pipe_surface_map(surface, - PIPE_BUFFER_USAGE_CPU_READ); + data = screen->transfer_map(screen, transfer); if(!data) - goto error2; + goto error; debug_dump_image(prefix, - surface->format, - surface->block.size, - surface->nblocksx, - surface->nblocksy, - surface->stride, + transfer->format, + transfer->block.size, + transfer->nblocksx, + transfer->nblocksy, + transfer->stride, data); - pipe_surface_unmap(surface); -error2: - surface->usage = surface_usage; -error1: - ; + screen->transfer_unmap(screen, transfer); +error: + screen->tex_transfer_release(screen, &transfer); } @@ -627,8 +719,11 @@ void debug_dump_surface_bmp(const char *filename, struct pipe_surface *surface) { +#ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT + struct pipe_texture *texture; + struct pipe_screen *screen; struct util_stream *stream; - unsigned surface_usage; + struct pipe_transfer *transfer; struct bmp_file_header bmfh; struct bmp_info_header bmih; float *rgba; @@ -665,14 +760,18 @@ debug_dump_surface_bmp(const char *filename, 
util_stream_write(stream, &bmfh, 14); util_stream_write(stream, &bmih, 40); + + texture = surface->texture; + screen = texture->screen; - /* XXX: force mappable surface */ - surface_usage = surface->usage; - surface->usage |= PIPE_BUFFER_USAGE_CPU_READ; + transfer = screen->get_tex_transfer(screen, texture, surface->face, + surface->level, surface->zslice, + PIPE_TRANSFER_READ, 0, 0, surface->width, + surface->height); y = surface->height; while(y--) { - pipe_get_tile_rgba(surface, + pipe_get_tile_rgba(transfer, 0, y, surface->width, 1, rgba); for(x = 0; x < surface->width; ++x) @@ -685,14 +784,15 @@ debug_dump_surface_bmp(const char *filename, util_stream_write(stream, &pixel, 4); } } - - surface->usage = surface_usage; + screen->tex_transfer_release(screen, &transfer); + util_stream_close(stream); error2: FREE(rgba); error1: ; +#endif } #endif diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/auxiliary/util/u_debug.h index 3b00fb9aa8..b298b9b66d 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/auxiliary/util/u_debug.h @@ -35,13 +35,13 @@ * @author Jose Fonseca <jrfonseca@tungstengraphics.com> */ -#ifndef P_DEBUG_H_ -#define P_DEBUG_H_ +#ifndef U_DEBUG_H_ +#define U_DEBUG_H_ #include <stdarg.h> -#include "p_compiler.h" +#include "pipe/p_compiler.h" #ifdef __cplusplus @@ -261,6 +261,11 @@ const char * debug_dump_enum(const struct debug_named_value *names, unsigned long value); +const char * +debug_dump_enum_noprefix(const struct debug_named_value *names, + const char *prefix, + unsigned long value); + /** * Convert binary flags value to a string. 
@@ -353,4 +358,4 @@ void debug_dump_surface_bmp(const char *filename, } #endif -#endif /* P_DEBUG_H_ */ +#endif /* U_DEBUG_H_ */ diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/u_debug_memory.c index 250fd60f63..758541c282 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/u_debug_memory.c @@ -44,11 +44,13 @@ #include <stdlib.h> #endif -#include "pipe/p_debug.h" +#include "util/u_debug.h" +#include "util/u_debug_stack.h" #include "util/u_double_list.h" #define DEBUG_MEMORY_MAGIC 0x6e34090aU +#define DEBUG_MEMORY_STACK 0 /* XXX: disabled until we have symbol lookup */ #if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && !defined(WINCE) @@ -71,7 +73,9 @@ struct debug_memory_header const char *file; unsigned line; const char *function; + struct debug_stack_frame backtrace[DEBUG_MEMORY_STACK]; size_t size; + unsigned magic; }; @@ -136,6 +140,8 @@ debug_malloc(const char *file, unsigned line, const char *function, hdr->size = size; hdr->magic = DEBUG_MEMORY_MAGIC; + debug_backtrace_capture(hdr->backtrace, 0, DEBUG_MEMORY_STACK); + ftr = footer_from_header(hdr); ftr->magic = DEBUG_MEMORY_MAGIC; @@ -290,6 +296,7 @@ debug_memory_end(unsigned long start_no) debug_printf("%s:%u:%s: %u bytes at %p not freed\n", hdr->file, hdr->line, hdr->function, hdr->size, ptr); + debug_backtrace_dump(hdr->backtrace, DEBUG_MEMORY_STACK); total_size += hdr->size; } diff --git a/src/gallium/auxiliary/util/p_debug_prof.c b/src/gallium/auxiliary/util/u_debug_profile.c index 5f9772ef91..6d8b244c3a 100644 --- a/src/gallium/auxiliary/util/p_debug_prof.c +++ b/src/gallium/auxiliary/util/u_debug_profile.c @@ -42,7 +42,7 @@ #include <windows.h> #include <winddi.h> -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_string.h" diff --git a/src/gallium/auxiliary/util/u_debug_stack.c b/src/gallium/auxiliary/util/u_debug_stack.c new file mode 100644 index 0000000000..76068a6509 --- /dev/null +++ 
b/src/gallium/auxiliary/util/u_debug_stack.c @@ -0,0 +1,97 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Stack backtracing. 
+ * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + +#include "u_debug.h" +#include "u_debug_stack.h" + + +void +debug_backtrace_capture(struct debug_stack_frame *backtrace, + unsigned start_frame, + unsigned nr_frames) +{ + const void **frame_pointer = NULL; + unsigned i = 0; + + if(!nr_frames) + return; + +#if defined(PIPE_CC_GCC) + frame_pointer = ((const void **)__builtin_frame_address(1)); +#elif defined(PIPE_CC_MSVC) + __asm { + mov frame_pointer, ebp + } + frame_pointer = (const void **)frame_pointer[0]; +#else + frame_pointer = NULL; +#endif + + +#ifdef PIPE_ARCH_X86 + while(nr_frames) { + if(!frame_pointer) + break; + + if(start_frame) + --start_frame; + else { + backtrace[i++].function = frame_pointer[1]; + --nr_frames; + } + + frame_pointer = (const void **)frame_pointer[0]; + } +#endif + + while(nr_frames) { + backtrace[i++].function = NULL; + --nr_frames; + } +} + + +void +debug_backtrace_dump(const struct debug_stack_frame *backtrace, + unsigned nr_frames) +{ + unsigned i; + + for(i = 0; i < nr_frames; ++i) { + if(!backtrace[i].function) + break; + debug_printf("\t%p\n", backtrace[i].function); + } +} + diff --git a/src/gallium/auxiliary/util/u_debug_stack.h b/src/gallium/auxiliary/util/u_debug_stack.h new file mode 100644 index 0000000000..f50f04e0f7 --- /dev/null +++ b/src/gallium/auxiliary/util/u_debug_stack.h @@ -0,0 +1,65 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_DEBUG_STACK_H_ +#define U_DEBUG_STACK_H_ + + +/** + * @file + * Stack backtracing. 
+ * + * @author Jose Fonseca <jfonseca@vmware.com> + */ + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct debug_stack_frame +{ + const void *function; +}; + + +void +debug_backtrace_capture(struct debug_stack_frame *backtrace, + unsigned start_frame, + unsigned nr_frames); + +void +debug_backtrace_dump(const struct debug_stack_frame *backtrace, + unsigned nr_frames); + + +#ifdef __cplusplus +} +#endif + +#endif /* U_DEBUG_STACK_H_ */ diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index d7bb74b87b..f282f3d289 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -29,7 +29,6 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" #include "util/u_draw_quad.h" @@ -53,7 +52,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, /* tell pipe about the vertex buffer */ vbuffer.buffer = vbuf; - vbuffer.pitch = num_attribs * 4 * sizeof(float); /* vertex size */ + vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = offset; pipe->set_vertex_buffers(pipe, 1, &vbuffer); diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index b0de5968e9..90483fcb21 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -2,6 +2,7 @@ * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -34,10 +35,9 @@ #include "pipe/p_context.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" #include "util/u_memory.h" @@ -93,13 +93,82 @@ enum dtype typedef ushort half_float; -#if 0 -extern half_float -float_to_half(float f); +static half_float +float_to_half(float f) +{ + /* XXX fix this */ + return 0; +} + +static float +half_to_float(half_float h) +{ + /* XXX fix this */ + return 0.0f; +} + -extern float -half_to_float(half_float h); -#endif + + +/** + * \name Support macros for do_row and do_row_3d + * + * The macro madness is here for two reasons. First, it compacts the code + * slightly. Second, it makes it much easier to adjust the specifics of the + * filter to tune the rounding characteristics. + */ +/*@{*/ +#define DECLARE_ROW_POINTERS(t, e) \ + const t(*rowA)[e] = (const t(*)[e]) srcRowA; \ + const t(*rowB)[e] = (const t(*)[e]) srcRowB; \ + const t(*rowC)[e] = (const t(*)[e]) srcRowC; \ + const t(*rowD)[e] = (const t(*)[e]) srcRowD; \ + t(*dst)[e] = (t(*)[e]) dstRow + +#define DECLARE_ROW_POINTERS0(t) \ + const t *rowA = (const t *) srcRowA; \ + const t *rowB = (const t *) srcRowB; \ + const t *rowC = (const t *) srcRowC; \ + const t *rowD = (const t *) srcRowD; \ + t *dst = (t *) dstRow + +#define FILTER_SUM_3D(Aj, Ak, Bj, Bk, Cj, Ck, Dj, Dk) \ + ((unsigned) Aj + (unsigned) Ak \ + + (unsigned) Bj + (unsigned) Bk \ + + (unsigned) Cj + (unsigned) Ck \ + + (unsigned) Dj + (unsigned) Dk \ + + 4) >> 3 + +#define FILTER_3D(e) \ + do { \ + dst[i][e] = FILTER_SUM_3D(rowA[j][e], rowA[k][e], \ + rowB[j][e], rowB[k][e], \ + rowC[j][e], rowC[k][e], \ + rowD[j][e], rowD[k][e]); \ + } while(0) + +#define FILTER_F_3D(e) \ + do { \ + dst[i][e] = (rowA[j][e] + rowA[k][e] \ + + rowB[j][e] + rowB[k][e] \ + + 
rowC[j][e] + rowC[k][e] \ + + rowD[j][e] + rowD[k][e]) * 0.125F; \ + } while(0) + +#define FILTER_HF_3D(e) \ + do { \ + const float aj = half_to_float(rowA[j][e]); \ + const float ak = half_to_float(rowA[k][e]); \ + const float bj = half_to_float(rowB[j][e]); \ + const float bk = half_to_float(rowB[k][e]); \ + const float cj = half_to_float(rowC[j][e]); \ + const float ck = half_to_float(rowC[k][e]); \ + const float dj = half_to_float(rowD[j][e]); \ + const float dk = half_to_float(rowD[k][e]); \ + dst[i][e] = float_to_half((aj + ak + bj + bk + cj + ck + dj + dk) \ + * 0.125F); \ + } while(0) +/*@}*/ /** @@ -471,6 +540,385 @@ do_row(enum dtype datatype, uint comps, int srcWidth, } +/** + * Average together four rows of a source image to produce a single new + * row in the dest image. It's legal for the two source rows to point + * to the same data. The source width must be equal to either the + * dest width or two times the dest width. + * + * \param datatype GL pixel type \c GL_UNSIGNED_BYTE, \c GL_UNSIGNED_SHORT, + * \c GL_FLOAT, etc. + * \param comps number of components per pixel (1..4) + * \param srcWidth Width of a row in the source data + * \param srcRowA Pointer to one of the rows of source data + * \param srcRowB Pointer to one of the rows of source data + * \param srcRowC Pointer to one of the rows of source data + * \param srcRowD Pointer to one of the rows of source data + * \param dstWidth Width of a row in the destination data + * \param srcRowA Pointer to the row of destination data + */ +static void +do_row_3D(enum dtype datatype, uint comps, int srcWidth, + const void *srcRowA, const void *srcRowB, + const void *srcRowC, const void *srcRowD, + int dstWidth, void *dstRow) +{ + const uint k0 = (srcWidth == dstWidth) ? 0 : 1; + const uint colStride = (srcWidth == dstWidth) ? 
1 : 2; + uint i, j, k; + + assert(comps >= 1); + assert(comps <= 4); + + if ((datatype == UBYTE) && (comps == 4)) { + DECLARE_ROW_POINTERS(ubyte, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + FILTER_3D(2); + FILTER_3D(3); + } + } + else if ((datatype == UBYTE) && (comps == 3)) { + DECLARE_ROW_POINTERS(ubyte, 3); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + FILTER_3D(2); + } + } + else if ((datatype == UBYTE) && (comps == 2)) { + DECLARE_ROW_POINTERS(ubyte, 2); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + } + } + else if ((datatype == UBYTE) && (comps == 1)) { + DECLARE_ROW_POINTERS(ubyte, 1); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + } + } + else if ((datatype == USHORT) && (comps == 4)) { + DECLARE_ROW_POINTERS(ushort, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + FILTER_3D(2); + FILTER_3D(3); + } + } + else if ((datatype == USHORT) && (comps == 3)) { + DECLARE_ROW_POINTERS(ushort, 3); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + FILTER_3D(2); + } + } + else if ((datatype == USHORT) && (comps == 2)) { + DECLARE_ROW_POINTERS(ushort, 2); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + } + } + else if ((datatype == USHORT) && (comps == 1)) { + DECLARE_ROW_POINTERS(ushort, 1); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + } + } + else if ((datatype == FLOAT) && (comps == 4)) { + DECLARE_ROW_POINTERS(float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += 
colStride, k += colStride) { + FILTER_F_3D(0); + FILTER_F_3D(1); + FILTER_F_3D(2); + FILTER_F_3D(3); + } + } + else if ((datatype == FLOAT) && (comps == 3)) { + DECLARE_ROW_POINTERS(float, 3); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + FILTER_F_3D(1); + FILTER_F_3D(2); + } + } + else if ((datatype == FLOAT) && (comps == 2)) { + DECLARE_ROW_POINTERS(float, 2); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + FILTER_F_3D(1); + } + } + else if ((datatype == FLOAT) && (comps == 1)) { + DECLARE_ROW_POINTERS(float, 1); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + } + } + else if ((datatype == HALF_FLOAT) && (comps == 4)) { + DECLARE_ROW_POINTERS(half_float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_HF_3D(0); + FILTER_HF_3D(1); + FILTER_HF_3D(2); + FILTER_HF_3D(3); + } + } + else if ((datatype == HALF_FLOAT) && (comps == 3)) { + DECLARE_ROW_POINTERS(half_float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_HF_3D(0); + FILTER_HF_3D(1); + FILTER_HF_3D(2); + } + } + else if ((datatype == HALF_FLOAT) && (comps == 2)) { + DECLARE_ROW_POINTERS(half_float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_HF_3D(0); + FILTER_HF_3D(1); + } + } + else if ((datatype == HALF_FLOAT) && (comps == 1)) { + DECLARE_ROW_POINTERS(half_float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_HF_3D(0); + } + } + else if ((datatype == UINT) && (comps == 1)) { + const uint *rowA = (const uint *) srcRowA; + const uint *rowB = (const uint *) srcRowB; + const uint *rowC = (const uint *) srcRowC; + const uint *rowD = (const uint *) srcRowD; + float *dst = (float *) dstRow; + + for (i = j = 
0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const uint64_t tmp = (((uint64_t) rowA[j] + (uint64_t) rowA[k]) + + ((uint64_t) rowB[j] + (uint64_t) rowB[k]) + + ((uint64_t) rowC[j] + (uint64_t) rowC[k]) + + ((uint64_t) rowD[j] + (uint64_t) rowD[k])); + dst[i] = (float)((double) tmp * 0.125); + } + } + else if ((datatype == USHORT_5_6_5) && (comps == 3)) { + DECLARE_ROW_POINTERS0(ushort); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0x1f; + const int rowAr1 = rowA[k] & 0x1f; + const int rowBr0 = rowB[j] & 0x1f; + const int rowBr1 = rowB[k] & 0x1f; + const int rowCr0 = rowC[j] & 0x1f; + const int rowCr1 = rowC[k] & 0x1f; + const int rowDr0 = rowD[j] & 0x1f; + const int rowDr1 = rowD[k] & 0x1f; + const int rowAg0 = (rowA[j] >> 5) & 0x3f; + const int rowAg1 = (rowA[k] >> 5) & 0x3f; + const int rowBg0 = (rowB[j] >> 5) & 0x3f; + const int rowBg1 = (rowB[k] >> 5) & 0x3f; + const int rowCg0 = (rowC[j] >> 5) & 0x3f; + const int rowCg1 = (rowC[k] >> 5) & 0x3f; + const int rowDg0 = (rowD[j] >> 5) & 0x3f; + const int rowDg1 = (rowD[k] >> 5) & 0x3f; + const int rowAb0 = (rowA[j] >> 11) & 0x1f; + const int rowAb1 = (rowA[k] >> 11) & 0x1f; + const int rowBb0 = (rowB[j] >> 11) & 0x1f; + const int rowBb1 = (rowB[k] >> 11) & 0x1f; + const int rowCb0 = (rowC[j] >> 11) & 0x1f; + const int rowCb1 = (rowC[k] >> 11) & 0x1f; + const int rowDb0 = (rowD[j] >> 11) & 0x1f; + const int rowDb1 = (rowD[k] >> 11) & 0x1f; + const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1, + rowCr0, rowCr1, rowDr0, rowDr1); + const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1, + rowCg0, rowCg1, rowDg0, rowDg1); + const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1, + rowCb0, rowCb1, rowDb0, rowDb1); + dst[i] = (b << 11) | (g << 5) | r; + } + } + else if ((datatype == USHORT_4_4_4_4) && (comps == 4)) { + DECLARE_ROW_POINTERS0(ushort); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + 
i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0xf; + const int rowAr1 = rowA[k] & 0xf; + const int rowBr0 = rowB[j] & 0xf; + const int rowBr1 = rowB[k] & 0xf; + const int rowCr0 = rowC[j] & 0xf; + const int rowCr1 = rowC[k] & 0xf; + const int rowDr0 = rowD[j] & 0xf; + const int rowDr1 = rowD[k] & 0xf; + const int rowAg0 = (rowA[j] >> 4) & 0xf; + const int rowAg1 = (rowA[k] >> 4) & 0xf; + const int rowBg0 = (rowB[j] >> 4) & 0xf; + const int rowBg1 = (rowB[k] >> 4) & 0xf; + const int rowCg0 = (rowC[j] >> 4) & 0xf; + const int rowCg1 = (rowC[k] >> 4) & 0xf; + const int rowDg0 = (rowD[j] >> 4) & 0xf; + const int rowDg1 = (rowD[k] >> 4) & 0xf; + const int rowAb0 = (rowA[j] >> 8) & 0xf; + const int rowAb1 = (rowA[k] >> 8) & 0xf; + const int rowBb0 = (rowB[j] >> 8) & 0xf; + const int rowBb1 = (rowB[k] >> 8) & 0xf; + const int rowCb0 = (rowC[j] >> 8) & 0xf; + const int rowCb1 = (rowC[k] >> 8) & 0xf; + const int rowDb0 = (rowD[j] >> 8) & 0xf; + const int rowDb1 = (rowD[k] >> 8) & 0xf; + const int rowAa0 = (rowA[j] >> 12) & 0xf; + const int rowAa1 = (rowA[k] >> 12) & 0xf; + const int rowBa0 = (rowB[j] >> 12) & 0xf; + const int rowBa1 = (rowB[k] >> 12) & 0xf; + const int rowCa0 = (rowC[j] >> 12) & 0xf; + const int rowCa1 = (rowC[k] >> 12) & 0xf; + const int rowDa0 = (rowD[j] >> 12) & 0xf; + const int rowDa1 = (rowD[k] >> 12) & 0xf; + const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1, + rowCr0, rowCr1, rowDr0, rowDr1); + const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1, + rowCg0, rowCg1, rowDg0, rowDg1); + const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1, + rowCb0, rowCb1, rowDb0, rowDb1); + const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1, + rowCa0, rowCa1, rowDa0, rowDa1); + + dst[i] = (a << 12) | (b << 8) | (g << 4) | r; + } + } + else if ((datatype == USHORT_1_5_5_5_REV) && (comps == 4)) { + DECLARE_ROW_POINTERS0(ushort); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { 
+ const int rowAr0 = rowA[j] & 0x1f; + const int rowAr1 = rowA[k] & 0x1f; + const int rowBr0 = rowB[j] & 0x1f; + const int rowBr1 = rowB[k] & 0x1f; + const int rowCr0 = rowC[j] & 0x1f; + const int rowCr1 = rowC[k] & 0x1f; + const int rowDr0 = rowD[j] & 0x1f; + const int rowDr1 = rowD[k] & 0x1f; + const int rowAg0 = (rowA[j] >> 5) & 0x1f; + const int rowAg1 = (rowA[k] >> 5) & 0x1f; + const int rowBg0 = (rowB[j] >> 5) & 0x1f; + const int rowBg1 = (rowB[k] >> 5) & 0x1f; + const int rowCg0 = (rowC[j] >> 5) & 0x1f; + const int rowCg1 = (rowC[k] >> 5) & 0x1f; + const int rowDg0 = (rowD[j] >> 5) & 0x1f; + const int rowDg1 = (rowD[k] >> 5) & 0x1f; + const int rowAb0 = (rowA[j] >> 10) & 0x1f; + const int rowAb1 = (rowA[k] >> 10) & 0x1f; + const int rowBb0 = (rowB[j] >> 10) & 0x1f; + const int rowBb1 = (rowB[k] >> 10) & 0x1f; + const int rowCb0 = (rowC[j] >> 10) & 0x1f; + const int rowCb1 = (rowC[k] >> 10) & 0x1f; + const int rowDb0 = (rowD[j] >> 10) & 0x1f; + const int rowDb1 = (rowD[k] >> 10) & 0x1f; + const int rowAa0 = (rowA[j] >> 15) & 0x1; + const int rowAa1 = (rowA[k] >> 15) & 0x1; + const int rowBa0 = (rowB[j] >> 15) & 0x1; + const int rowBa1 = (rowB[k] >> 15) & 0x1; + const int rowCa0 = (rowC[j] >> 15) & 0x1; + const int rowCa1 = (rowC[k] >> 15) & 0x1; + const int rowDa0 = (rowD[j] >> 15) & 0x1; + const int rowDa1 = (rowD[k] >> 15) & 0x1; + const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1, + rowCr0, rowCr1, rowDr0, rowDr1); + const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1, + rowCg0, rowCg1, rowDg0, rowDg1); + const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1, + rowCb0, rowCb1, rowDb0, rowDb1); + const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1, + rowCa0, rowCa1, rowDa0, rowDa1); + + dst[i] = (a << 15) | (b << 10) | (g << 5) | r; + } + } + else if ((datatype == UBYTE_3_3_2) && (comps == 3)) { + DECLARE_ROW_POINTERS0(ushort); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int 
rowAr0 = rowA[j] & 0x3; + const int rowAr1 = rowA[k] & 0x3; + const int rowBr0 = rowB[j] & 0x3; + const int rowBr1 = rowB[k] & 0x3; + const int rowCr0 = rowC[j] & 0x3; + const int rowCr1 = rowC[k] & 0x3; + const int rowDr0 = rowD[j] & 0x3; + const int rowDr1 = rowD[k] & 0x3; + const int rowAg0 = (rowA[j] >> 2) & 0x7; + const int rowAg1 = (rowA[k] >> 2) & 0x7; + const int rowBg0 = (rowB[j] >> 2) & 0x7; + const int rowBg1 = (rowB[k] >> 2) & 0x7; + const int rowCg0 = (rowC[j] >> 2) & 0x7; + const int rowCg1 = (rowC[k] >> 2) & 0x7; + const int rowDg0 = (rowD[j] >> 2) & 0x7; + const int rowDg1 = (rowD[k] >> 2) & 0x7; + const int rowAb0 = (rowA[j] >> 5) & 0x7; + const int rowAb1 = (rowA[k] >> 5) & 0x7; + const int rowBb0 = (rowB[j] >> 5) & 0x7; + const int rowBb1 = (rowB[k] >> 5) & 0x7; + const int rowCb0 = (rowC[j] >> 5) & 0x7; + const int rowCb1 = (rowC[k] >> 5) & 0x7; + const int rowDb0 = (rowD[j] >> 5) & 0x7; + const int rowDb1 = (rowD[k] >> 5) & 0x7; + const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1, + rowCr0, rowCr1, rowDr0, rowDr1); + const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1, + rowCg0, rowCg1, rowDg0, rowDg1); + const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1, + rowCb0, rowCb1, rowDb0, rowDb1); + dst[i] = (b << 5) | (g << 2) | r; + } + } + else { + debug_printf("bad format in do_row_3D()"); + } +} + + + static void format_to_type_comps(enum pipe_format pformat, enum dtype *datatype, uint *comps) @@ -576,6 +1024,87 @@ reduce_2d(enum pipe_format pformat, static void +reduce_3d(enum pipe_format pformat, + int srcWidth, int srcHeight, int srcDepth, + int srcRowStride, const ubyte *srcPtr, + int dstWidth, int dstHeight, int dstDepth, + int dstRowStride, ubyte *dstPtr) +{ + const int bpt = pf_get_size(pformat); + const int border = 0; + int img, row; + int bytesPerSrcImage, bytesPerDstImage; + int bytesPerSrcRow, bytesPerDstRow; + int srcImageOffset, srcRowOffset; + enum dtype datatype; + uint comps; + + 
format_to_type_comps(pformat, &datatype, &comps); + + bytesPerSrcImage = srcWidth * srcHeight * bpt; + bytesPerDstImage = dstWidth * dstHeight * bpt; + + bytesPerSrcRow = srcWidth * bpt; + bytesPerDstRow = dstWidth * bpt; + + /* Offset between adjacent src images to be averaged together */ + srcImageOffset = (srcDepth == dstDepth) ? 0 : bytesPerSrcImage; + + /* Offset between adjacent src rows to be averaged together */ + srcRowOffset = (srcHeight == dstHeight) ? 0 : srcWidth * bpt; + + /* + * Need to average together up to 8 src pixels for each dest pixel. + * Break that down into 3 operations: + * 1. take two rows from source image and average them together. + * 2. take two rows from next source image and average them together. + * 3. take the two averaged rows and average them for the final dst row. + */ + + /* + _mesa_printf("mip3d %d x %d x %d -> %d x %d x %d\n", + srcWidth, srcHeight, srcDepth, dstWidth, dstHeight, dstDepth); + */ + + for (img = 0; img < dstDepth; img++) { + /* first source image pointer, skipping border */ + const ubyte *imgSrcA = srcPtr + + (bytesPerSrcImage + bytesPerSrcRow + border) * bpt * border + + img * (bytesPerSrcImage + srcImageOffset); + /* second source image pointer, skipping border */ + const ubyte *imgSrcB = imgSrcA + srcImageOffset; + /* address of the dest image, skipping border */ + ubyte *imgDst = dstPtr + + (bytesPerDstImage + bytesPerDstRow + border) * bpt * border + + img * bytesPerDstImage; + + /* setup the four source row pointers and the dest row pointer */ + const ubyte *srcImgARowA = imgSrcA; + const ubyte *srcImgARowB = imgSrcA + srcRowOffset; + const ubyte *srcImgBRowA = imgSrcB; + const ubyte *srcImgBRowB = imgSrcB + srcRowOffset; + ubyte *dstImgRow = imgDst; + + for (row = 0; row < dstHeight; row++) { + do_row_3D(datatype, comps, srcWidth, + srcImgARowA, srcImgARowB, + srcImgBRowA, srcImgBRowB, + dstWidth, dstImgRow); + + /* advance to next rows */ + srcImgARowA += bytesPerSrcRow + srcRowOffset; + srcImgARowB 
+= bytesPerSrcRow + srcRowOffset; + srcImgBRowA += bytesPerSrcRow + srcRowOffset; + srcImgBRowB += bytesPerSrcRow + srcRowOffset; + dstImgRow += bytesPerDstRow; + } + } +} + + + + +static void make_1d_mipmap(struct gen_mipmap_state *ctx, struct pipe_texture *pt, uint face, uint baseLevel, uint lastLevel) @@ -587,31 +1116,30 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; - struct pipe_surface *srcSurf, *dstSurf; + struct pipe_transfer *srcTrans, *dstTrans; void *srcMap, *dstMap; - srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice, - PIPE_BUFFER_USAGE_CPU_READ); - - dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, - PIPE_BUFFER_USAGE_CPU_WRITE); - - srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) - + srcSurf->offset); - dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) - + dstSurf->offset); + srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, + PIPE_TRANSFER_READ, 0, 0, + pt->width[srcLevel], + pt->height[srcLevel]); + dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, + PIPE_TRANSFER_WRITE, 0, 0, + pt->width[dstLevel], + pt->height[dstLevel]); + + srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); + dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); reduce_1d(pt->format, - srcSurf->width, srcMap, - dstSurf->width, dstMap); + srcTrans->width, srcMap, + dstTrans->width, dstMap); - pipe_buffer_unmap(screen, srcSurf->buffer); - pipe_buffer_unmap(screen, dstSurf->buffer); + screen->transfer_unmap(screen, srcTrans); + screen->transfer_unmap(screen, dstTrans); - pipe_surface_reference(&srcSurf, NULL); - pipe_surface_reference(&dstSurf, NULL); + screen->tex_transfer_release(screen, &srcTrans); + screen->tex_transfer_release(screen, &dstTrans); } } @@ -631,32 +1159,32 @@ make_2d_mipmap(struct 
gen_mipmap_state *ctx, for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; - struct pipe_surface *srcSurf, *dstSurf; + struct pipe_transfer *srcTrans, *dstTrans; ubyte *srcMap, *dstMap; - srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice, - PIPE_BUFFER_USAGE_CPU_READ); - dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, - PIPE_BUFFER_USAGE_CPU_WRITE); - - srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) - + srcSurf->offset); - dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) - + dstSurf->offset); + srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, + PIPE_TRANSFER_READ, 0, 0, + pt->width[srcLevel], + pt->height[srcLevel]); + dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, + PIPE_TRANSFER_WRITE, 0, 0, + pt->width[dstLevel], + pt->height[dstLevel]); + + srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); + dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); reduce_2d(pt->format, - srcSurf->width, srcSurf->height, - srcSurf->stride, srcMap, - dstSurf->width, dstSurf->height, - dstSurf->stride, dstMap); + srcTrans->width, srcTrans->height, + srcTrans->stride, srcMap, + dstTrans->width, dstTrans->height, + dstTrans->stride, dstMap); - pipe_buffer_unmap(screen, srcSurf->buffer); - pipe_buffer_unmap(screen, dstSurf->buffer); + screen->transfer_unmap(screen, srcTrans); + screen->transfer_unmap(screen, dstTrans); - pipe_surface_reference(&srcSurf, NULL); - pipe_surface_reference(&dstSurf, NULL); + screen->tex_transfer_release(screen, &srcTrans); + screen->tex_transfer_release(screen, &dstTrans); } } @@ -666,6 +1194,46 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, struct pipe_texture *pt, uint face, uint baseLevel, uint lastLevel) { +#if 0 + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + uint dstLevel, 
zslice = 0; + + assert(pt->block.width == 1); + assert(pt->block.height == 1); + + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + struct pipe_transfer *srcTrans, *dstTrans; + ubyte *srcMap, *dstMap; + + srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice, + PIPE_TRANSFER_READ, 0, 0, + pt->width[srcLevel], + pt->height[srcLevel]); + dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice, + PIPE_TRANSFER_WRITE, 0, 0, + pt->width[dstLevel], + pt->height[dstLevel]); + + srcMap = (ubyte *) screen->transfer_map(screen, srcTrans); + dstMap = (ubyte *) screen->transfer_map(screen, dstTrans); + + reduce_3d(pt->format, + srcTrans->width, srcTrans->height, + srcTrans->stride, srcMap, + dstTrans->width, dstTrans->height, + dstTrans->stride, dstMap); + + screen->transfer_unmap(screen, srcTrans); + screen->transfer_unmap(screen, dstTrans); + + screen->tex_transfer_release(screen, &srcTrans); + screen->tex_transfer_release(screen, &dstTrans); + } +#else + (void) reduce_3d; +#endif } @@ -920,7 +1488,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, /* init framebuffer state */ memset(&fb, 0, sizeof(fb)); - fb.num_cbufs = 1; + fb.nr_cbufs = 1; /* set min/mag to same filter for faster sw speed */ ctx->sampler.mag_img_filter = filter; diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index 2d15932ce3..6da7353e25 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -34,7 +34,7 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_handle_table.h" diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c index 0bc8de9632..2f83e318e4 100644 --- a/src/gallium/auxiliary/util/u_hash_table.c +++ b/src/gallium/auxiliary/util/u_hash_table.c @@ -39,7 +39,7 @@ #include 
"pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "cso_cache/cso_hash.h" diff --git a/src/gallium/auxiliary/util/u_keymap.c b/src/gallium/auxiliary/util/u_keymap.c new file mode 100644 index 0000000000..3f70809efd --- /dev/null +++ b/src/gallium/auxiliary/util/u_keymap.c @@ -0,0 +1,309 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Key lookup/associative container. + * + * Like Jose's u_hash_table, based on CSO cache code for now. 
+ *
+ * Author: Brian Paul
+ */
+
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+#include "pipe/p_error.h"
+
+#include "cso_cache/cso_hash.h"
+
+#include "util/u_memory.h"
+#include "util/u_keymap.h"
+
+
+/** The map object: a cso_hash container plus bookkeeping. */
+struct keymap
+{
+   struct cso_hash *cso;            /**< underlying hash container */
+   unsigned key_size;               /**< size of every key, in bytes */
+   unsigned max_entries;            /**< XXX not obeyed yet */
+   unsigned num_entries;            /**< running count kept by insert/remove */
+   keymap_delete_func delete_func;  /**< run when an entry is deleted/replaced */
+};
+
+
+/** One stored entry: a private copy of the key and the client's data. */
+struct keymap_item
+{
+   void *key;
+   void *value;
+};
+
+
+/**
+ * Fallback delete callback, installed when the client does not supply
+ * one.  It frees the data pointer; map, key and user are not used.
+ */
+static void
+default_delete_func(const struct keymap *map,
+                    const void *key, void *data, void *user)
+{
+   FREE(data);
+}
+
+
+/** Return the keymap_item stored at the given hash iterator position. */
+static INLINE struct keymap_item *
+hash_table_item(struct cso_hash_iter iter)
+{
+   return (struct keymap_item *) cso_hash_iter_data(iter);
+}
+
+
+/**
+ * Return 4-byte hash key for a block of bytes.
+ * Each uint of the key is multiplied by its 1-based position and XORed
+ * into the result.  Only keySize / 4 whole uints are read, so trailing
+ * bytes (and keys shorter than 4 bytes) do not contribute to the hash.
+ */
+static unsigned
+hash(const void *key, unsigned keySize)
+{
+   const unsigned *words = (const unsigned *) key;
+   const unsigned numWords = keySize / 4; /* convert from bytes to uints */
+   unsigned result = 0;
+   unsigned w;
+
+   for (w = 0; w < numWords; w++)
+      result ^= (w + 1) * words[w];
+
+   /*result = result ^ (result >> 11) ^ (result >> 22);*/
+
+   return result;
+}
+
+
+/**
+ * Create a new, empty map.
+ * \param keySize  size of the keys in bytes
+ * \param maxEntries  max number of entries to allow (~0 = infinity)
+ * \param deleteFunc  optional callback to call when entries
+ *                    are deleted/replaced; when NULL the default
+ *                    callback (which frees the data pointer) is used
+ * \return the new map, or NULL if an allocation failed
+ */
+struct keymap *
+util_new_keymap(unsigned keySize, unsigned maxEntries,
+                keymap_delete_func deleteFunc)
+{
+   struct keymap *map = MALLOC_STRUCT(keymap);
+
+   if (map == NULL)
+      return NULL;
+
+   map->cso = cso_hash_create();
+   if (map->cso == NULL) {
+      /* don't hand back a half-built map */
+      FREE(map);
+      return NULL;
+   }
+
+   map->key_size = keySize;
+   map->max_entries = maxEntries;
+   map->num_entries = 0;
+
+   if (deleteFunc)
+      map->delete_func = deleteFunc;
+   else
+      map->delete_func = default_delete_func;
+
+   return map;
+}
+
+
+/**
+ * Delete/free a keymap and all entries.  The deleteFunc that was given at
+ * create time will be called for each entry.
+ * \param user  user-provided pointer passed through to the delete callback
+ */
+void
+util_delete_keymap(struct keymap *map, void *user)
+{
+   /* entries first, then the hash container, then the map struct itself */
+   util_keymap_remove_all(map, user);
+   cso_hash_delete(map->cso);
+   FREE(map);
+}
+
+
+/**
+ * Locate the entry for a key.
+ * Starts at the node cso_hash_find() returns for key_hash and steps
+ * forward until an item's stored key bytes equal \p key (memcmp over
+ * key_size bytes).
+ * \return iterator at the matching entry, or a null iterator if the walk
+ *         ran out of nodes without a match
+ */
+static INLINE struct cso_hash_iter
+hash_table_find_iter(const struct keymap *map, const void *key,
+                     unsigned key_hash)
+{
+   struct cso_hash_iter iter;
+
+   for (iter = cso_hash_find(map->cso, key_hash);
+        !cso_hash_iter_is_null(iter);
+        iter = cso_hash_iter_next(iter)) {
+      const struct keymap_item *candidate = hash_table_item(iter);
+      if (memcmp(candidate->key, key, map->key_size) == 0)
+         break;
+   }
+
+   return iter;
+}
+
+
+/**
+ * Same search as hash_table_find_iter(), but hands back the item itself.
+ * \return the matching item, or NULL when the key is not in the map
+ */
+static INLINE struct keymap_item *
+hash_table_find_item(const struct keymap *map, const void *key,
+                     unsigned key_hash)
+{
+   const struct cso_hash_iter iter = hash_table_find_iter(map, key, key_hash);
+
+   return cso_hash_iter_is_null(iter) ? NULL : hash_table_item(iter);
+}
+
+
+/**
+ * Insert a new key + data pointer into the table.
+ * Note: we create a copy of the key, but not the data!
+ * If the key is already present in the table, replace the existing
+ * entry (calling the delete callback on the previous entry).
+ * If the maximum capacity of the map is reached an old entry
+ * will be deleted (the delete callback will be called).
+ * NOTE(review): the capacity rule above is not implemented; the insert
+ * code never reads max_entries, so nothing is evicted.
+ */
+boolean
+util_keymap_insert(struct keymap *map, const void *key,
+                   const void *data, void *user)
+{
+   unsigned key_hash;
+   struct keymap_item *item;
+   struct cso_hash_iter iter;
+
+   assert(map);
+
+   key_hash = hash(key, map->key_size);
+
+   item = hash_table_find_item(map, key, key_hash);
+   if (item) {
+      /* key already present: let the callback dispose of the old value,
+       * then reuse the existing item (and its key copy) for the new one
+       */
+      map->delete_func(map, item->key, item->value, user);
+      item->value = (void *) data;
+      return TRUE;
+   }
+
+   /* note: map->max_entries is not checked here, nothing is evicted */
+   item = MALLOC_STRUCT(keymap_item);
+   if (!item)
+      return FALSE;
+
+   /* the map owns a private copy of the key; the data is NOT copied */
+   item->key = mem_dup(key, map->key_size);
+   if (!item->key) {
+      /* presumably mem_dup() returns NULL on allocation failure; storing
+       * a NULL key would make later memcmp()-based lookups crash
+       */
+      FREE(item);
+      return FALSE;
+   }
+   item->value = (void *) data;
+
+   iter = cso_hash_insert(map->cso, key_hash, item);
+   if (cso_hash_iter_is_null(iter)) {
+      /* release the key copy as well as the item (it used to leak) */
+      FREE(item->key);
+      FREE(item);
+      return FALSE;
+   }
+
+   map->num_entries++;
+
+   return TRUE;
+}
+
+
+/**
+ * Look up a key in the map and return the associated data pointer.
+ * \return the stored data pointer, or NULL if the key is not present
+ */
+const void *
+util_keymap_lookup(const struct keymap *map, const void *key)
+{
+   unsigned key_hash;
+   struct keymap_item *item;
+
+   assert(map);
+
+   key_hash = hash(key, map->key_size);
+
+   item = hash_table_find_item(map, key, key_hash);
+   if (!item)
+      return NULL;
+
+   return item->value;
+}
+
+
+/**
+ * Remove an entry from the map.
+ * The delete callback will be called if the given key/entry is found.
+ * Nothing happens if the key is not in the map.
+ * \param user  passed to the delete callback as the last param.
+ */
+void
+util_keymap_remove(struct keymap *map, const void *key, void *user)
+{
+   unsigned key_hash;
+   struct cso_hash_iter iter;
+   struct keymap_item *item;
+
+   assert(map);
+
+   key_hash = hash(key, map->key_size);
+
+   iter = hash_table_find_iter(map, key, key_hash);
+   if (cso_hash_iter_is_null(iter))
+      return;
+
+   item = hash_table_item(iter);
+   assert(item);
+   /* the callback disposes of the value; the key copy and item are ours */
+   map->delete_func(map, item->key, item->value, user);
+   FREE(item->key);
+   FREE(item);
+
+   map->num_entries--;
+
+   cso_hash_erase(map->cso, iter);
+}
+
+
+/**
+ * Remove all entries from the map, calling the delete callback for each.
+ * \param user  passed to the delete callback as the last param.
+ */
+void
+util_keymap_remove_all(struct keymap *map, void *user)
+{
+   struct cso_hash_iter iter;
+   struct keymap_item *item;
+
+   assert(map);
+
+   /* keep taking the first node until the hash is empty */
+   iter = cso_hash_first_node(map->cso);
+   while (!cso_hash_iter_is_null(iter)) {
+      item = (struct keymap_item *)
+         cso_hash_take(map->cso, cso_hash_iter_key(iter));
+      map->delete_func(map, item->key, item->value, user);
+      FREE(item->key);
+      FREE(item);
+      iter = cso_hash_first_node(map->cso);
+   }
+
+   /* the map is empty now; keep the counter in step with the hash
+    * (it used to stay at its old value after a remove-all)
+    */
+   map->num_entries = 0;
+}
+
+
+/**
+ * Print the map's address, current entry count and max_entries limit
+ * through debug_printf().
+ */
+extern void
+util_keymap_info(const struct keymap *map)
+{
+   assert(map);
+
+   debug_printf("Keymap %p: %u of max %u entries\n",
+                (void *) map, map->num_entries, map->max_entries);
+}
diff --git a/src/gallium/auxiliary/util/u_keymap.h b/src/gallium/auxiliary/util/u_keymap.h
new file mode 100644
index 0000000000..8d60a76fc3
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_keymap.h
@@ -0,0 +1,68 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_KEYMAP_H +#define U_KEYMAP_H + +#include "pipe/p_compiler.h" + + +/** opaque keymap type */ +struct keymap; + + +/** Delete/callback function type */ +typedef void (*keymap_delete_func)(const struct keymap *map, + const void *key, void *data, + void *user); + + +extern struct keymap * +util_new_keymap(unsigned keySize, unsigned maxEntries, + keymap_delete_func deleteFunc); + +extern void +util_delete_keymap(struct keymap *map, void *user); + +extern boolean +util_keymap_insert(struct keymap *map, const void *key, + const void *data, void *user); + +extern const void * +util_keymap_lookup(const struct keymap *map, const void *key); + +extern void +util_keymap_remove(struct keymap *map, const void *key, void *user); + +extern void +util_keymap_remove_all(struct keymap *map, void *user); + +extern void +util_keymap_info(const struct keymap *map); + + +#endif /* U_KEYMAP_H */ diff --git a/src/gallium/auxiliary/util/u_linear.c b/src/gallium/auxiliary/util/u_linear.c new file mode 100644 index 0000000000..6be365e53b --- /dev/null +++ b/src/gallium/auxiliary/util/u_linear.c @@ -0,0 +1,70 @@ + +#include "util/u_debug.h" +#include "u_linear.h" + +void +pipe_linear_to_tile(size_t src_stride, const void *src_ptr, + struct pipe_tile_info *t, void *dst_ptr) +{ + int x, y, z; + char *ptr; + size_t bytes = t->cols * t->block.size; + char *dst_ptr2 = (char *) dst_ptr; + + assert(pipe_linear_check_tile(t)); + + /* lets write lineary to the tiled buffer */ + for (y = 0; y < t->tiles_y; y++) { + for (x = 0; x < t->tiles_x; x++) { + /* this inner loop could be replace with SSE magic */ + ptr = (char*)src_ptr + 
src_stride * t->rows * y + bytes * x; + for (z = 0; z < t->rows; z++) { + memcpy(dst_ptr2, ptr, bytes); + dst_ptr2 += bytes; + ptr += src_stride; + } + } + } +} + +void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, + size_t dst_stride, void *dst_ptr) +{ + int x, y, z; + char *ptr; + size_t bytes = t->cols * t->block.size; + const char *src_ptr2 = (const char *) src_ptr; + + /* lets read lineary from the tiled buffer */ + for (y = 0; y < t->tiles_y; y++) { + for (x = 0; x < t->tiles_x; x++) { + /* this inner loop could be replace with SSE magic */ + ptr = (char*)dst_ptr + dst_stride * t->rows * y + bytes * x; + for (z = 0; z < t->rows; z++) { + memcpy(ptr, src_ptr2, bytes); + src_ptr2 += bytes; + ptr += dst_stride; + } + } + } +} + +void +pipe_linear_fill_info(struct pipe_tile_info *t, + const struct pipe_format_block *block, + unsigned tile_width, unsigned tile_height, + unsigned tiles_x, unsigned tiles_y) +{ + t->block = *block; + + t->tile.width = tile_width; + t->tile.height = tile_height; + t->cols = t->tile.width / t->block.width; + t->rows = t->tile.height / t->block.height; + t->tile.size = t->cols * t->rows * t->block.size; + + t->tiles_x = tiles_x; + t->tiles_y = tiles_y; + t->stride = t->cols * t->tiles_x * t->block.size; + t->size = t->tiles_x * t->tiles_y * t->tile.size; +} diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h new file mode 100644 index 0000000000..1589f029bc --- /dev/null +++ b/src/gallium/auxiliary/util/u_linear.h @@ -0,0 +1,61 @@ + +#ifndef U_LINEAR_H +#define U_LINEAR_H + +#include "pipe/p_format.h" + +struct pipe_tile_info +{ + unsigned size; + unsigned stride; + + /* The number of tiles */ + unsigned tiles_x; + unsigned tiles_y; + + /* size of each tile expressed in blocks */ + unsigned cols; + unsigned rows; + + /* Describe the tile in pixels */ + struct pipe_format_block tile; + + /* Describe each block within the tile */ + struct pipe_format_block block; +}; + +void 
pipe_linear_to_tile(size_t src_stride, const void *src_ptr, + struct pipe_tile_info *t, void *dst_ptr); + +void pipe_linear_from_tile(struct pipe_tile_info *t, const void *src_ptr, + size_t dst_stride, void *dst_ptr); + +/** + * Convenience function to fillout a pipe_tile_info struct. + * @t info to fill out. + * @block block info about pixel layout + * @tile_width the width of the tile in pixels + * @tile_height the height of the tile in pixels + * @tiles_x number of tiles in x axis + * @tiles_y number of tiles in y axis + */ +void pipe_linear_fill_info(struct pipe_tile_info *t, + const struct pipe_format_block *block, + unsigned tile_width, unsigned tile_height, + unsigned tiles_x, unsigned tiles_y); + +static INLINE boolean pipe_linear_check_tile(const struct pipe_tile_info *t) +{ + if (t->tile.size != t->block.size * t->cols * t->rows) + return FALSE; + + if (t->stride != t->block.size * t->cols * t->tiles_x) + return FALSE; + + if (t->size < t->stride * t->rows * t->tiles_y) + return FALSE; + + return TRUE; +} + +#endif /* U_LINEAR_H */ diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 1ae3234423..1ecde7a912 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -40,7 +40,7 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #ifdef __cplusplus @@ -68,7 +68,7 @@ __inline double ceil(double val) return ceil_val; } -#ifndef PIPE_SUBSYSTEM_WINDOWS_CE +#ifndef PIPE_SUBSYSTEM_WINDOWS_CE_OGL __inline double floor(double val) { double floor_val; @@ -341,6 +341,10 @@ unsigned ffs( unsigned u ) } #endif +#ifdef __MINGW32__ +#define ffs __builtin_ffs +#endif + /** * Return float bits. 
diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h index 79e34e185f..ceb3a1cb61 100644 --- a/src/gallium/auxiliary/util/u_memory.h +++ b/src/gallium/auxiliary/util/u_memory.h @@ -36,7 +36,7 @@ #include "util/u_pointer.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #ifdef __cplusplus @@ -56,7 +56,7 @@ extern "C" { /* memory debugging */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #define MALLOC( _size ) \ debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) @@ -151,6 +151,8 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) #define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) +#define CALLOC_VARIANT_LENGTH_STRUCT(T,more_size) ((struct T *) CALLOC(1, sizeof(struct T) + more_size)) + /** * Return memory on given byte alignment diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c index 01dd67c810..151a480d34 100644 --- a/src/gallium/auxiliary/util/u_mm.c +++ b/src/gallium/auxiliary/util/u_mm.c @@ -24,14 +24,14 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_mm.h" void -mmDumpMemInfo(const struct mem_block *heap) +u_mmDumpMemInfo(const struct mem_block *heap) { debug_printf("Memory heap %p:\n", (void *)heap); if (heap == 0) { @@ -58,7 +58,7 @@ mmDumpMemInfo(const struct mem_block *heap) } struct mem_block * -mmInit(int ofs, int size) +u_mmInit(int ofs, int size) { struct mem_block *heap, *block; @@ -165,7 +165,7 @@ SliceBlock(struct mem_block *p, struct mem_block * -mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) +u_mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) { struct mem_block *p; const int mask = (1 << align2)-1; @@ -202,7 +202,7 @@ mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) struct mem_block * -mmFindBlock(struct mem_block *heap, int start) +u_mmFindBlock(struct mem_block *heap, int 
start) { struct mem_block *p; @@ -241,7 +241,7 @@ Join2Blocks(struct mem_block *p) } int -mmFreeMem(struct mem_block *b) +u_mmFreeMem(struct mem_block *b) { if (!b) return 0; @@ -270,7 +270,7 @@ mmFreeMem(struct mem_block *b) void -mmDestroy(struct mem_block *heap) +u_mmDestroy(struct mem_block *heap) { struct mem_block *p; diff --git a/src/gallium/auxiliary/util/u_mm.h b/src/gallium/auxiliary/util/u_mm.h index b226b101cb..ce20e48763 100644 --- a/src/gallium/auxiliary/util/u_mm.h +++ b/src/gallium/auxiliary/util/u_mm.h @@ -49,7 +49,7 @@ struct mem_block { * input: total size in bytes * return: a heap pointer if OK, NULL if error */ -extern struct mem_block *mmInit(int ofs, int size); +extern struct mem_block *u_mmInit(int ofs, int size); /** * Allocate 'size' bytes with 2^align2 bytes alignment, @@ -61,7 +61,7 @@ extern struct mem_block *mmInit(int ofs, int size); * startSearch = linear offset from start of heap to begin search * return: pointer to the allocated block, 0 if error */ -extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2, +extern struct mem_block *u_mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch); /** @@ -69,23 +69,23 @@ extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2 * input: pointer to a block * return: 0 if OK, -1 if error */ -extern int mmFreeMem(struct mem_block *b); +extern int u_mmFreeMem(struct mem_block *b); /** * Free block starts at offset * input: pointer to a heap, start offset * return: pointer to a block */ -extern struct mem_block *mmFindBlock(struct mem_block *heap, int start); +extern struct mem_block *u_mmFindBlock(struct mem_block *heap, int start); /** * destroy MM */ -extern void mmDestroy(struct mem_block *mmInit); +extern void u_mmDestroy(struct mem_block *mmInit); /** * For debuging purpose. 
*/ -extern void mmDumpMemInfo(const struct mem_block *mmInit); +extern void u_mmDumpMemInfo(const struct mem_block *mmInit); #endif diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index e45e84ded2..d7c3995dbf 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -119,4 +119,20 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr ) } +static INLINE boolean u_reduced_prim( unsigned pipe_prim ) +{ + switch (pipe_prim) { + case PIPE_PRIM_POINTS: + return PIPE_PRIM_POINTS; + + case PIPE_PRIM_LINES: + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_LINE_LOOP: + return PIPE_PRIM_LINES; + + default: + return PIPE_PRIM_TRIANGLES; + } +} + #endif diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index fe81a685be..6e24e594e4 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -169,46 +169,35 @@ util_surface_copy(struct pipe_context *pipe, unsigned w, unsigned h) { struct pipe_screen *screen = pipe->screen; - struct pipe_surface *new_src = NULL, *new_dst = NULL; + struct pipe_transfer *src_trans, *dst_trans; void *dst_map; const void *src_map; - assert(dst->block.size == src->block.size); - assert(dst->block.width == src->block.width); - assert(dst->block.height == src->block.height); - - if ((src->usage & PIPE_BUFFER_USAGE_CPU_READ) == 0) { - /* Need to create new src surface which is CPU readable */ - assert(src->texture); - if (!src->texture) - return; - new_src = screen->get_tex_surface(screen, + assert(src->texture && dst->texture); + if (!src->texture || !dst->texture) + return; + src_trans = screen->get_tex_transfer(screen, src->texture, src->face, src->level, src->zslice, - PIPE_BUFFER_USAGE_CPU_READ); - src = new_src; - } + PIPE_TRANSFER_READ, + src_x, src_y, w, h); - if ((dst->usage & PIPE_BUFFER_USAGE_CPU_WRITE) == 0) { - /* Need to create new dst surface which is CPU writable */ - 
assert(dst->texture); - if (!dst->texture) - return; - new_dst = screen->get_tex_surface(screen, + dst_trans = screen->get_tex_transfer(screen, dst->texture, dst->face, dst->level, dst->zslice, - PIPE_BUFFER_USAGE_CPU_WRITE); - dst = new_dst; - } + PIPE_TRANSFER_WRITE, + dst_x, dst_y, w, h); - src_map = pipe->screen->surface_map(screen, - src, PIPE_BUFFER_USAGE_CPU_READ); - dst_map = pipe->screen->surface_map(screen, - dst, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(dst_trans->block.size == src_trans->block.size); + assert(dst_trans->block.width == src_trans->block.width); + assert(dst_trans->block.height == src_trans->block.height); + + src_map = pipe->screen->transfer_map(screen, src_trans); + dst_map = pipe->screen->transfer_map(screen, dst_trans); assert(src_map); assert(dst_map); @@ -216,36 +205,25 @@ util_surface_copy(struct pipe_context *pipe, if (src_map && dst_map) { /* If do_flip, invert src_y position and pass negative src stride */ pipe_copy_rect(dst_map, - &dst->block, - dst->stride, - dst_x, dst_y, + &dst_trans->block, + dst_trans->stride, + 0, 0, w, h, src_map, - do_flip ? -(int) src->stride : src->stride, - src_x, - do_flip ? src_y + h - 1 : src_y); + do_flip ? -(int) src_trans->stride : src_trans->stride, + 0, + do_flip ? 
h - 1 : 0); } - pipe->screen->surface_unmap(pipe->screen, src); - pipe->screen->surface_unmap(pipe->screen, dst); + pipe->screen->transfer_unmap(pipe->screen, src_trans); + pipe->screen->transfer_unmap(pipe->screen, dst_trans); - if (new_src) - screen->tex_surface_release(screen, &new_src); - if (new_dst) - screen->tex_surface_release(screen, &new_dst); + screen->tex_transfer_release(screen, &src_trans); + screen->tex_transfer_release(screen, &dst_trans); } -static void * -get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) -{ - return (char *)dst_map - + y / dst->block.height * dst->stride - + x / dst->block.width * dst->block.size; -} - - #define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) @@ -260,42 +238,38 @@ util_surface_fill(struct pipe_context *pipe, unsigned width, unsigned height, unsigned value) { struct pipe_screen *screen = pipe->screen; - struct pipe_surface *new_dst = NULL; + struct pipe_transfer *dst_trans; void *dst_map; - if ((dst->usage & PIPE_BUFFER_USAGE_CPU_WRITE) == 0) { - /* Need to create new dst surface which is CPU writable */ - assert(dst->texture); - if (!dst->texture) - return; - new_dst = screen->get_tex_surface(screen, + assert(dst->texture); + if (!dst->texture) + return; + dst_trans = screen->get_tex_transfer(screen, dst->texture, dst->face, dst->level, dst->zslice, - PIPE_BUFFER_USAGE_CPU_WRITE); - dst = new_dst; - } + PIPE_TRANSFER_WRITE, + dstx, dsty, width, height); - dst_map = pipe->screen->surface_map(screen, - dst, PIPE_BUFFER_USAGE_CPU_WRITE); + dst_map = pipe->screen->transfer_map(screen, dst_trans); assert(dst_map); if (dst_map) { - assert(dst->stride > 0); + assert(dst_trans->stride > 0); - switch (dst->block.size) { + switch (dst_trans->block.size) { case 1: case 2: case 4: - pipe_fill_rect(dst_map, &dst->block, dst->stride, - dstx, dsty, width, height, value); + pipe_fill_rect(dst_map, &dst_trans->block, dst_trans->stride, + 0, 0, width, height, value); break; case 8: { /* expand the 4-byte clear 
value to an 8-byte value */ - ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty); + ushort *row = (ushort *) dst_map; ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); @@ -312,7 +286,7 @@ util_surface_fill(struct pipe_context *pipe, row[j*4+2] = val2; row[j*4+3] = val3; } - row += dst->stride/2; + row += dst_trans->stride/2; } } break; @@ -322,8 +296,6 @@ util_surface_fill(struct pipe_context *pipe, } } - pipe->screen->surface_unmap(pipe->screen, dst); - - if (new_dst) - screen->tex_surface_release(screen, &new_dst); + pipe->screen->transfer_unmap(pipe->screen, dst_trans); + screen->tex_transfer_release(screen, &dst_trans); } diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c new file mode 100644 index 0000000000..089bbbc48a --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_screen.c @@ -0,0 +1,143 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "u_simple_screen.h" + +#include "pipe/p_screen.h" +#include "pipe/internal/p_winsys_screen.h" + + +static struct pipe_buffer * +pass_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + return screen->winsys->buffer_create(screen->winsys, + alignment, usage, size); +} + +static struct pipe_buffer * +pass_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + return screen->winsys->user_buffer_create(screen->winsys, + ptr, bytes); +} + +static struct pipe_buffer * +pass_surface_buffer_create(struct pipe_screen *screen, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned *stride) +{ + return screen->winsys->surface_buffer_create(screen->winsys, + width, height, + format, usage, stride); +} + +static void * +pass_buffer_map(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned usage) +{ + return screen->winsys->buffer_map(screen->winsys, + buf, usage); +} + +static void +pass_buffer_unmap(struct pipe_screen *screen, + struct pipe_buffer *buf) +{ + screen->winsys->buffer_unmap(screen->winsys, buf); +} + +static void +pass_buffer_destroy(struct pipe_screen *screen, + struct pipe_buffer *buf) +{ + screen->winsys->buffer_destroy(screen->winsys, buf); +} + + +static void +pass_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_surface *surf, + void *context_private) +{ + screen->winsys->flush_frontbuffer(screen->winsys, + surf, context_private); +} + +static void +pass_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct 
pipe_fence_handle *fence) +{ + screen->winsys->fence_reference(screen->winsys, + ptr, fence); +} + +static int +pass_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return screen->winsys->fence_signalled(screen->winsys, + fence, flag); +} + +static int +pass_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return screen->winsys->fence_finish(screen->winsys, + fence, flag); +} + +void u_simple_screen_init(struct pipe_screen *screen) +{ + screen->buffer_create = pass_buffer_create; + screen->user_buffer_create = pass_user_buffer_create; + screen->surface_buffer_create = pass_surface_buffer_create; + + screen->buffer_map = pass_buffer_map; + screen->buffer_unmap = pass_buffer_unmap; + screen->buffer_destroy = pass_buffer_destroy; + screen->flush_frontbuffer = pass_flush_frontbuffer; + screen->fence_reference = pass_fence_reference; + screen->fence_signalled = pass_fence_signalled; + screen->fence_finish = pass_fence_finish; +} + +const char* u_simple_screen_winsys_name(struct pipe_screen *screen) +{ + return screen->winsys->get_name(screen->winsys); +} diff --git a/src/gallium/auxiliary/util/u_simple_screen.h b/src/gallium/auxiliary/util/u_simple_screen.h new file mode 100644 index 0000000000..6612a8a7c0 --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_screen.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_SIMPLE_SCREEN_H +#define U_SIMPLE_SCREEN_H + +struct pipe_screen; +struct pipe_winsys; + +/** + * The following function initializes a simple passthrough screen. + * + * All the relevant screen function pointers will be forwarded to the + * winsys. + */ +void u_simple_screen_init(struct pipe_screen *screen); + +/** + * Returns the name of the winsys associated with this screen.
+ */ +const char* u_simple_screen_winsys_name(struct pipe_screen *screen); + +#endif diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index f06d13c2c4..2fcad6fe3d 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -34,10 +34,10 @@ #include "pipe/p_context.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/p_screen.h" #include "pipe/p_shader_tokens.h" #include "util/u_memory.h" @@ -359,3 +359,10 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe, return pipe->create_fs_state(pipe, shader); } + +void +util_free_shader(struct pipe_shader_state *shader) +{ + FREE((struct tgsi_token *)shader->tokens); + shader->tokens = NULL; +} diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h index 8ca4977d71..99b8d9067d 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.h +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -60,6 +60,10 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe, struct pipe_shader_state *shader); +extern void +util_free_shader(struct pipe_shader_state *shader); + + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h new file mode 100644 index 0000000000..e2a8491e62 --- /dev/null +++ b/src/gallium/auxiliary/util/u_sse.h @@ -0,0 +1,77 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * SSE intrinsics portability header. + * + * Although the SSE intrinsics are supported by all modern x86 and x86-64 + * compilers, there are some intrinsics missing in some implementations + * (especially older MSVC versions). This header abstracts that away.
+ */ + +#ifndef U_SSE_H_ +#define U_SSE_H_ + +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_SSE) + +#include <xmmintrin.h> +#include <emmintrin.h> + + +/* MSVC before VC8 does not support the _mm_castxxx_yyy */ +#if defined(_MSC_VER) && _MSC_VER < 1500 + +union __declspec(align(16)) m128_types { + __m128 m128; + __m128i m128i; + __m128d m128d; +}; + +static __inline __m128 +_mm_castsi128_ps(__m128i a) +{ + union m128_types u; + u.m128i = a; + return u.m128; +} + +static __inline __m128i +_mm_castps_si128(__m128 a) +{ + union m128_types u; + u.m128 = a; + return u.m128i; +} + +#endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ + +#endif /* PIPE_ARCH_SSE */ + +#endif /* U_SSE_H_ */ diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 336c7714d4..d31ca9c029 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -28,7 +28,6 @@ /** * RGBA/float tile get/put functions. * Usable both by drivers and state trackers. - * Surfaces should already be in a mapped state. */ @@ -42,58 +41,58 @@ /** - * Move raw block of pixels from surface to user memory. - * This should be usable by any hw driver that has mappable surfaces. + * Move raw block of pixels from transfer object to user memory.
*/ void -pipe_get_tile_raw(struct pipe_surface *ps, +pipe_get_tile_raw(struct pipe_transfer *pt, uint x, uint y, uint w, uint h, void *dst, int dst_stride) { + struct pipe_screen *screen = pt->texture->screen; const void *src; if (dst_stride == 0) - dst_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; + dst_stride = pf_get_nblocksx(&pt->block, w) * pt->block.size; - if (pipe_clip_tile(x, y, &w, &h, ps)) + if (pipe_clip_tile(x, y, &w, &h, pt)) return; - src = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); + src = screen->transfer_map(screen, pt); assert(src); if(!src) return; - pipe_copy_rect(dst, &ps->block, dst_stride, 0, 0, w, h, src, ps->stride, x, y); + pipe_copy_rect(dst, &pt->block, dst_stride, 0, 0, w, h, src, pt->stride, x, y); - pipe_surface_unmap(ps); + screen->transfer_unmap(screen, pt); } /** - * Move raw block of pixels from user memory to surface. - * This should be usable by any hw driver that has mappable surfaces. + * Move raw block of pixels from user memory to transfer object. 
*/ void -pipe_put_tile_raw(struct pipe_surface *ps, +pipe_put_tile_raw(struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const void *src, int src_stride) { + struct pipe_screen *screen = pt->texture->screen; void *dst; if (src_stride == 0) - src_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; + src_stride = pf_get_nblocksx(&pt->block, w) * pt->block.size; - if (pipe_clip_tile(x, y, &w, &h, ps)) + if (pipe_clip_tile(x, y, &w, &h, pt)) return; - dst = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); + dst = screen->transfer_map(screen, pt); assert(dst); if(!dst) return; - pipe_copy_rect(dst, &ps->block, ps->stride, x, y, w, h, src, src_stride, 0, 0); + pipe_copy_rect(dst, &pt->block, pt->stride, x, y, w, h, src, src_stride, 0, 0); - pipe_surface_unmap(ps); + screen->transfer_unmap(screen, pt); } @@ -460,7 +459,7 @@ l8_put_tile_rgba(ubyte *dst, for (j = 0; j < w; j++, pRow += 4) { unsigned r; r = float_to_ubyte(pRow[0]); - *dst++ = r; + *dst++ = (ubyte) r; } p += src_stride; } @@ -504,7 +503,7 @@ a8_put_tile_rgba(ubyte *dst, for (j = 0; j < w; j++, pRow += 4) { unsigned a; a = float_to_ubyte(pRow[3]); - *dst++ = a; + *dst++ = (ubyte) a; } p += src_stride; } @@ -634,7 +633,7 @@ i8_put_tile_rgba(ubyte *dst, for (j = 0; j < w; j++, pRow += 4) { unsigned r; r = float_to_ubyte(pRow[0]); - *dst++ = r; + *dst++ = (ubyte) r; } p += src_stride; } @@ -772,7 +771,7 @@ z24s8_get_tile_rgba(const unsigned *src, /*** PIPE_FORMAT_Z32_FLOAT ***/ /** - * Return each Z value as four floats. + * Return each Z value as four floats in [0,1]. 
*/ static void z32f_get_tile_rgba(const float *src, @@ -977,49 +976,49 @@ pipe_tile_raw_to_rgba(enum pipe_format format, void -pipe_get_tile_rgba(struct pipe_surface *ps, +pipe_get_tile_rgba(struct pipe_transfer *pt, uint x, uint y, uint w, uint h, float *p) { unsigned dst_stride = w * 4; void *packed; - if (pipe_clip_tile(x, y, &w, &h, ps)) + if (pipe_clip_tile(x, y, &w, &h, pt)) return; - packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); + packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size); if (!packed) return; - if(ps->format == PIPE_FORMAT_YCBCR || ps->format == PIPE_FORMAT_YCBCR_REV) + if(pt->format == PIPE_FORMAT_YCBCR || pt->format == PIPE_FORMAT_YCBCR_REV) assert((x & 1) == 0); - pipe_get_tile_raw(ps, x, y, w, h, packed, 0); + pipe_get_tile_raw(pt, x, y, w, h, packed, 0); - pipe_tile_raw_to_rgba(ps->format, packed, w, h, p, dst_stride); + pipe_tile_raw_to_rgba(pt->format, packed, w, h, p, dst_stride); FREE(packed); } void -pipe_put_tile_rgba(struct pipe_surface *ps, +pipe_put_tile_rgba(struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const float *p) { unsigned src_stride = w * 4; void *packed; - if (pipe_clip_tile(x, y, &w, &h, ps)) + if (pipe_clip_tile(x, y, &w, &h, pt)) return; - packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); + packed = MALLOC(pf_get_nblocks(&pt->block, w, h) * pt->block.size); if (!packed) return; - switch (ps->format) { + switch (pt->format) { case PIPE_FORMAT_A8R8G8B8_UNORM: a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); break; @@ -1073,10 +1072,10 @@ pipe_put_tile_rgba(struct pipe_surface *ps, /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; default: - debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(ps->format)); + debug_printf("%s: unsupported format %s\n", __FUNCTION__, pf_name(pt->format)); } - pipe_put_tile_raw(ps, x, y, w, h, packed, 0); + pipe_put_tile_raw(pt, x, y, w, h, packed, 0); FREE(packed); } @@ 
-1086,62 +1085,63 @@ pipe_put_tile_rgba(struct pipe_surface *ps, * Get a block of Z values, converted to 32-bit range. */ void -pipe_get_tile_z(struct pipe_surface *ps, +pipe_get_tile_z(struct pipe_transfer *pt, uint x, uint y, uint w, uint h, uint *z) { + struct pipe_screen *screen = pt->texture->screen; const uint dstStride = w; ubyte *map; uint *pDest = z; uint i, j; - if (pipe_clip_tile(x, y, &w, &h, ps)) + if (pipe_clip_tile(x, y, &w, &h, pt)) return; - map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); + map = (ubyte *)screen->transfer_map(screen, pt); if (!map) { assert(0); return; } - switch (ps->format) { + switch (pt->format) { case PIPE_FORMAT_Z32_UNORM: { - const uint *pSrc - = (const uint *)(map + y * ps->stride + x*4); + const uint *ptrc + = (const uint *)(map + y * pt->stride + x*4); for (i = 0; i < h; i++) { - memcpy(pDest, pSrc, 4 * w); + memcpy(pDest, ptrc, 4 * w); pDest += dstStride; - pSrc += ps->stride/4; + ptrc += pt->stride/4; } } break; case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: { - const uint *pSrc - = (const uint *)(map + y * ps->stride + x*4); + const uint *ptrc + = (const uint *)(map + y * pt->stride + x*4); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 24-bit Z to 32-bit Z */ - pDest[j] = (pSrc[j] << 8) | (pSrc[j] & 0xff); + pDest[j] = (ptrc[j] << 8) | (ptrc[j] & 0xff); } pDest += dstStride; - pSrc += ps->stride/4; + ptrc += pt->stride/4; } } break; case PIPE_FORMAT_Z16_UNORM: { - const ushort *pSrc - = (const ushort *)(map + y * ps->stride + x*2); + const ushort *ptrc + = (const ushort *)(map + y * pt->stride + x*2); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 16-bit Z to 32-bit Z */ - pDest[j] = (pSrc[j] << 16) | pSrc[j]; + pDest[j] = (ptrc[j] << 16) | ptrc[j]; } pDest += dstStride; - pSrc += ps->stride/2; + ptrc += pt->stride/2; } } break; @@ -1149,64 +1149,65 @@ pipe_get_tile_z(struct pipe_surface *ps, assert(0); } - pipe_surface_unmap(ps); + 
screen->transfer_unmap(screen, pt); } void -pipe_put_tile_z(struct pipe_surface *ps, +pipe_put_tile_z(struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const uint *zSrc) { + struct pipe_screen *screen = pt->texture->screen; const uint srcStride = w; - const uint *pSrc = zSrc; + const uint *ptrc = zSrc; ubyte *map; uint i, j; - if (pipe_clip_tile(x, y, &w, &h, ps)) + if (pipe_clip_tile(x, y, &w, &h, pt)) return; - map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); + map = (ubyte *)screen->transfer_map(screen, pt); if (!map) { assert(0); return; } - switch (ps->format) { + switch (pt->format) { case PIPE_FORMAT_Z32_UNORM: { - uint *pDest = (uint *) (map + y * ps->stride + x*4); + uint *pDest = (uint *) (map + y * pt->stride + x*4); for (i = 0; i < h; i++) { - memcpy(pDest, pSrc, 4 * w); - pDest += ps->stride/4; - pSrc += srcStride; + memcpy(pDest, ptrc, 4 * w); + pDest += pt->stride/4; + ptrc += srcStride; } } break; case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: { - uint *pDest = (uint *) (map + y * ps->stride + x*4); + uint *pDest = (uint *) (map + y * pt->stride + x*4); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z (0 stencil) */ - pDest[j] = pSrc[j] >> 8; + pDest[j] = ptrc[j] >> 8; } - pDest += ps->stride/4; - pSrc += srcStride; + pDest += pt->stride/4; + ptrc += srcStride; } } break; case PIPE_FORMAT_Z16_UNORM: { - ushort *pDest = (ushort *) (map + y * ps->stride + x*2); + ushort *pDest = (ushort *) (map + y * pt->stride + x*2); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 16-bit Z */ - pDest[j] = pSrc[j] >> 16; + pDest[j] = ptrc[j] >> 16; } - pDest += ps->stride/2; - pSrc += srcStride; + pDest += pt->stride/2; + ptrc += srcStride; } } break; @@ -1214,7 +1215,7 @@ pipe_put_tile_z(struct pipe_surface *ps, assert(0); } - pipe_surface_unmap(ps); + screen->transfer_unmap(screen, pt); } diff --git a/src/gallium/auxiliary/util/u_tile.h 
b/src/gallium/auxiliary/util/u_tile.h index a8ac805308..1453af38b8 100644 --- a/src/gallium/auxiliary/util/u_tile.h +++ b/src/gallium/auxiliary/util/u_tile.h @@ -30,24 +30,24 @@ #include "pipe/p_compiler.h" -struct pipe_surface; +struct pipe_transfer; /** - * Clip tile against surface dims. + * Clip tile against transfer dims. * \return TRUE if tile is totally clipped, FALSE otherwise */ static INLINE boolean -pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps) +pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_transfer *pt) { - if (x >= ps->width) + if (x >= pt->width) return TRUE; - if (y >= ps->height) + if (y >= pt->height) return TRUE; - if (x + *w > ps->width) - *w = ps->width - x; - if (y + *h > ps->height) - *h = ps->height - y; + if (x + *w > pt->width) + *w = pt->width - x; + if (y + *h > pt->height) + *h = pt->height - y; return FALSE; } @@ -56,34 +56,34 @@ extern "C" { #endif void -pipe_get_tile_raw(struct pipe_surface *ps, +pipe_get_tile_raw(struct pipe_transfer *pt, uint x, uint y, uint w, uint h, void *p, int dst_stride); void -pipe_put_tile_raw(struct pipe_surface *ps, +pipe_put_tile_raw(struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const void *p, int src_stride); void -pipe_get_tile_rgba(struct pipe_surface *ps, +pipe_get_tile_rgba(struct pipe_transfer *pt, uint x, uint y, uint w, uint h, float *p); void -pipe_put_tile_rgba(struct pipe_surface *ps, +pipe_put_tile_rgba(struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const float *p); void -pipe_get_tile_z(struct pipe_surface *ps, +pipe_get_tile_z(struct pipe_transfer *pt, uint x, uint y, uint w, uint h, uint *z); void -pipe_put_tile_z(struct pipe_surface *ps, +pipe_put_tile_z(struct pipe_transfer *pt, uint x, uint y, uint w, uint h, const uint *z); diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index f84514165a..dde2c74fa8 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ 
b/src/gallium/auxiliary/util/u_time.c @@ -200,7 +200,7 @@ util_time_timeout(const struct util_time *start, } -#if defined(PIPE_SUBSYSYEM_WINDOWS_DISPLAY) +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) void util_time_sleep(unsigned usecs) { LONGLONG start, curr, end; diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c index 8beb3b4c88..f237e12d73 100644 --- a/src/gallium/auxiliary/util/u_timed_winsys.c +++ b/src/gallium/auxiliary/util/u_timed_winsys.c @@ -29,7 +29,7 @@ * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> */ -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "u_timed_winsys.h" #include "util/u_memory.h" #include "util/u_time.h" @@ -121,7 +121,8 @@ timed_buffer_create(struct pipe_winsys *winsys, struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - struct pipe_buffer *buf = backend->buffer_create( backend, alignment, usage, size ); + struct pipe_buffer *buf = + backend->buffer_create( backend, alignment, usage, size ); time_finish(winsys, start, 0, __FUNCTION__); @@ -205,34 +206,18 @@ timed_flush_frontbuffer( struct pipe_winsys *winsys, -static struct pipe_surface * -timed_surface_alloc(struct pipe_winsys *winsys) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - uint64_t start = time_start(); - - struct pipe_surface *surf = backend->surface_alloc( backend ); - - time_finish(winsys, start, 6, __FUNCTION__); - - return surf; -} - - - -static int -timed_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +timed_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage) + unsigned usage, + unsigned *stride) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - int ret = backend->surface_alloc_storage( backend, surf, width, 
height, - format, flags, tex_usage ); + struct pipe_buffer *ret = backend->surface_buffer_create( backend, width, height, + format, usage, stride ); time_finish(winsys, start, 7, __FUNCTION__); @@ -240,19 +225,6 @@ timed_surface_alloc_storage(struct pipe_winsys *winsys, } -static void -timed_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - uint64_t start = time_start(); - - backend->surface_release( backend, s ); - - time_finish(winsys, start, 8, __FUNCTION__); -} - - - static const char * timed_get_name( struct pipe_winsys *winsys ) { @@ -329,11 +301,9 @@ struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend ) ws->base.buffer_unmap = timed_buffer_unmap; ws->base.buffer_destroy = timed_buffer_destroy; ws->base.buffer_create = timed_buffer_create; + ws->base.surface_buffer_create = timed_surface_buffer_create; ws->base.flush_frontbuffer = timed_flush_frontbuffer; ws->base.get_name = timed_get_name; - ws->base.surface_alloc = timed_surface_alloc; - ws->base.surface_alloc_storage = timed_surface_alloc_storage; - ws->base.surface_release = timed_surface_release; ws->base.fence_reference = timed_fence_reference; ws->base.fence_signalled = timed_fence_signalled; ws->base.fence_finish = timed_fence_finish; diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile index 6161cb6ff8..9fe9b2c11d 100644 --- a/src/gallium/drivers/Makefile +++ b/src/gallium/drivers/Makefile @@ -1,20 +1,12 @@ +# src/gallium/drivers/Makefile TOP = ../../.. include $(TOP)/configs/current +SUBDIRS = $(GALLIUM_DRIVERS_DIRS) -SUBDIRS = $(GALLIUM_DRIVER_DIRS) - - -default: subdirs - - -subdirs: +default install clean: @for dir in $(SUBDIRS) ; do \ if [ -d $$dir ] ; then \ - (cd $$dir && $(MAKE)) || exit 1 ; \ + (cd $$dir && $(MAKE) $@) || exit 1; \ fi \ done - - -clean: - rm -f `find . 
-name \*.[oa]` diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index cb0631baf5..1f6860da11 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -49,6 +49,15 @@ } + +#define JOIN(x, y) JOIN_AGAIN(x, y) +#define JOIN_AGAIN(x, y) x ## y + +#define STATIC_ASSERT(e) \ +{typedef char JOIN(assertion_failed_at_line_, __LINE__) [(e) ? 1 : -1];} + + + /** for sanity checking */ #define ASSERT_ALIGN16(ptr) \ ASSERT((((unsigned long) (ptr)) & 0xf) == 0); @@ -64,9 +73,13 @@ #define ROUNDUP16(k) (((k) + 0xf) & ~0xf) -#define CELL_MAX_SPUS 6 +#define CELL_MAX_SPUS 8 #define CELL_MAX_SAMPLERS 4 +#define CELL_MAX_TEXTURE_LEVELS 12 /* 2k x 2k */ +#define CELL_MAX_CONSTANTS 32 /**< number of float[4] constants */ +#define CELL_MAX_WIDTH 1024 /**< max framebuffer width */ +#define CELL_MAX_HEIGHT 1024 /**< max framebuffer height */ #define TILE_SIZE 32 @@ -94,48 +107,106 @@ #define CELL_CMD_STATE_BIND_VS 18 #define CELL_CMD_STATE_FRAGMENT_PROGRAM 19 #define CELL_CMD_STATE_ATTRIB_FETCH 20 -#define CELL_CMD_VS_EXECUTE 22 -#define CELL_CMD_FLUSH_BUFFER_RANGE 23 +#define CELL_CMD_STATE_FS_CONSTANTS 21 +#define CELL_CMD_STATE_RASTERIZER 22 +#define CELL_CMD_VS_EXECUTE 23 +#define CELL_CMD_FLUSH_BUFFER_RANGE 24 +#define CELL_CMD_FENCE 25 +/** Command/batch buffers */ #define CELL_NUM_BUFFERS 4 #define CELL_BUFFER_SIZE (4*1024) /**< 16KB would be the max */ #define CELL_BUFFER_STATUS_FREE 10 #define CELL_BUFFER_STATUS_USED 20 +/** Debug flags */ +#define CELL_DEBUG_CHECKER (1 << 0) +#define CELL_DEBUG_ASM (1 << 1) +#define CELL_DEBUG_SYNC (1 << 2) +#define CELL_DEBUG_FRAGMENT_OPS (1 << 3) +#define CELL_DEBUG_FRAGMENT_OP_FALLBACK (1 << 4) +#define CELL_DEBUG_CMD (1 << 5) +#define CELL_DEBUG_CACHE (1 << 6) -#define CELL_DEBUG_CHECKER (1 << 0) -#define CELL_DEBUG_SYNC (1 << 1) +#define CELL_FENCE_IDLE 0 +#define CELL_FENCE_EMITTED 1 +#define CELL_FENCE_SIGNALLED 2 +#define CELL_FACING_FRONT 0 +#define CELL_FACING_BACK 
1
+struct cell_fence +{ + /** There's a 16-byte status qword per SPU */ + volatile uint status[CELL_MAX_SPUS][4]; +}; -/** Max instructions for doing per-fragment operations */ -#define SPU_MAX_FRAGMENT_OPS_INSTS 64 +#ifdef __SPU__ +typedef vector unsigned int opcode_t; +#else +typedef unsigned int opcode_t[4]; +#endif + +/** + * Fence command sent to SPUs. In response, the SPUs will write + * CELL_FENCE_STATUS_SIGNALLED back to the fence status word in main memory. + */ +struct cell_command_fence +{ + opcode_t opcode; /**< CELL_CMD_FENCE */ + struct cell_fence *fence; + uint32_t pad_[3]; +}; /** * Command to specify per-fragment operations state and generated code. + * Note that this is a variant-length structure, allocated with as + * much memory as needed to hold the generated code; the "code" + * field *must* be the last field in the structure. Also, the entire + * length of the structure (including the variant code field) must be + * a multiple of 8 bytes; we require that this structure itself be + * a multiple of 8 bytes, and that the generated code also be a multiple + * of 8 bytes. + * + * Also note that the dsa, blend, blend_color fields are really only needed + * for the fallback/C per-pixel code. They're not used when we generate + * dynamic SPU fragment code (which is the normal case), and will eventually + * be removed from this structure. 
*/ struct cell_command_fragment_ops { - uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ + opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ + + /* Fields for the fallback case */ struct pipe_depth_stencil_alpha_state dsa; struct pipe_blend_state blend; - unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS]; + struct pipe_blend_color blend_color; + + /* Fields for the generated SPU code */ + unsigned total_code_size; + unsigned front_code_index; + unsigned back_code_index; + /* this field has variant length, and must be the last field in + * the structure + */ + unsigned code[0]; }; /** Max instructions for fragment programs */ -#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 128 +#define SPU_MAX_FRAGMENT_PROGRAM_INSTS 512 /** - * Command to send a fragment progra to SPUs. + * Command to send a fragment program to SPUs. */ struct cell_command_fragment_program { - uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */ + opcode_t opcode; /**< CELL_CMD_STATE_FRAGMENT_PROGRAM */ uint num_inst; /**< Number of instructions */ + uint32_t pad[3]; unsigned code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; }; @@ -145,10 +216,21 @@ struct cell_command_fragment_program */ struct cell_command_framebuffer { - uint64_t opcode; /**< CELL_CMD_FRAMEBUFFER */ + opcode_t opcode; /**< CELL_CMD_STATE_FRAMEBUFFER */ int width, height; void *color_start, *depth_start; enum pipe_format color_format, depth_format; + uint32_t pad_[2]; +}; + + +/** + * Tell SPUs about rasterizer state. 
+ */ +struct cell_command_rasterizer +{ + opcode_t opcode; /**< CELL_CMD_STATE_RASTERIZER */ + struct pipe_rasterizer_state rasterizer; }; @@ -157,9 +239,10 @@ struct cell_command_framebuffer */ struct cell_command_clear_surface { - uint64_t opcode; /**< CELL_CMD_CLEAR_SURFACE */ + opcode_t opcode; /**< CELL_CMD_CLEAR_SURFACE */ uint surface; /**< Temporary: 0=color, 1=Z */ uint value; + uint32_t pad[2]; }; @@ -206,7 +289,7 @@ struct cell_shader_info #define SPU_VERTS_PER_BATCH 64 struct cell_command_vs { - uint64_t opcode; /**< CELL_CMD_VS_EXECUTE */ + opcode_t opcode; /**< CELL_CMD_VS_EXECUTE */ uint64_t vOut[SPU_VERTS_PER_BATCH]; unsigned num_elts; unsigned elts[SPU_VERTS_PER_BATCH]; @@ -218,7 +301,7 @@ struct cell_command_vs struct cell_command_render { - uint64_t opcode; /**< CELL_CMD_RENDER */ + opcode_t opcode; /**< CELL_CMD_RENDER */ uint prim_type; /**< PIPE_PRIM_x */ uint num_verts; uint vertex_size; /**< bytes per vertex */ @@ -227,44 +310,51 @@ struct cell_command_render float xmin, ymin, xmax, ymax; /* XXX another dummy field */ uint min_index; boolean inline_verts; + uint32_t pad_[1]; }; struct cell_command_release_verts { - uint64_t opcode; /**< CELL_CMD_RELEASE_VERTS */ + opcode_t opcode; /**< CELL_CMD_RELEASE_VERTS */ uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ + uint32_t pad_[3]; }; struct cell_command_sampler { - uint64_t opcode; /**< CELL_CMD_STATE_SAMPLER */ + opcode_t opcode; /**< CELL_CMD_STATE_SAMPLER */ uint unit; struct pipe_sampler_state state; + uint32_t pad_[1]; }; struct cell_command_texture { - uint64_t opcode; /**< CELL_CMD_STATE_TEXTURE */ + opcode_t opcode; /**< CELL_CMD_STATE_TEXTURE */ + uint target; /**< PIPE_TEXTURE_x */ uint unit; - void *start; /**< Address in main memory */ - ushort width, height; + void *start[CELL_MAX_TEXTURE_LEVELS]; /**< Address in main memory */ + ushort width[CELL_MAX_TEXTURE_LEVELS]; + ushort height[CELL_MAX_TEXTURE_LEVELS]; + ushort depth[CELL_MAX_TEXTURE_LEVELS]; }; -/** XXX unions don't 
seem to work */ -/* XXX this should go away; all commands should be placed in batch buffers */ -struct cell_command +#define MAX_SPU_FUNCTIONS 12 +/** + * Used to tell the PPU about the address of particular functions in the + * SPU's address space. + */ +struct cell_spu_function_info { -#if 0 - struct cell_command_framebuffer fb; - struct cell_command_clear_surface clear; - struct cell_command_render render; -#endif - struct cell_command_vs vs; -} ALIGN16_ATTRIB; + uint num; + char names[MAX_SPU_FUNCTIONS][16]; + uint addrs[MAX_SPU_FUNCTIONS]; + char pad[12]; /**< Pad struct to multiple of 16 bytes (256 currently) */ +}; /** This is the object passed to spe_create_thread() */ @@ -273,11 +363,13 @@ struct cell_init_info unsigned id; unsigned num_spus; unsigned debug_flags; /**< mask of CELL_DEBUG_x flags */ - struct cell_command *cmd; + float inv_timebase; /**< 1.0/timebase, for perf measurement */ /** Buffers for command batches, vertex/index data */ ubyte *buffers[CELL_NUM_BUFFERS]; uint *buffer_status; /**< points at cell_context->buffer_status */ + + struct cell_spu_function_info *spu_functions; } ALIGN16_ATTRIB; diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index b28f4c5c31..c92f8e5cba 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -24,6 +24,7 @@ SOURCES = \ cell_clear.c \ cell_context.c \ cell_draw_arrays.c \ + cell_fence.c \ cell_flush.c \ cell_gen_fragment.c \ cell_gen_fp.c \ @@ -38,8 +39,7 @@ SOURCES = \ cell_texture.c \ cell_vbuf.c \ cell_vertex_fetch.c \ - cell_vertex_shader.c \ - cell_winsys.c + cell_vertex_shader.c OBJECTS = $(SOURCES:.c=.o) \ @@ -54,6 +54,9 @@ INCLUDE_DIRS = \ $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ +.c.s: + $(CC) -S $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + default: $(CELL_LIB) diff --git a/src/gallium/drivers/cell/ppu/cell_batch.c b/src/gallium/drivers/cell/ppu/cell_batch.c index 16882c0129..fe144f8b84 100644 --- 
a/src/gallium/drivers/cell/ppu/cell_batch.c +++ b/src/gallium/drivers/cell/ppu/cell_batch.c @@ -28,6 +28,7 @@ #include "cell_context.h" #include "cell_batch.h" +#include "cell_fence.h" #include "cell_spu.h" @@ -42,7 +43,9 @@ uint cell_get_empty_buffer(struct cell_context *cell) { - uint buf = 0, tries = 0; + static uint prev_buffer = 0; + uint buf = (prev_buffer + 1) % CELL_NUM_BUFFERS; + uint tries = 0; /* Find a buffer that's marked as free by all SPUs */ while (1) { @@ -58,8 +61,13 @@ cell_get_empty_buffer(struct cell_context *cell) cell->buffer_status[spu][buf][0] = CELL_BUFFER_STATUS_USED; } /* - printf("PPU: ALLOC BUFFER %u\n", buf); + printf("PPU: ALLOC BUFFER %u, %u tries\n", buf, tries); */ + prev_buffer = buf; + + /* release tex buffer associated w/ prev use of this batch buf */ + cell_free_fenced_buffers(cell, &cell->fenced_buffers[buf]); + return buf; } } @@ -82,6 +90,38 @@ cell_get_empty_buffer(struct cell_context *cell) /** + * Append a fence command to the current batch buffer. + * Note that we're sure there's always room for this because of the + * adjusted size check in cell_batch_free_space(). 
+ */ +static void +emit_fence(struct cell_context *cell) +{ + const uint batch = cell->cur_batch; + const uint size = cell->buffer_size[batch]; + struct cell_command_fence *fence_cmd; + struct cell_fence *fence = &cell->fenced_buffers[batch].fence; + uint i; + + /* set fence status to emitted, not yet signalled */ + for (i = 0; i < cell->num_spus; i++) { + fence->status[i][0] = CELL_FENCE_EMITTED; + } + + STATIC_ASSERT(sizeof(struct cell_command_fence) % 16 == 0); + ASSERT(size % 16 == 0); + ASSERT(size + sizeof(struct cell_command_fence) <= CELL_BUFFER_SIZE); + + fence_cmd = (struct cell_command_fence *) (cell->buffer[batch] + size); + fence_cmd->opcode[0] = CELL_CMD_FENCE; + fence_cmd->fence = fence; + + /* update batch buffer size */ + cell->buffer_size[batch] = size + sizeof(struct cell_command_fence); +} + + +/** * Flush the current batch buffer to the SPUs. * An empty buffer will be found and set as the new current batch buffer * for subsequent commands/data. @@ -91,7 +131,7 @@ cell_batch_flush(struct cell_context *cell) { static boolean flushing = FALSE; uint batch = cell->cur_batch; - const uint size = cell->buffer_size[batch]; + uint size = cell->buffer_size[batch]; uint spu, cmd_word; assert(!flushing); @@ -99,6 +139,14 @@ cell_batch_flush(struct cell_context *cell) if (size == 0) return; + /* Before we use this batch buffer, make sure any fenced texture buffers + * are released. + */ + if (cell->fenced_buffers[batch].head) { + emit_fence(cell); + size = cell->buffer_size[batch]; + } + flushing = TRUE; assert(batch < CELL_NUM_BUFFERS); @@ -139,74 +187,24 @@ uint cell_batch_free_space(const struct cell_context *cell) { uint free = CELL_BUFFER_SIZE - cell->buffer_size[cell->cur_batch]; + free -= sizeof(struct cell_command_fence); return free; } /** - * Append data to the current batch buffer. 
- * \param data address of block of bytes to append - * \param bytes size of block of bytes - */ -void -cell_batch_append(struct cell_context *cell, const void *data, uint bytes) -{ - uint size; - - ASSERT(bytes % 8 == 0); - ASSERT(bytes <= CELL_BUFFER_SIZE); - ASSERT(cell->cur_batch >= 0); - -#ifdef ASSERT - { - uint spu; - for (spu = 0; spu < cell->num_spus; spu++) { - ASSERT(cell->buffer_status[spu][cell->cur_batch][0] - == CELL_BUFFER_STATUS_USED); - } - } -#endif - - size = cell->buffer_size[cell->cur_batch]; - - if (size + bytes > CELL_BUFFER_SIZE) { - cell_batch_flush(cell); - size = 0; - } - - ASSERT(size + bytes <= CELL_BUFFER_SIZE); - - memcpy(cell->buffer[cell->cur_batch] + size, data, bytes); - - cell->buffer_size[cell->cur_batch] = size + bytes; -} - - -/** * Allocate space in the current batch buffer for 'bytes' space. + * Bytes must be a multiple of 16 bytes. Allocation will be 16 byte aligned. * \return address in batch buffer to put data */ void * -cell_batch_alloc(struct cell_context *cell, uint bytes) -{ - return cell_batch_alloc_aligned(cell, bytes, 1); -} - - -/** - * Same as \sa cell_batch_alloc, but return an address at a particular - * alignment. 
- */ -void * -cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, - uint alignment) +cell_batch_alloc16(struct cell_context *cell, uint bytes) { void *pos; - uint size, padbytes; + uint size; - ASSERT(bytes % 8 == 0); + ASSERT(bytes % 16 == 0); ASSERT(bytes <= CELL_BUFFER_SIZE); - ASSERT(alignment > 0); ASSERT(cell->cur_batch >= 0); #ifdef ASSERT @@ -221,17 +219,12 @@ cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, size = cell->buffer_size[cell->cur_batch]; - padbytes = (alignment - (size % alignment)) % alignment; - - if (padbytes + size + bytes > CELL_BUFFER_SIZE) { + if (bytes > cell_batch_free_space(cell)) { cell_batch_flush(cell); size = 0; } - else { - size += padbytes; - } - ASSERT(size % alignment == 0); + ASSERT(size % 16 == 0); ASSERT(size + bytes <= CELL_BUFFER_SIZE); pos = (void *) (cell->buffer[cell->cur_batch] + size); diff --git a/src/gallium/drivers/cell/ppu/cell_batch.h b/src/gallium/drivers/cell/ppu/cell_batch.h index f74dd60079..290136031a 100644 --- a/src/gallium/drivers/cell/ppu/cell_batch.h +++ b/src/gallium/drivers/cell/ppu/cell_batch.h @@ -44,15 +44,8 @@ cell_batch_flush(struct cell_context *cell); extern uint cell_batch_free_space(const struct cell_context *cell); -extern void -cell_batch_append(struct cell_context *cell, const void *data, uint bytes); - -extern void * -cell_batch_alloc(struct cell_context *cell, uint bytes); - extern void * -cell_batch_alloc_aligned(struct cell_context *cell, uint bytes, - uint alignment); +cell_batch_alloc16(struct cell_context *cell, uint bytes); extern void cell_init_batch_buffers(struct cell_context *cell); diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index c9c0c721bb..edc06747ac 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -70,18 +70,12 @@ void cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, unsigned clearValue) { - struct pipe_screen 
*screen = pipe->screen; struct cell_context *cell = cell_context(pipe); uint surfIndex; if (cell->dirty) cell_update_derived(cell); - - if (!cell->cbuf_map[0]) - cell->cbuf_map[0] = screen->surface_map(screen, ps, - PIPE_BUFFER_USAGE_GPU_WRITE); - if (ps == cell->framebuffer.zsbuf) { /* clear z/stencil buffer */ surfIndex = 1; @@ -99,11 +93,25 @@ cell_clear_surface(struct pipe_context *pipe, struct pipe_surface *ps, /* Build a CLEAR command and place it in the current batch buffer */ { + STATIC_ASSERT(sizeof(struct cell_command_clear_surface) % 16 == 0); struct cell_command_clear_surface *clr = (struct cell_command_clear_surface *) - cell_batch_alloc(cell, sizeof(*clr)); - clr->opcode = CELL_CMD_CLEAR_SURFACE; + cell_batch_alloc16(cell, sizeof(*clr)); + clr->opcode[0] = CELL_CMD_CLEAR_SURFACE; clr->surface = surfIndex; clr->value = clearValue; } + + /* Technically, the surface's contents are now known and cleared, + * so we could set the status to PIPE_SURFACE_STATUS_CLEAR. But + * it turns out it's quite painful to recognize when any particular + * surface goes from PIPE_SURFACE_STATUS_CLEAR to + * PIPE_SURFACE_STATUS_DEFINED (i.e. with known contents), because + * the drawing commands could be operating on numerous draw buffers, + * which we'd have to iterate through to set all their stati... + * For now, we cheat a bit and set the surface's status to DEFINED + * right here. Later we should revisit this and set the status to + * CLEAR here, and find a better place to set the status to DEFINED. 
+ */ + ps->status = PIPE_SURFACE_STATUS_DEFINED; } diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 71f1a3049d..ae82ded334 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -36,7 +36,7 @@ #include "pipe/p_defines.h" #include "pipe/p_format.h" #include "util/u_memory.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_screen.h" #include "draw/draw_context.h" @@ -47,6 +47,7 @@ #include "cell_clear.h" #include "cell_context.h" #include "cell_draw_arrays.h" +#include "cell_fence.h" #include "cell_flush.h" #include "cell_state.h" #include "cell_surface.h" @@ -62,6 +63,8 @@ cell_destroy_context( struct pipe_context *pipe ) { struct cell_context *cell = cell_context(pipe); + util_delete_keymap(cell->fragment_ops_cache, NULL); + cell_spu_exit(cell); align_free(cell); @@ -85,13 +88,16 @@ cell_draw_create(struct cell_context *cell) } -#ifdef DEBUG static const struct debug_named_value cell_debug_flags[] = { {"checker", CELL_DEBUG_CHECKER},/**< modulate tile clear color by SPU ID */ + {"asm", CELL_DEBUG_ASM}, /**< dump SPU asm code */ {"sync", CELL_DEBUG_SYNC}, /**< SPUs do synchronous DMA */ + {"fragops", CELL_DEBUG_FRAGMENT_OPS}, /**< SPUs emit fragment ops debug messages*/ + {"fragopfallback", CELL_DEBUG_FRAGMENT_OP_FALLBACK}, /**< SPUs use reference implementation for fragment ops*/ + {"cmd", CELL_DEBUG_CMD}, /**< SPUs dump command buffer info */ + {"cache", CELL_DEBUG_CACHE}, /**< report texture cache stats on exit */ {NULL, 0} }; -#endif struct pipe_context * @@ -99,6 +105,7 @@ cell_create_context(struct pipe_screen *screen, struct cell_winsys *cws) { struct cell_context *cell; + uint i; /* some fields need to be 16-byte aligned, so align the whole object */ cell = (struct cell_context*) align_malloc(sizeof(struct cell_context), 16); @@ -125,11 +132,14 @@ cell_create_context(struct pipe_screen *screen, 
cell_init_state_functions(cell); cell_init_shader_functions(cell); cell_init_surface_functions(cell); - cell_init_texture_functions(cell); cell_init_vertex_functions(cell); cell->draw = cell_draw_create(cell); + /* Create cache of fragment ops generated code */ + cell->fragment_ops_cache = + util_new_keymap(sizeof(struct cell_fragment_ops_key), ~0, NULL); + cell_init_vbuf(cell); draw_set_rasterize_stage(cell->draw, cell->vbuf); @@ -143,17 +153,31 @@ cell_create_context(struct pipe_screen *screen, cell_debug_flags, 0 ); + for (i = 0; i < CELL_NUM_BUFFERS; i++) + cell_fence_init(&cell->fenced_buffers[i].fence); + + /* * SPU stuff */ - cell->num_spus = 6; - /* XXX is this in SDK 3.0 only? - cell->num_spus = spe_cpu_info_get(SPE_COUNT_PHYSICAL_SPES, -1); - */ + /* This call only works with SDK 3.0. Anyone still using 2.1??? */ + cell->num_cells = spe_cpu_info_get(SPE_COUNT_PHYSICAL_CPU_NODES, -1); + cell->num_spus = spe_cpu_info_get(SPE_COUNT_USABLE_SPES, -1); + if (cell->debug_flags) { + printf("Cell: found %d Cell(s) with %u SPUs\n", + cell->num_cells, cell->num_spus); + } + if (getenv("CELL_NUM_SPUS")) { + cell->num_spus = atoi(getenv("CELL_NUM_SPUS")); + assert(cell->num_spus > 0); + } cell_start_spus(cell); cell_init_batch_buffers(cell); + /* make sure SPU initializations are done before proceeding */ + cell_flush_int(cell, CELL_FLUSH_WAIT); + return &cell->pipe; } diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 14914b9c6f..ca03dc1511 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -38,6 +38,7 @@ #include "cell/common.h" #include "rtasm/rtasm_ppc_spe.h" #include "tgsi/tgsi_scan.h" +#include "util/u_keymap.h" struct cell_vbuf_render; @@ -67,31 +68,29 @@ struct cell_fragment_shader_state /** - * Cell blend state atom, subclass of pipe_blend_state. + * Key for mapping per-fragment state to cached SPU machine code. 
+ * keymap(cell_fragment_ops_key) => cell_command_fragment_ops */ -struct cell_blend_state +struct cell_fragment_ops_key { - struct pipe_blend_state base; - - /** - * Generated code to perform alpha blending - */ - struct spe_function code; + struct pipe_blend_state blend; + struct pipe_blend_color blend_color; + struct pipe_depth_stencil_alpha_state dsa; + enum pipe_format color_format; + enum pipe_format zs_format; }; +struct cell_buffer_node; + /** - * Cell depth/stencil/alpha state atom, subclass of - * pipe_depth_stencil_alpha_state. + * Fenced buffer list. List of buffers which can be unreferenced after + * the fence has been executed/signalled. */ -struct cell_depth_stencil_alpha_state +struct cell_buffer_list { - struct pipe_depth_stencil_alpha_state base; - - /** - * Generated code to perform alpha, stencil, and depth testing on the SPE - */ - struct spe_function code; + struct cell_fence fence ALIGN16_ATTRIB; + struct cell_buffer_node *head; }; @@ -104,10 +103,10 @@ struct cell_context struct cell_winsys *winsys; - const struct cell_blend_state *blend; + const struct pipe_blend_state *blend; const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; uint num_samplers; - const struct cell_depth_stencil_alpha_state *depth_stencil; + const struct pipe_depth_stencil_alpha_state *depth_stencil; const struct pipe_rasterizer_state *rasterizer; const struct cell_vertex_shader_state *vs; const struct cell_fragment_shader_state *fs; @@ -128,6 +127,9 @@ struct cell_context struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; uint num_vertex_elements; + struct pipe_transfer *cbuf_transfer[PIPE_MAX_COLOR_BUFS]; + struct pipe_transfer *zsbuf_transfer; + ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; ubyte *zsbuf_map; @@ -135,6 +137,11 @@ struct cell_context uint *tex_map; uint dirty; + uint dirty_textures; /* bitmask of texture units */ + uint dirty_samplers; /* bitmask of sampler units */ + + /** Cache of code generated for per-fragment ops */ + struct keymap 
*fragment_ops_cache; /** The primitive drawing context */ struct draw_context *draw; @@ -149,8 +156,9 @@ struct cell_context /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; + struct cell_spu_function_info spu_functions ALIGN16_ATTRIB; - uint num_spus; + uint num_cells, num_spus; /** Buffers for command batches, vertex/index data */ uint buffer_size[CELL_NUM_BUFFERS]; @@ -162,6 +170,14 @@ struct cell_context uint buffer_status[CELL_MAX_SPUS][CELL_NUM_BUFFERS][4] ALIGN16_ATTRIB; + /** Associated with each command/batch buffer is a list of pipe_buffers + * that are fenced. When the last command in a buffer is executed, the + * fence will be signalled, indicating that any pipe_buffers preceeding + * that fence can be unreferenced (and probably freed). + */ + struct cell_buffer_list fenced_buffers[CELL_NUM_BUFFERS]; + + struct spe_function attrib_fetch; unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 880d535320..644496db40 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "cell_context.h" @@ -51,9 +51,9 @@ cell_map_constant_buffers(struct cell_context *sp) struct pipe_winsys *ws = sp->pipe.winsys; uint i; for (i = 0; i < 2; i++) { - if (sp->constants[i].size) { + if (sp->constants[i].buffer && sp->constants[i].buffer->size) { sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + PIPE_BUFFER_USAGE_CPU_READ); cell_flush_buffer_range(sp, sp->mapped_constants[i], sp->constants[i].buffer->size); } @@ -61,7 +61,7 @@ cell_map_constant_buffers(struct cell_context *sp) draw_set_mapped_constant_buffer(sp->draw, 
sp->mapped_constants[PIPE_SHADER_VERTEX], - sp->constants[PIPE_SHADER_VERTEX].size); + sp->constants[PIPE_SHADER_VERTEX].buffer->size); } static void @@ -70,7 +70,7 @@ cell_unmap_constant_buffers(struct cell_context *sp) struct pipe_winsys *ws = sp->pipe.winsys; uint i; for (i = 0; i < 2; i++) { - if (sp->constants[i].size) + if (sp->constants[i].buffer && sp->constants[i].buffer->size) ws->buffer_unmap(ws, sp->constants[i].buffer); sp->mapped_constants[i] = NULL; } diff --git a/src/gallium/drivers/cell/ppu/cell_fence.c b/src/gallium/drivers/cell/ppu/cell_fence.c new file mode 100644 index 0000000000..867b5dcaa0 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.c @@ -0,0 +1,168 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <unistd.h> +#include "util/u_memory.h" +#include "pipe/p_inlines.h" +#include "cell_context.h" +#include "cell_batch.h" +#include "cell_fence.h" +#include "cell_texture.h" + + +void +cell_fence_init(struct cell_fence *fence) +{ + uint i; + ASSERT_ALIGN16(fence->status); + for (i = 0; i < CELL_MAX_SPUS; i++) { + fence->status[i][0] = CELL_FENCE_IDLE; + } +} + + +boolean +cell_fence_signalled(const struct cell_context *cell, + const struct cell_fence *fence) +{ + uint i; + for (i = 0; i < cell->num_spus; i++) { + if (fence->status[i][0] != CELL_FENCE_SIGNALLED) + return FALSE; + /*assert(fence->status[i][0] == CELL_FENCE_EMITTED);*/ + } + return TRUE; +} + + +void +cell_fence_finish(const struct cell_context *cell, + const struct cell_fence *fence) +{ + while (!cell_fence_signalled(cell, fence)) { + usleep(10); + } + +#ifdef DEBUG + { + uint i; + for (i = 0; i < cell->num_spus; i++) { + assert(fence->status[i][0] == CELL_FENCE_SIGNALLED); + } + } +#endif +} + + + + +struct cell_buffer_node +{ + struct pipe_buffer *buffer; + struct cell_buffer_node *next; +}; + + +static void +cell_add_buffer_to_list(struct cell_context *cell, + struct cell_buffer_list *list, + struct pipe_buffer *buffer) +{ + struct pipe_screen *ps = cell->pipe.screen; + struct cell_buffer_node *node = CALLOC_STRUCT(cell_buffer_node); + /* create new list node which references the buffer, insert at head */ + if (node) { + pipe_buffer_reference(ps, &node->buffer, buffer); + node->next = list->head; + list->head = node; + } +} + + +/** + * Wait for completion of the given fence, then unreference any buffers + * on the list. 
+ * This typically unrefs/frees texture buffers after any rendering which uses + * them has completed. + */ +void +cell_free_fenced_buffers(struct cell_context *cell, + struct cell_buffer_list *list) +{ + if (list->head) { + struct pipe_screen *ps = cell->pipe.screen; + struct cell_buffer_node *node; + + cell_fence_finish(cell, &list->fence); + + /* traverse the list, unreferencing buffers, freeing nodes */ + node = list->head; + while (node) { + struct cell_buffer_node *next = node->next; + assert(node->buffer); + pipe_buffer_unmap(ps, node->buffer); +#if 0 + printf("Unref buffer %p\n", node->buffer); + if (node->buffer->refcount == 1) + printf(" Delete!\n"); +#endif + pipe_buffer_reference(ps, &node->buffer, NULL); + FREE(node); + node = next; + } + list->head = NULL; + } +} + + +/** + * This should be called for each render command. + * Any texture buffers that are current bound will be added to a fenced + * list to be freed later when the fence is executed/signalled. + */ +void +cell_add_fenced_textures(struct cell_context *cell) +{ + struct cell_buffer_list *list = &cell->fenced_buffers[cell->cur_batch]; + uint i; + + for (i = 0; i < cell->num_textures; i++) { + struct cell_texture *ct = cell->texture[i]; + if (ct) { + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + if (ct->tiled_buffer[level]) { +#if 0 + printf("Adding texture %p buffer %p to list\n", + ct, ct->tiled_buffer[level]); +#endif + cell_add_buffer_to_list(cell, list, ct->tiled_buffer[level]); + } + } + } + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_fence.h b/src/gallium/drivers/cell/ppu/cell_fence.h new file mode 100644 index 0000000000..536b4ba411 --- /dev/null +++ b/src/gallium/drivers/cell/ppu/cell_fence.h @@ -0,0 +1,57 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +#ifndef CELL_FENCE_H +#define CELL_FENCE_H + + +extern void +cell_fence_init(struct cell_fence *fence); + + +extern boolean +cell_fence_signalled(const struct cell_context *cell, + const struct cell_fence *fence); + + +extern void +cell_fence_finish(const struct cell_context *cell, + const struct cell_fence *fence); + + + +extern void +cell_free_fenced_buffers(struct cell_context *cell, + struct cell_buffer_list *list); + + +extern void +cell_add_fenced_textures(struct cell_context *cell); + + +#endif /* CELL_FENCE_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index 6596b72010..8275c9dc9c 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -49,7 +49,7 @@ cell_flush(struct pipe_context *pipe, unsigned flags, flags |= CELL_FLUSH_WAIT; } - if (flags & PIPE_FLUSH_SWAPBUFFERS) + if (flags & (PIPE_FLUSH_SWAPBUFFERS | PIPE_FLUSH_RENDER_CACHE)) flags |= CELL_FLUSH_WAIT; draw_flush( cell->draw ); @@ -72,8 +72,9 @@ cell_flush_int(struct cell_context *cell, unsigned flags) flushing = TRUE; if (flags & CELL_FLUSH_WAIT) { - uint64_t *cmd = (uint64_t *) cell_batch_alloc(cell, sizeof(uint64_t)); - *cmd = CELL_CMD_FINISH; + STATIC_ASSERT(sizeof(opcode_t) % 16 == 0); + opcode_t *cmd = (opcode_t*) cell_batch_alloc16(cell, sizeof(opcode_t)); + *cmd[0] = CELL_CMD_FINISH; } cell_batch_flush(cell); @@ -101,11 +102,11 @@ void cell_flush_buffer_range(struct cell_context *cell, void *ptr, unsigned size) { - uint64_t batch[1 + (ROUNDUP8(sizeof(struct cell_buffer_range)) / 8)]; - struct cell_buffer_range *br = (struct cell_buffer_range *) & batch[1]; - + STATIC_ASSERT((sizeof(opcode_t) + sizeof(struct cell_buffer_range)) % 16 == 0); + uint32_t *batch = (uint32_t*)cell_batch_alloc16(cell, + sizeof(opcode_t) + sizeof(struct cell_buffer_range)); + struct cell_buffer_range *br = (struct 
cell_buffer_range *) &batch[4]; batch[0] = CELL_CMD_FLUSH_BUFFER_RANGE; br->base = (uintptr_t) ptr; br->size = size; - cell_batch_append(cell, batch, sizeof(batch)); } diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 6ffe94eb14..5a889a6119 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -2,6 +2,7 @@ * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -37,7 +38,7 @@ * \author Brian Paul */ - +#include <math.h> #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" @@ -51,25 +52,55 @@ #include "cell_gen_fp.h" -/** Set to 1 to enable debug/disassembly printfs */ -#define DISASSEM 01 +#define MAX_TEMPS 16 +#define MAX_IMMED 8 +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 /** * Context needed during code generation. 
*/ struct codegen { + struct cell_context *cell; int inputs_reg; /**< 1st function parameter */ int outputs_reg; /**< 2nd function parameter */ int constants_reg; /**< 3rd function parameter */ - int temp_regs[8][4]; /**< maps TGSI temps to SPE registers */ + int temp_regs[MAX_TEMPS][4]; /**< maps TGSI temps to SPE registers */ + int imm_regs[MAX_IMMED][4]; /**< maps TGSI immediates to SPE registers */ + + int num_imm; /**< number of immediates */ int one_reg; /**< register containing {1.0, 1.0, 1.0, 1.0} */ + int addr_reg; /**< address register, integer values */ + /** Per-instruction temps / intermediate temps */ int num_itemps; - int itemps[3]; + int itemps[12]; + + /** Current IF/ELSE/ENDIF nesting level */ + int if_nesting; + /** Current BGNLOOP/ENDLOOP nesting level */ + int loop_nesting; + /** Location of start of current loop */ + int loop_start; + + /** Index of if/conditional mask register */ + int cond_mask_reg; + /** Index of loop mask register */ + int loop_mask_reg; + + /** Index of master execution mask register */ + int exec_mask_reg; + + /** KIL mask: indicates which fragments have been killed */ + int kill_mask_reg; + + int frame_size; /**< Stack frame size, in words */ struct spe_function *f; boolean error; @@ -112,19 +143,122 @@ get_const_one_reg(struct codegen *gen) { if (gen->one_reg <= 0) { gen->one_reg = spe_allocate_available_register(gen->f); - } - /* one = {1.0, 1.0, 1.0, 1.0} */ - spe_load_float(gen->f, gen->one_reg, 1.0f); -#if DISASSEM - printf("il\tr%d, 1.0f\n", gen->one_reg); -#endif + spe_indent(gen->f, 4); + spe_comment(gen->f, -4, "init constant reg = 1.0:"); + + /* one = {1.0, 1.0, 1.0, 1.0} */ + spe_load_float(gen->f, gen->one_reg, 1.0f); + + spe_indent(gen->f, -4); + } return gen->one_reg; } /** + * Return index of the address register. + * Used for indirect register loads/stores. 
+ */ +static int +get_address_reg(struct codegen *gen) +{ + if (gen->addr_reg <= 0) { + gen->addr_reg = spe_allocate_available_register(gen->f); + + spe_indent(gen->f, 4); + spe_comment(gen->f, -4, "init address reg = 0:"); + + /* init addr = {0, 0, 0, 0} */ + spe_zero(gen->f, gen->addr_reg); + + spe_indent(gen->f, -4); + } + + return gen->addr_reg; +} + + +/** + * Return index of the master execution mask. + * The register is allocated an initialized upon the first call. + * + * The master execution mask controls which pixels in a quad are + * modified, according to surrounding conditionals, loops, etc. + */ +static int +get_exec_mask_reg(struct codegen *gen) +{ + if (gen->exec_mask_reg <= 0) { + gen->exec_mask_reg = spe_allocate_available_register(gen->f); + + /* XXX this may not be needed */ + spe_comment(gen->f, 0*-4, "initialize master execution mask = ~0"); + spe_load_int(gen->f, gen->exec_mask_reg, ~0); + } + + return gen->exec_mask_reg; +} + + +/** Return index of the conditional (if/else) execution mask register */ +static int +get_cond_mask_reg(struct codegen *gen) +{ + if (gen->cond_mask_reg <= 0) { + gen->cond_mask_reg = spe_allocate_available_register(gen->f); + } + + return gen->cond_mask_reg; +} + + +/** Return index of the loop execution mask register */ +static int +get_loop_mask_reg(struct codegen *gen) +{ + if (gen->loop_mask_reg <= 0) { + gen->loop_mask_reg = spe_allocate_available_register(gen->f); + } + + return gen->loop_mask_reg; +} + + + +static boolean +is_register_src(struct codegen *gen, int channel, + const struct tgsi_full_src_register *src) +{ + int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); + int sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); + + if (swizzle > TGSI_SWIZZLE_W || sign_op != TGSI_UTIL_SIGN_KEEP) { + return FALSE; + } + if (src->SrcRegister.File == TGSI_FILE_TEMPORARY || + src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + return TRUE; + } + return FALSE; +} + + +static boolean 
+is_memory_dst(struct codegen *gen, int channel, + const struct tgsi_full_dst_register *dst) +{ + if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + return TRUE; + } + else { + return FALSE; + } +} + + +/** * Return the index of the SPU temporary containing the named TGSI * source register. If the TGSI register is a TGSI_FILE_TEMPORARY we * just return the corresponding SPE register. If the TGIS register @@ -136,35 +270,99 @@ get_src_reg(struct codegen *gen, int channel, const struct tgsi_full_src_register *src) { - int reg; + int reg = -1; + int swizzle = tgsi_util_get_full_src_register_extswizzle(src, channel); + boolean reg_is_itemp = FALSE; + uint sign_op; + + assert(swizzle >= TGSI_SWIZZLE_X); + assert(swizzle <= TGSI_EXTSWIZZLE_ONE); + + if (swizzle == TGSI_EXTSWIZZLE_ONE) { + /* Load const one float and early out */ + reg = get_const_one_reg(gen); + } + else if (swizzle == TGSI_EXTSWIZZLE_ZERO) { + /* Load const zero float and early out */ + reg = get_itemp(gen); + spe_xor(gen->f, reg, reg, reg); + } + else { + int index = src->SrcRegister.Index; + + assert(swizzle < 4); + + if (src->SrcRegister.Indirect) { + /* XXX unfinished */ + } + + switch (src->SrcRegister.File) { + case TGSI_FILE_TEMPORARY: + reg = gen->temp_regs[index][swizzle]; + break; + case TGSI_FILE_INPUT: + { + /* offset is measured in quadwords, not bytes */ + int offset = index * 4 + swizzle; + reg = get_itemp(gen); + reg_is_itemp = TRUE; + /* Load: reg = memory[(machine_reg) + offset] */ + spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16); + } + break; + case TGSI_FILE_IMMEDIATE: + reg = gen->imm_regs[index][swizzle]; + break; + case TGSI_FILE_CONSTANT: + { + /* offset is measured in quadwords, not bytes */ + int offset = index * 4 + swizzle; + reg = get_itemp(gen); + reg_is_itemp = TRUE; + /* Load: reg = memory[(machine_reg) + offset] */ + spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); + } + break; + default: + assert(0); + } + } - /* XXX need to examine src swizzle info here. 
- * That will involve changing the channel var... + /* + * Handle absolute value, negate or set-negative of src register. */ + sign_op = tgsi_util_get_full_src_register_sign_mode(src, channel); + if (sign_op != TGSI_UTIL_SIGN_KEEP) { + /* + * All sign ops are done by manipulating bit 31, the IEEE float sign bit. + */ + const int bit31mask_reg = get_itemp(gen); + int result_reg; + + if (reg_is_itemp) { + /* re-use 'reg' for the result */ + result_reg = reg; + } + else { + /* alloc a new reg for the result */ + result_reg = get_itemp(gen); + } + /* mask with bit 31 set, the rest cleared */ + spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); - switch (src->SrcRegister.File) { - case TGSI_FILE_TEMPORARY: - reg = gen->temp_regs[src->SrcRegister.Index][channel]; - break; - case TGSI_FILE_INPUT: - { - /* offset is measured in quadwords, not bytes */ - int offset = src->SrcRegister.Index * 4 + channel; - reg = get_itemp(gen); - /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->inputs_reg, offset); -#if DISASSEM - printf("lqd\tr%d, r%d + %d\n", reg, gen->inputs_reg, offset); -#endif + if (sign_op == TGSI_UTIL_SIGN_CLEAR) { + spe_andc(gen->f, result_reg, reg, bit31mask_reg); } - break; - case TGSI_FILE_IMMEDIATE: - /* xxx fall-through for now / fix */ - case TGSI_FILE_CONSTANT: - /* xxx fall-through for now / fix */ - default: - assert(0); + else if (sign_op == TGSI_UTIL_SIGN_SET) { + spe_and(gen->f, result_reg, reg, bit31mask_reg); + } + else { + assert(sign_op == TGSI_UTIL_SIGN_TOGGLE); + spe_xor(gen->f, result_reg, reg, bit31mask_reg); + } + + reg = result_reg; } return reg; @@ -183,11 +381,14 @@ get_dst_reg(struct codegen *gen, int channel, const struct tgsi_full_dst_register *dest) { - int reg; + int reg = -1; switch (dest->DstRegister.File) { case TGSI_FILE_TEMPORARY: - reg = gen->temp_regs[dest->DstRegister.Index][channel]; + if (gen->if_nesting > 0 || gen->loop_nesting > 0) + reg = get_itemp(gen); + else + reg = 
gen->temp_regs[dest->DstRegister.Index][channel]; break; case TGSI_FILE_OUTPUT: reg = get_itemp(gen); @@ -211,19 +412,59 @@ store_dest_reg(struct codegen *gen, int value_reg, int channel, const struct tgsi_full_dst_register *dest) { + /* + * XXX need to implement dst reg clamping/saturation + */ +#if 0 + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + break; + case TGSI_SAT_MINUS_PLUS_ONE: + break; + default: + assert( 0 ); + } +#endif + switch (dest->DstRegister.File) { case TGSI_FILE_TEMPORARY: - /* no-op */ + if (gen->if_nesting > 0 || gen->loop_nesting > 0) { + int d_reg = gen->temp_regs[dest->DstRegister.Index][channel]; + int exec_reg = get_exec_mask_reg(gen); + /* Mix d with new value according to exec mask: + * d[i] = mask_reg[i] ? value_reg : d_reg + */ + spe_selb(gen->f, d_reg, d_reg, value_reg, exec_reg); + } + else { + /* we're not inside a condition or loop: do nothing special */ + + } break; case TGSI_FILE_OUTPUT: { /* offset is measured in quadwords, not bytes */ int offset = dest->DstRegister.Index * 4 + channel; - /* Store: memory[(machine_reg) + offset] = reg */ - spe_stqd(gen->f, value_reg, gen->outputs_reg, offset); -#if DISASSEM - printf("stqd\tr%d, r%d + %d\n", value_reg, gen->outputs_reg, offset); -#endif + if (gen->if_nesting > 0 || gen->loop_nesting > 0) { + int exec_reg = get_exec_mask_reg(gen); + int curval_reg = get_itemp(gen); + /* First read the current value from memory: + * Load: curval = memory[(machine_reg) + offset] + */ + spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); + /* Mix curval with newvalue according to exec mask: + * d[i] = mask_reg[i] ? 
value_reg : d_reg + */ + spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); + /* Store: memory[(machine_reg) + offset] = curval */ + spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); + } + else { + /* Store: memory[(machine_reg) + offset] = reg */ + spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16); + } } break; default: @@ -232,125 +473,1265 @@ store_dest_reg(struct codegen *gen, } + +static void +emit_prologue(struct codegen *gen) +{ + gen->frame_size = 1024; /* XXX temporary, should be dynamic */ + + spe_comment(gen->f, 0, "Function prologue:"); + + /* save $lr on stack # stqd $lr,16($sp) */ + spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + + if (gen->frame_size >= 512) { + /* offset is too large for ai instruction */ + int offset_reg = spe_allocate_available_register(gen->f); + int sp_reg = spe_allocate_available_register(gen->f); + /* offset = -framesize */ + spe_load_int(gen->f, offset_reg, -gen->frame_size); + /* sp = $sp */ + spe_move(gen->f, sp_reg, SPE_REG_SP); + /* $sp = $sp + offset_reg */ + spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); + /* save $sp in stack frame */ + spe_stqd(gen->f, sp_reg, SPE_REG_SP, 0); + /* clean up */ + spe_release_register(gen->f, offset_reg); + spe_release_register(gen->f, sp_reg); + } + else { + /* save stack pointer # stqd $sp,-frameSize($sp) */ + spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); + + /* adjust stack pointer # ai $sp,$sp,-frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); + } +} + + +static void +emit_epilogue(struct codegen *gen) +{ + const int return_reg = 3; + + spe_comment(gen->f, 0, "Function epilogue:"); + + spe_comment(gen->f, 0, "return the killed mask"); + if (gen->kill_mask_reg > 0) { + /* shader called KIL, return the "alive" mask */ + spe_move(gen->f, return_reg, gen->kill_mask_reg); + } + else { + /* return {0,0,0,0} */ + spe_load_uint(gen->f, return_reg, 0); + } + + spe_comment(gen->f, 0, "restore stack and return"); + if 
(gen->frame_size >= 512) { + /* offset is too large for ai instruction */ + int offset_reg = spe_allocate_available_register(gen->f); + /* offset = framesize */ + spe_load_int(gen->f, offset_reg, gen->frame_size); + /* $sp = $sp + offset */ + spe_a(gen->f, SPE_REG_SP, SPE_REG_SP, offset_reg); + /* clean up */ + spe_release_register(gen->f, offset_reg); + } + else { + /* restore stack pointer # ai $sp,$sp,frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size); + } + + /* restore $lr # lqd $lr,16($sp) */ + spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + + /* return from function call */ + spe_bi(gen->f, SPE_REG_RA, 0, 0); +} + + +#define FOR_EACH_ENABLED_CHANNEL(inst, ch) \ + for (ch = 0; ch < 4; ch++) \ + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) + + +static boolean +emit_ARL(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch = 0, src_reg, addr_reg; + + src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + addr_reg = get_address_reg(gen); + + /* convert float to int */ + spe_cflts(gen->f, addr_reg, src_reg, 0); + + free_itemps(gen); + + return TRUE; +} + + static boolean emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) { + int ch, src_reg[4], dst_reg[4]; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + src_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + dst_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + if (is_register_src(gen, ch, &inst->FullSrcRegisters[0]) && + is_memory_dst(gen, ch, &inst->FullDstRegisters[0])) { + /* special-case: register to memory store */ + store_dest_reg(gen, src_reg[ch], ch, &inst->FullDstRegisters[0]); + } + else { + spe_move(gen->f, dst_reg[ch], src_reg[ch]); + store_dest_reg(gen, dst_reg[ch], ch, &inst->FullDstRegisters[0]); + } + } + + free_itemps(gen); + + return TRUE; +} + +/** + * Emit binary operation + */ +static boolean +emit_binop(struct codegen *gen, const struct 
tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], s2_reg[4], d_reg[4]; + + /* Loop over Red/Green/Blue/Alpha channels, fetch src operands */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + /* Loop over Red/Green/Blue/Alpha channels, do the op, store results */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + /* Emit actual SPE instruction: d = s1 + s2 */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + spe_fa(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + case TGSI_OPCODE_SUB: + spe_fs(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + case TGSI_OPCODE_MUL: + spe_fm(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + default: + ; + } + } + + /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + /* Free any intermediate temps we allocated */ + free_itemps(gen); + + return TRUE; +} + + +/** + * Emit multiply add. See emit_ADD for comments. + */ +static boolean +emit_MAD(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4]; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fma(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch], s3_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + free_itemps(gen); + return TRUE; +} + + +/** + * Emit linear interpolate. See emit_ADD for comments. 
+ */ +static boolean +emit_LERP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], s2_reg[4], s3_reg[4], d_reg[4], tmp_reg[4]; + + /* setup/get src/dst/temp regs */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + s3_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + + /* d = s3 + s1(s2 - s3) */ + /* do all subtracts, then all fma, then all stores to better pipeline */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fs(gen->f, tmp_reg[ch], s2_reg[ch], s3_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fma(gen->f, d_reg[ch], tmp_reg[ch], s1_reg[ch], s3_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + free_itemps(gen); + return TRUE; +} + + + +/** + * Emit reciprocal or recip sqrt. + */ +static boolean +emit_RCP_RSQ(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], d_reg[4], tmp_reg[4]; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + if (inst->Instruction.Opcode == TGSI_OPCODE_RCP) { + /* tmp = 1/s1 */ + spe_frest(gen->f, tmp_reg[ch], s1_reg[ch]); + } + else { + /* tmp = 1/sqrt(s1) */ + spe_frsqest(gen->f, tmp_reg[ch], s1_reg[ch]); + } + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + /* d = float_interp(s1, tmp) */ + spe_fi(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +/** + * Emit absolute value. See emit_ADD for comments. 
+ */ +static boolean +emit_ABS(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], d_reg[4]; + const int bit31mask_reg = get_itemp(gen); + + /* mask with bit 31 set, the rest cleared */ + spe_load_uint(gen->f, bit31mask_reg, (1 << 31)); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + /* d = sign bit cleared in s1 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_andc(gen->f, d_reg[ch], s1_reg[ch], bit31mask_reg); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + +/** + * Emit 3 component dot product. See emit_ADD for comments. + */ +static boolean +emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ int ch; - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int src_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int dst_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - /* XXX we don't always need to actually emit a mov instruction here */ - spe_move(gen->f, dst_reg, src_reg); -#if DISASSEM - printf("mov\tr%d, r%d\n", dst_reg, src_reg); -#endif - store_dest_reg(gen, dst_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); + int s1x_reg, s1y_reg, s1z_reg; + int s2x_reg, s2y_reg, s2z_reg; + int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); + + s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s2x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + + /* t0 = x0 * x1 */ + spe_fm(gen->f, t0_reg, s1x_reg, s2x_reg); + + /* 
t1 = y0 * y1 */ + spe_fm(gen->f, t1_reg, s1y_reg, s2y_reg); + + /* t0 = z0 * z1 + t0 */ + spe_fma(gen->f, t0_reg, s1z_reg, s2z_reg, t0_reg); + + /* t0 = t0 + t1 */ + spe_fa(gen->f, t0_reg, t0_reg, t1_reg); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, t0_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + +/** + * Emit 4 component dot product. See emit_ADD for comments. + */ +static boolean +emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + int s0x_reg, s0y_reg, s0z_reg, s0w_reg; + int s1x_reg, s1y_reg, s1z_reg, s1w_reg; + int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); + + s0x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s1x_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + s0y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s1y_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + s0z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s1z_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + s0w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[0]); + s1w_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + + /* t0 = x0 * x1 */ + spe_fm(gen->f, t0_reg, s0x_reg, s1x_reg); + + /* t1 = y0 * y1 */ + spe_fm(gen->f, t1_reg, s0y_reg, s1y_reg); + + /* t0 = z0 * z1 + t0 */ + spe_fma(gen->f, t0_reg, s0z_reg, s1z_reg, t0_reg); + + /* t1 = w0 * w1 + t1 */ + spe_fma(gen->f, t1_reg, s0w_reg, s1w_reg, t1_reg); + + /* t0 = t0 + t1 */ + spe_fa(gen->f, t0_reg, t0_reg, t1_reg); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, t0_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + +/** + * Emit homogeneous dot product. See emit_ADD for comments. 
+ */ +static boolean +emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + /* XXX rewrite this function to look more like DP3/DP4 */ + int ch; + int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + int tmp_reg = get_itemp(gen); + + /* t = x0 * x1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + /* t = y0 * y1 + t */ + spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* t = z0 * z1 + t */ + spe_fma(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + s2_reg = get_src_reg(gen, CHAN_W, &inst->FullSrcRegisters[1]); + /* t = w1 + t */ + spe_fa(gen->f, tmp_reg, s2_reg, tmp_reg); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, tmp_reg); + store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + +/** + * Emit 3-component vector normalize. 
+ */ +static boolean +emit_NRM3(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch; + int src_reg[3]; + int t0_reg = get_itemp(gen), t1_reg = get_itemp(gen); + + src_reg[0] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + src_reg[1] = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + src_reg[2] = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + + /* t0 = x * x */ + spe_fm(gen->f, t0_reg, src_reg[0], src_reg[0]); + + /* t1 = y * y */ + spe_fm(gen->f, t1_reg, src_reg[1], src_reg[1]); + + /* t0 = z * z + t0 */ + spe_fma(gen->f, t0_reg, src_reg[2], src_reg[2], t0_reg); + + /* t0 = t0 + t1 */ + spe_fa(gen->f, t0_reg, t0_reg, t1_reg); + + /* t1 = 1.0 / sqrt(t0) */ + spe_frsqest(gen->f, t1_reg, t0_reg); + spe_fi(gen->f, t1_reg, t0_reg, t1_reg); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + /* dst = src[ch] * t1 */ + spe_fm(gen->f, d_reg, src_reg[ch], t1_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +/** + * Emit cross product. See emit_ADD for comments. 
+ */ +static boolean +emit_XPD(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + int tmp_reg = get_itemp(gen); + + /* t = z0 * y1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* t = y0 * z1 - t */ + spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_X)) { + store_dest_reg(gen, tmp_reg, CHAN_X, &inst->FullDstRegisters[0]); + } + + s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[1]); + /* t = x0 * z1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_Z, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + /* t = z0 * x1 - t */ + spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Y)) { + store_dest_reg(gen, tmp_reg, CHAN_Y, &inst->FullDstRegisters[0]); + } + + s1_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); + /* t = y0 * x1 */ + spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); + + s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); + s2_reg = get_src_reg(gen, CHAN_Y, &inst->FullSrcRegisters[1]); + /* t = x0 * y1 - t */ + spe_fms(gen->f, tmp_reg, s1_reg, s2_reg, tmp_reg); + + if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << CHAN_Z)) { + store_dest_reg(gen, tmp_reg, CHAN_Z, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +/** + * Emit inequality instruction. + * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as + * the result but OpenGL/TGSI needs 0.0 and 1.0 results. 
+ * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND. + */ +static boolean +emit_inequality(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], s2_reg[4], d_reg[4], one_reg; + bool complement = FALSE; + + one_reg = get_const_one_reg(gen); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s2_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_SGT: + spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + case TGSI_OPCODE_SLT: + spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); + break; + case TGSI_OPCODE_SGE: + spe_fcgt(gen->f, d_reg[ch], s2_reg[ch], s1_reg[ch]); + complement = TRUE; + break; + case TGSI_OPCODE_SLE: + spe_fcgt(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + complement = TRUE; + break; + case TGSI_OPCODE_SEQ: + spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + break; + case TGSI_OPCODE_SNE: + spe_fceq(gen->f, d_reg[ch], s1_reg[ch], s2_reg[ch]); + complement = TRUE; + break; + default: + assert(0); } } - return true; + + /* convert d from 0x0/0xffffffff to 0.0/1.0 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + /* d = d & one_reg */ + if (complement) + spe_andc(gen->f, d_reg[ch], one_reg, d_reg[ch]); + else + spe_and(gen->f, d_reg[ch], one_reg, d_reg[ch]); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; } /** - * Emit addition instructions. Recall that a single TGSI_OPCODE_ADD - * becomes (up to) four SPU "fa" instructions because we're doing SOA - * processing. + * Emit compare. 
*/ static boolean -emit_ADD(struct codegen *gen, const struct tgsi_full_instruction *inst) +emit_CMP(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; - /* Loop over Red/Green/Blue/Alpha channels */ - for (ch = 0; ch < 4; ch++) { - /* If the dest R, G, B or A writemask is enabled... */ - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - /* get indexes of the two src, one dest SPE registers */ - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - - /* Emit actual SPE instruction: d = s1 + s2 */ - spe_fa(gen->f, d_reg, s1_reg, s2_reg); -#if DISASSEM - printf("fa\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + int s3_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[2]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int zero_reg = get_itemp(gen); + + spe_zero(gen->f, zero_reg); + + /* d = (s1 < 0) ? s2 : s3 */ + spe_fcgt(gen->f, d_reg, zero_reg, s1_reg); + spe_selb(gen->f, d_reg, s3_reg, s2_reg, d_reg); + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + + return TRUE; +} + +/** + * Emit trunc. 
+ * Convert float to signed int + * Convert signed int to float + */ +static boolean +emit_TRUNC(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], d_reg[4]; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + /* Convert float to int */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_cflts(gen->f, d_reg[ch], s1_reg[ch], 0); + } + + /* Convert int to float */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_csflt(gen->f, d_reg[ch], d_reg[ch], 0); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +/** + * Emit floor. + * If negative int subtract one + * Convert float to signed int + * Convert signed int to float + */ +static boolean +emit_FLR(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; + + zero_reg = get_itemp(gen); + spe_zero(gen->f, zero_reg); + one_reg = get_const_one_reg(gen); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + + /* If negative, subtract 1.0 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); + } + + /* Convert float to int */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); + } + + /* Convert int to float */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_csflt(gen->f, d_reg[ch], tmp_reg[ch], 0); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, 
d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +/** + * Compute frac = Input - FLR(Input) + */ +static boolean +emit_FRC(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s1_reg[4], d_reg[4], tmp_reg[4], zero_reg, one_reg; + + zero_reg = get_itemp(gen); + spe_zero(gen->f, zero_reg); + one_reg = get_const_one_reg(gen); + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + + /* If negative, subtract 1.0 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fcgt(gen->f, tmp_reg[ch], zero_reg, s1_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_selb(gen->f, tmp_reg[ch], zero_reg, one_reg, tmp_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fs(gen->f, tmp_reg[ch], s1_reg[ch], tmp_reg[ch]); + } + + /* Convert float to int */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_cflts(gen->f, tmp_reg[ch], tmp_reg[ch], 0); + } + + /* Convert int to float */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_csflt(gen->f, tmp_reg[ch], tmp_reg[ch], 0); + } + + /* d = s1 - FLR(s1) */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_fs(gen->f, d_reg[ch], s1_reg[ch], tmp_reg[ch]); + } + + /* store result */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +#if 0 +static void +print_functions(struct cell_context *cell) +{ + struct cell_spu_function_info *funcs = &cell->spu_functions; + uint i; + for (i = 0; i < funcs->num; i++) { + printf("SPU func %u: %s at %u\n", + i, funcs->names[i], funcs->addrs[i]); + } +} #endif - /* Store the result (a no-op for TGSI_FILE_TEMPORARY dests) */ - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - /* Free any intermediate temps we allocated */ - free_itemps(gen); + +static uint +lookup_function(struct 
cell_context *cell, const char *funcname) +{ + const struct cell_spu_function_info *funcs = &cell->spu_functions; + uint i, addr = 0; + for (i = 0; i < funcs->num; i++) { + if (strcmp(funcs->names[i], funcname) == 0) { + addr = funcs->addrs[i]; } } - return true; + assert(addr && "spu function not found"); + return addr / 4; /* discard 2 least significant bits */ } /** - * Emit multiply. See emit_ADD for comments. + * Emit code to call a SPU function. + * Used to implement instructions like SIN/COS/POW/TEX/etc. + * If scalar, only the X components of the src regs are used, and the + * result is replicated across the dest register's XYZW components. */ static boolean -emit_MUL(struct codegen *gen, const struct tgsi_full_instruction *inst) +emit_function_call(struct codegen *gen, + const struct tgsi_full_instruction *inst, + char *funcname, uint num_args, boolean scalar) +{ + const uint addr = lookup_function(gen->cell, funcname); + char comment[100]; + int s_regs[3]; + int func_called = FALSE; + uint a, ch; + int retval_reg = -1; + + assert(num_args <= 3); + + snprintf(comment, sizeof(comment), "CALL %s:", funcname); + spe_comment(gen->f, -4, comment); + + if (scalar) { + for (a = 0; a < num_args; a++) { + s_regs[a] = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[a]); + } + /* we'll call the function, put the return value in this register, + * then replicate it across all write-enabled components in d_reg. 
+ */ + retval_reg = spe_allocate_available_register(gen->f); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int d_reg; + ubyte usedRegs[SPE_NUM_REGS]; + uint i, numUsed; + + if (!scalar) { + for (a = 0; a < num_args; a++) { + s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); + } + } + + d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + if (!scalar || !func_called) { + /* for a scalar function, we'll really only call the function once */ + + numUsed = spe_get_registers_used(gen->f, usedRegs); + assert(numUsed < gen->frame_size / 16 - 2); + + /* save registers to stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + int offset = 2 + i; + spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + + /* setup function arguments */ + for (a = 0; a < num_args; a++) { + spe_move(gen->f, 3 + a, s_regs[a]); + } + + /* branch to function, save return addr */ + spe_brasl(gen->f, SPE_REG_RA, addr); + + /* save function's return value */ + if (scalar) + spe_move(gen->f, retval_reg, 3); + else + spe_move(gen->f, d_reg, 3); + + /* restore registers from stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + if (reg != d_reg && reg != retval_reg) { + int offset = 2 + i; + spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + } + + func_called = TRUE; + } + + if (scalar) { + spe_move(gen->f, d_reg, retval_reg); + } + + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); + free_itemps(gen); + } + + if (scalar) { + spe_release_register(gen->f, retval_reg); + } + + return TRUE; +} + + +static boolean +emit_TEX(struct codegen *gen, const struct tgsi_full_instruction *inst) { + const uint target = inst->InstructionExtTexture.Texture; + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + uint addr; int ch; + int coord_regs[4], d_regs[4]; + + switch (target) { + case TGSI_TEXTURE_1D: + case TGSI_TEXTURE_2D: + addr = lookup_function(gen->cell, "spu_tex_2d"); + break; + case TGSI_TEXTURE_3D: + addr = 
lookup_function(gen->cell, "spu_tex_3d"); + break; + case TGSI_TEXTURE_CUBE: + addr = lookup_function(gen->cell, "spu_tex_cube"); + break; + default: + ASSERT(0 && "unsupported texture target"); + return FALSE; + } + + assert(inst->FullSrcRegisters[1].SrcRegister.File == TGSI_FILE_SAMPLER); + + spe_comment(gen->f, -4, "CALL tex:"); + + /* get src/dst reg info */ for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - /* d = s1 * s2 */ - spe_fm(gen->f, d_reg, s1_reg, s2_reg); -#if DISASSEM - printf("fm\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); -#endif - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); + coord_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + d_regs[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + } + + { + ubyte usedRegs[SPE_NUM_REGS]; + uint i, numUsed; + + numUsed = spe_get_registers_used(gen->f, usedRegs); + assert(numUsed < gen->frame_size / 16 - 2); + + /* save registers to stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + int offset = 2 + i; + spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + + /* setup function arguments (XXX depends on target) */ + for (i = 0; i < 4; i++) { + spe_move(gen->f, 3 + i, coord_regs[i]); } + spe_load_uint(gen->f, 7, unit); /* sampler unit */ + + /* branch to function, save return addr */ + spe_brasl(gen->f, SPE_REG_RA, addr); + + /* save function's return values (four pixel's colors) */ + for (i = 0; i < 4; i++) { + spe_move(gen->f, d_regs[i], 3 + i); + } + + /* restore registers from stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + if (reg != d_regs[0] && + reg != d_regs[1] && + reg != d_regs[2] && + reg != d_regs[3]) { + int offset = 2 + i; + spe_lqd(gen->f, reg, SPE_REG_SP, 16 * 
offset); + } + } + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_regs[ch], ch, &inst->FullDstRegisters[0]); + free_itemps(gen); } - return true; + + return TRUE; } /** - * Emit set-if-greater-than. - * Note that the SPE fcgt instruction produces 0x0 and 0xffffffff as - * the result but OpenGL/TGSI needs 0.0 and 1.0 results. - * We can easily convert 0x0/0xffffffff to 0.0/1.0 with a bitwise AND. + * KILL if any of src reg values are less than zero. */ static boolean -emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) +emit_KIL(struct codegen *gen, const struct tgsi_full_instruction *inst) { int ch; + int s_regs[4], kil_reg = -1, cmp_reg, zero_reg; - for (ch = 0; ch < 4; ch++) { - if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); - int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - - /* d = (s1 > s2) */ - spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); -#if DISASSEM - printf("fcgt\tr%d, r%d, r%d\n", d_reg, s1_reg, s2_reg); -#endif + spe_comment(gen->f, -4, "CALL kil:"); - /* convert d from 0x0/0xffffffff to 0.0/1.0 */ - /* d = d & one_reg */ - spe_and(gen->f, d_reg, d_reg, get_const_one_reg(gen)); -#if DISASSEM - printf("and\tr%d, r%d, r%d\n", d_reg, d_reg, get_const_one_reg(gen)); -#endif + /* zero = {0,0,0,0} */ + zero_reg = get_itemp(gen); + spe_zero(gen->f, zero_reg); + + cmp_reg = get_itemp(gen); + + /* get src regs */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s_regs[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + } + + /* test if any src regs are < 0 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + if (kil_reg >= 0) { + /* cmp = 0 > src ? : ~0 : 0 */ + spe_fcgt(gen->f, cmp_reg, zero_reg, s_regs[ch]); + /* kil = kil | cmp */ + spe_or(gen->f, kil_reg, kil_reg, cmp_reg); + } + else { + kil_reg = get_itemp(gen); + /* kil = 0 > src ? 
: ~0 : 0 */ + spe_fcgt(gen->f, kil_reg, zero_reg, s_regs[ch]); + } + } + + if (gen->if_nesting || gen->loop_nesting) { + /* may have been a conditional kil */ + spe_and(gen->f, kil_reg, kil_reg, gen->exec_mask_reg); + } - store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); - free_itemps(gen); + /* allocate the kill mask reg if needed */ + if (gen->kill_mask_reg <= 0) { + gen->kill_mask_reg = spe_allocate_available_register(gen->f); + spe_move(gen->f, gen->kill_mask_reg, kil_reg); + } + else { + spe_or(gen->f, gen->kill_mask_reg, gen->kill_mask_reg, kil_reg); + } + + free_itemps(gen); + + return TRUE; +} + + + +/** + * Emit min or max. + */ +static boolean +emit_MIN_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int ch, s0_reg[4], s1_reg[4], d_reg[4], tmp_reg[4]; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + s0_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + s1_reg[ch] = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); + d_reg[ch] = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + tmp_reg[ch] = get_itemp(gen); + } + + /* d = (s0 > s1) ? s0 : s1 */ + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + if (inst->Instruction.Opcode == TGSI_OPCODE_MAX) + spe_fcgt(gen->f, tmp_reg[ch], s0_reg[ch], s1_reg[ch]); + else + spe_fcgt(gen->f, tmp_reg[ch], s1_reg[ch], s0_reg[ch]); + } + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + spe_selb(gen->f, d_reg[ch], s1_reg[ch], s0_reg[ch], tmp_reg[ch]); + } + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + store_dest_reg(gen, d_reg[ch], ch, &inst->FullDstRegisters[0]); + } + + free_itemps(gen); + return TRUE; +} + + +/** + * Emit code to update the execution mask. + * This needs to be done whenever the execution status of a conditional + * or loop is changed. 
+ */ +static void +emit_update_exec_mask(struct codegen *gen) +{ + const int exec_reg = get_exec_mask_reg(gen); + const int cond_reg = gen->cond_mask_reg; + const int loop_reg = gen->loop_mask_reg; + + spe_comment(gen->f, 0, "Update master execution mask"); + + if (gen->if_nesting > 0 && gen->loop_nesting > 0) { + /* exec_mask = cond_mask & loop_mask */ + assert(cond_reg > 0); + assert(loop_reg > 0); + spe_and(gen->f, exec_reg, cond_reg, loop_reg); + } + else if (gen->if_nesting > 0) { + assert(cond_reg > 0); + spe_move(gen->f, exec_reg, cond_reg); + } + else if (gen->loop_nesting > 0) { + assert(loop_reg > 0); + spe_move(gen->f, exec_reg, loop_reg); + } + else { + spe_load_int(gen->f, exec_reg, ~0x0); + } +} + + +static boolean +emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int channel = 0; + int cond_reg; + + cond_reg = get_cond_mask_reg(gen); + + /* XXX push cond exec mask */ + + spe_comment(gen->f, 0, "init conditional exec mask = ~0:"); + spe_load_int(gen->f, cond_reg, ~0); + + /* update conditional execution mask with the predicate register */ + int tmp_reg = get_itemp(gen); + int s1_reg = get_src_reg(gen, channel, &inst->FullSrcRegisters[0]); + + /* tmp = (s1_reg == 0) */ + spe_ceqi(gen->f, tmp_reg, s1_reg, 0); + /* tmp = !tmp */ + spe_complement(gen->f, tmp_reg, tmp_reg); + /* cond_mask = cond_mask & tmp */ + spe_and(gen->f, cond_reg, cond_reg, tmp_reg); + + gen->if_nesting++; + + /* update the master execution mask */ + emit_update_exec_mask(gen); + + free_itemps(gen); + + return TRUE; +} + + +static boolean +emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int cond_reg = get_cond_mask_reg(gen); + + spe_comment(gen->f, 0, "cond exec mask = !cond exec mask"); + spe_complement(gen->f, cond_reg, cond_reg); + emit_update_exec_mask(gen); + + return TRUE; +} + + +static boolean +emit_ENDIF(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + /* XXX todo: pop cond exec mask */ + 
+ gen->if_nesting--; + + emit_update_exec_mask(gen); + + return TRUE; +} + + +static boolean +emit_BGNLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + int exec_reg, loop_reg; + + exec_reg = get_exec_mask_reg(gen); + loop_reg = get_loop_mask_reg(gen); + + /* XXX push loop_exec mask */ + + spe_comment(gen->f, 0*-4, "initialize loop exec mask = ~0"); + spe_load_int(gen->f, loop_reg, ~0x0); + + gen->loop_nesting++; + gen->loop_start = spe_code_size(gen->f); /* in bytes */ + + return TRUE; +} + + +static boolean +emit_ENDLOOP(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int loop_reg = get_loop_mask_reg(gen); + const int tmp_reg = get_itemp(gen); + int offset; + + /* tmp_reg = exec[0] | exec[1] | exec[2] | exec[3] */ + spe_orx(gen->f, tmp_reg, loop_reg); + + offset = gen->loop_start - spe_code_size(gen->f); /* in bytes */ + + /* branch back to top of loop if tmp_reg != 0 */ + spe_brnz(gen->f, tmp_reg, offset / 4); + + /* XXX pop loop_exec mask */ + + gen->loop_nesting--; + + emit_update_exec_mask(gen); + + return TRUE; +} + + +static boolean +emit_BRK(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + const int exec_reg = get_exec_mask_reg(gen); + const int loop_reg = get_loop_mask_reg(gen); + + assert(gen->loop_nesting > 0); + + spe_comment(gen->f, 0, "loop exec mask &= ~master exec mask"); + spe_andc(gen->f, loop_reg, loop_reg, exec_reg); + + emit_update_exec_mask(gen); + + return TRUE; +} + + +static boolean +emit_CONT(struct codegen *gen, const struct tgsi_full_instruction *inst) +{ + assert(gen->loop_nesting > 0); + + return TRUE; +} + + +static boolean +emit_DDX_DDY(struct codegen *gen, const struct tgsi_full_instruction *inst, + boolean ddx) +{ + int ch; + + FOR_EACH_ENABLED_CHANNEL(inst, ch) { + int s_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + + int t1_reg = get_itemp(gen); + int t2_reg = get_itemp(gen); + + 
spe_splat_word(gen->f, t1_reg, s_reg, 0); /* upper-left pixel */ + if (ddx) { + spe_splat_word(gen->f, t2_reg, s_reg, 1); /* upper-right pixel */ + } + else { + spe_splat_word(gen->f, t2_reg, s_reg, 2); /* lower-left pixel */ } + spe_fs(gen->f, d_reg, t2_reg, t1_reg); + + free_itemps(gen); } - return true; + return TRUE; } + + /** * Emit END instruction. * We just return from the shader function at this point. @@ -361,12 +1742,8 @@ emit_SGT(struct codegen *gen, const struct tgsi_full_instruction *inst) static boolean emit_END(struct codegen *gen) { - /* return from function call */ - spe_bi(gen->f, SPE_REG_RA, 0, 0); -#if DISASSEM - printf("bi\trRA\n"); -#endif - return true; + emit_epilogue(gen); + return TRUE; } @@ -378,24 +1755,153 @@ emit_instruction(struct codegen *gen, const struct tgsi_full_instruction *inst) { switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + return emit_ARL(gen, inst); case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: return emit_MOV(gen, inst); - case TGSI_OPCODE_MUL: - return emit_MUL(gen, inst); case TGSI_OPCODE_ADD: - return emit_ADD(gen, inst); + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_MUL: + return emit_binop(gen, inst); + case TGSI_OPCODE_MAD: + return emit_MAD(gen, inst); + case TGSI_OPCODE_LERP: + return emit_LERP(gen, inst); + case TGSI_OPCODE_DP3: + return emit_DP3(gen, inst); + case TGSI_OPCODE_DP4: + return emit_DP4(gen, inst); + case TGSI_OPCODE_DPH: + return emit_DPH(gen, inst); + case TGSI_OPCODE_NRM: + return emit_NRM3(gen, inst); + case TGSI_OPCODE_XPD: + return emit_XPD(gen, inst); + case TGSI_OPCODE_RCP: + case TGSI_OPCODE_RSQ: + return emit_RCP_RSQ(gen, inst); + case TGSI_OPCODE_ABS: + return emit_ABS(gen, inst); case TGSI_OPCODE_SGT: - return emit_SGT(gen, inst); + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGE: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SNE: + return emit_inequality(gen, inst); + case TGSI_OPCODE_CMP: + return emit_CMP(gen, inst); + case TGSI_OPCODE_MIN: + case 
TGSI_OPCODE_MAX: + return emit_MIN_MAX(gen, inst); + case TGSI_OPCODE_TRUNC: + return emit_TRUNC(gen, inst); + case TGSI_OPCODE_FLR: + return emit_FLR(gen, inst); + case TGSI_OPCODE_FRC: + return emit_FRC(gen, inst); case TGSI_OPCODE_END: return emit_END(gen); + case TGSI_OPCODE_COS: + return emit_function_call(gen, inst, "spu_cos", 1, TRUE); + case TGSI_OPCODE_SIN: + return emit_function_call(gen, inst, "spu_sin", 1, TRUE); + case TGSI_OPCODE_POW: + return emit_function_call(gen, inst, "spu_pow", 2, TRUE); + case TGSI_OPCODE_EXPBASE2: + return emit_function_call(gen, inst, "spu_exp2", 1, TRUE); + case TGSI_OPCODE_LOGBASE2: + return emit_function_call(gen, inst, "spu_log2", 1, TRUE); + case TGSI_OPCODE_TEX: + /* fall-through for now */ + case TGSI_OPCODE_TXD: + /* fall-through for now */ + case TGSI_OPCODE_TXB: + /* fall-through for now */ + case TGSI_OPCODE_TXL: + /* fall-through for now */ + case TGSI_OPCODE_TXP: + return emit_TEX(gen, inst); + case TGSI_OPCODE_KIL: + return emit_KIL(gen, inst); + + case TGSI_OPCODE_IF: + return emit_IF(gen, inst); + case TGSI_OPCODE_ELSE: + return emit_ELSE(gen, inst); + case TGSI_OPCODE_ENDIF: + return emit_ENDIF(gen, inst); + + case TGSI_OPCODE_BGNLOOP2: + return emit_BGNLOOP(gen, inst); + case TGSI_OPCODE_ENDLOOP2: + return emit_ENDLOOP(gen, inst); + case TGSI_OPCODE_BRK: + return emit_BRK(gen, inst); + case TGSI_OPCODE_CONT: + return emit_CONT(gen, inst); + + case TGSI_OPCODE_DDX: + return emit_DDX_DDY(gen, inst, TRUE); + case TGSI_OPCODE_DDY: + return emit_DDX_DDY(gen, inst, FALSE); + /* XXX lots more cases to do... */ default: - return false; + fprintf(stderr, "Cell: unimplemented TGSI instruction %d!\n", + inst->Instruction.Opcode); + return FALSE; } - return true; + return TRUE; +} + + + +/** + * Emit code for a TGSI immediate value (vector of four floats). + * This involves register allocation and initialization. + * XXX the initialization should be done by a "prepare" stage, not + * per quad execution! 
+ */ +static boolean +emit_immediate(struct codegen *gen, const struct tgsi_full_immediate *immed) +{ + int ch; + + assert(gen->num_imm < MAX_TEMPS); + + for (ch = 0; ch < 4; ch++) { + float val = immed->u.ImmediateFloat32[ch].Float; + + if (ch > 0 && val == immed->u.ImmediateFloat32[ch - 1].Float) { + /* re-use previous register */ + gen->imm_regs[gen->num_imm][ch] = gen->imm_regs[gen->num_imm][ch - 1]; + } + else { + char str[100]; + int reg = spe_allocate_available_register(gen->f); + + if (reg < 0) + return FALSE; + + sprintf(str, "init $%d = %f", reg, val); + spe_comment(gen->f, 0, str); + + /* update immediate map */ + gen->imm_regs[gen->num_imm][ch] = reg; + + /* emit initializer instruction */ + spe_load_float(gen->f, reg, val); + } + } + + gen->num_imm++; + + return TRUE; } @@ -405,44 +1911,46 @@ emit_instruction(struct codegen *gen, * We only care about TGSI TEMPORARY register declarations at this time. * For each TGSI TEMPORARY we allocate four SPE registers. */ -static void -emit_declaration(struct codegen *gen, const struct tgsi_full_declaration *decl) +static boolean +emit_declaration(struct cell_context *cell, + struct codegen *gen, const struct tgsi_full_declaration *decl) { int i, ch; switch (decl->Declaration.File) { case TGSI_FILE_TEMPORARY: -#if DISASSEM - printf("Declare temp reg %d .. %d\n", - decl->DeclarationRange.First, - decl->DeclarationRange.Last); -#endif for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { + assert(i < MAX_TEMPS); for (ch = 0; ch < 4; ch++) { gen->temp_regs[i][ch] = spe_allocate_available_register(gen->f); + if (gen->temp_regs[i][ch] < 0) + return FALSE; /* out of regs */ } /* XXX if we run out of SPE registers, we need to spill * to SPU memory. someday... 
*/ -#if DISASSEM - printf(" SPE regs: %d %d %d %d\n", - gen->temp_regs[i][0], - gen->temp_regs[i][1], - gen->temp_regs[i][2], - gen->temp_regs[i][3]); -#endif + { + char buf[100]; + sprintf(buf, "TGSI temp[%d] maps to SPU regs [$%d $%d $%d $%d]", i, + gen->temp_regs[i][0], gen->temp_regs[i][1], + gen->temp_regs[i][2], gen->temp_regs[i][3]); + spe_comment(gen->f, 0, buf); + } } break; default: ; /* ignore */ } + + return TRUE; } + /** * Translate TGSI shader code to SPE instructions. This is done when * the state tracker gives us a new shader (via pipe->create_fs_state()). @@ -458,8 +1966,10 @@ cell_gen_fragment_program(struct cell_context *cell, { struct tgsi_parse_context parse; struct codegen gen; + uint ic = 0; memset(&gen, 0, sizeof(gen)); + gen.cell = cell; gen.f = f; /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ @@ -472,50 +1982,63 @@ cell_gen_fragment_program(struct cell_context *cell, spe_allocate_register(f, gen.outputs_reg); spe_allocate_register(f, gen.constants_reg); -#if DISASSEM - printf("Begin %s\n", __FUNCTION__); - tgsi_dump(tokens, 0); -#endif + if (cell->debug_flags & CELL_DEBUG_ASM) { + spe_print_code(f, TRUE); + spe_indent(f, 2*8); + printf("Begin %s\n", __FUNCTION__); + tgsi_dump(tokens, 0); + } tgsi_parse_init(&parse, tokens); + emit_prologue(&gen); + while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { tgsi_parse_token(&parse); switch (parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: -#if 0 - if (!note_immediate(&gen, &parse.FullToken.FullImmediate )) - goto fail; -#endif + if (f->print) { + _debug_printf(" # "); + tgsi_dump_immediate(&parse.FullToken.FullImmediate); + } + if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) + gen.error = TRUE; break; case TGSI_TOKEN_TYPE_DECLARATION: - emit_declaration(&gen, &parse.FullToken.FullDeclaration); + if (f->print) { + _debug_printf(" # "); + tgsi_dump_declaration(&parse.FullToken.FullDeclaration); + } + if (!emit_declaration(cell, &gen, 
&parse.FullToken.FullDeclaration)) + gen.error = TRUE; break; case TGSI_TOKEN_TYPE_INSTRUCTION: - if (!emit_instruction(&gen, &parse.FullToken.FullInstruction )) { - gen.error = true; + if (f->print) { + _debug_printf(" # "); + ic++; + tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); } + if (!emit_instruction(&gen, &parse.FullToken.FullInstruction)) + gen.error = TRUE; break; default: assert(0); - } } - if (gen.error) { /* terminate the SPE code */ return emit_END(&gen); } -#if DISASSEM - printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); - printf("End %s\n", __FUNCTION__); -#endif + if (cell->debug_flags & CELL_DEBUG_ASM) { + printf("cell_gen_fragment_program nr instructions: %d\n", f->num_inst); + printf("End %s\n", __FUNCTION__); + } tgsi_parse_free( &parse ); diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 06219d4e98..66d4b3b6a3 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -2,6 +2,7 @@ * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -25,11 +26,10 @@ * **************************************************************************/ - - /** * Generate SPU per-fragment code (actually per-quad code). * \author Brian Paul + * \author Bob Ellison */ @@ -54,12 +54,17 @@ * \param ifragZ_reg register containing integer fragment Z values (in) * \param ifbZ_reg register containing integer frame buffer Z values (in/out) * \param zmask_reg register containing result of Z test/comparison (out) + * + * Returns TRUE if the Z-buffer needs to be updated. 
*/ -static void -gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, +static boolean +gen_depth_test(struct spe_function *f, + const struct pipe_depth_stencil_alpha_state *dsa, int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) { + /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_ + * quantities. This only makes a difference for 32-bit Z values though. + */ ASSERT(dsa->depth.enabled); switch (dsa->depth.func) { @@ -79,28 +84,28 @@ gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, case PIPE_FUNC_GREATER: /* zmask = (ifragZ > ref) */ - spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); /* mask = (mask & zmask) */ spe_and(f, mask_reg, mask_reg, zmask_reg); break; case PIPE_FUNC_LESS: /* zmask = (ref > ifragZ) */ - spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); /* mask = (mask & zmask) */ spe_and(f, mask_reg, mask_reg, zmask_reg); break; case PIPE_FUNC_LEQUAL: /* zmask = (ifragZ > ref) */ - spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); + spe_clgt(f, zmask_reg, ifragZ_reg, ifbZ_reg); /* mask = (mask & ~zmask) */ spe_andc(f, mask_reg, mask_reg, zmask_reg); break; case PIPE_FUNC_GEQUAL: /* zmask = (ref > ifragZ) */ - spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); + spe_clgt(f, zmask_reg, ifbZ_reg, ifragZ_reg); /* mask = (mask & ~zmask) */ spe_andc(f, mask_reg, mask_reg, zmask_reg); break; @@ -129,7 +134,10 @@ gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, * framebufferZ = (ztest_passed ? 
fragmentZ : framebufferZ; */ spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); + return TRUE; } + + return FALSE; } @@ -153,7 +161,7 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, if ((dsa->alpha.func != PIPE_FUNC_NEVER) && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) { /* load/splat the alpha reference float value */ - spe_load_float(f, ref_reg, dsa->alpha.ref); + spe_load_float(f, ref_reg, dsa->alpha.ref_value); } /* emit code to do the alpha comparison, updating 'mask' */ @@ -230,6 +238,134 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, } +/** + * This pair of functions is used inline to allocate and deallocate + * optional constant registers. Once a constant is discovered to be + * needed, we will likely need it again, so we don't want to deallocate + * it and have to allocate and load it again unnecessarily. + */ +static INLINE void +setup_optional_register(struct spe_function *f, + int *r) +{ + if (*r < 0) + *r = spe_allocate_available_register(f); +} + +static INLINE void +release_optional_register(struct spe_function *f, + int r) +{ + if (r >= 0) + spe_release_register(f, r); +} + +static INLINE void +setup_const_register(struct spe_function *f, + int *r, + float value) +{ + if (*r >= 0) + return; + setup_optional_register(f, r); + spe_load_float(f, *r, value); +} + +static INLINE void +release_const_register(struct spe_function *f, + int r) +{ + release_optional_register(f, r); +} + + + +/** + * Unpack/convert framebuffer colors from four 32-bit packed colors + * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). + * Each 8-bit color component is expanded into a float in [0.0, 1.0]. 
+ */ +static void +unpack_colors(struct spe_function *f, + enum pipe_format color_format, + int fbRGBA_reg, + int fbR_reg, int fbG_reg, int fbB_reg, int fbA_reg) +{ + int mask0_reg = spe_allocate_available_register(f); + int mask1_reg = spe_allocate_available_register(f); + int mask2_reg = spe_allocate_available_register(f); + int mask3_reg = spe_allocate_available_register(f); + + spe_load_int(f, mask0_reg, 0xff); + spe_load_int(f, mask1_reg, 0xff00); + spe_load_int(f, mask2_reg, 0xff0000); + spe_load_int(f, mask3_reg, 0xff000000); + + spe_comment(f, 0, "Unpack framebuffer colors, convert to floats"); + + switch (color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + /* fbB = fbRGBA & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask0_reg); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbG_reg, fbRGBA_reg, mask1_reg); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask2_reg); + + /* fbA = fbRGBA & mask */ + spe_and(f, fbA_reg, fbRGBA_reg, mask3_reg); + + /* fbG = fbG >> 8 */ + spe_roti(f, fbG_reg, fbG_reg, -8); + + /* fbR = fbR >> 16 */ + spe_roti(f, fbR_reg, fbR_reg, -16); + + /* fbA = fbA >> 24 */ + spe_roti(f, fbA_reg, fbA_reg, -24); + break; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + /* fbA = fbRGBA & mask */ + spe_and(f, fbA_reg, fbRGBA_reg, mask0_reg); + + /* fbR = fbRGBA & mask */ + spe_and(f, fbR_reg, fbRGBA_reg, mask1_reg); + + /* fbG = fbRGBA & mask */ + spe_and(f, fbG_reg, fbRGBA_reg, mask2_reg); + + /* fbB = fbRGBA & mask */ + spe_and(f, fbB_reg, fbRGBA_reg, mask3_reg); + + /* fbR = fbR >> 8 */ + spe_roti(f, fbR_reg, fbR_reg, -8); + + /* fbG = fbG >> 16 */ + spe_roti(f, fbG_reg, fbG_reg, -16); + + /* fbB = fbB >> 24 */ + spe_roti(f, fbB_reg, fbB_reg, -24); + break; + + default: + ASSERT(0); + } + + /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */ + spe_cuflt(f, fbR_reg, fbR_reg, 8); + spe_cuflt(f, fbG_reg, fbG_reg, 8); + spe_cuflt(f, fbB_reg, fbB_reg, 8); + spe_cuflt(f, fbA_reg, fbA_reg, 8); + + spe_release_register(f, mask0_reg); + 
spe_release_register(f, mask1_reg); + spe_release_register(f, mask2_reg); + spe_release_register(f, mask3_reg); +} + /** * Generate SPE code to implement the given blend mode for a quad of pixels. @@ -242,6 +378,7 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, */ static void gen_blend(const struct pipe_blend_state *blend, + const struct pipe_blend_color *blend_color, struct spe_function *f, enum pipe_format color_format, int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg, @@ -262,211 +399,464 @@ gen_blend(const struct pipe_blend_state *blend, int fbB_reg = spe_allocate_available_register(f); int fbA_reg = spe_allocate_available_register(f); - int one_reg = spe_allocate_available_register(f); int tmp_reg = spe_allocate_available_register(f); - boolean one_reg_set = false; /* avoid setting one_reg more than once */ - - ASSERT(blend->blend_enable); - - /* Unpack/convert framebuffer colors from four 32-bit packed colors - * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA). - * Each 8-bit color component is expanded into a float in [0.0, 1.0]. + /* Optional constant registers we might or might not end up using; + * if we do use them, make sure we only allocate them once by + * keeping a flag on each one. 
*/ - { - int mask_reg = spe_allocate_available_register(f); - - /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */ - spe_load_int(f, mask_reg, 0xff); - - /* XXX there may be more clever ways to implement the following code */ - switch (color_format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - /* fbB = fbB & mask */ - spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); - /* mask = mask << 8 */ - spe_roti(f, mask_reg, mask_reg, 8); - - /* fbG = fbRGBA & mask */ - spe_and(f, fbG_reg, fbRGBA_reg, mask_reg); - /* fbG = fbG >> 8 */ - spe_roti(f, fbG_reg, fbG_reg, -8); - /* mask = mask << 8 */ - spe_roti(f, mask_reg, mask_reg, 8); - - /* fbR = fbRGBA & mask */ - spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); - /* fbR = fbR >> 16 */ - spe_roti(f, fbR_reg, fbR_reg, -16); - /* mask = mask << 8 */ - spe_roti(f, mask_reg, mask_reg, 8); - - /* fbA = fbRGBA & mask */ - spe_and(f, fbA_reg, fbRGBA_reg, mask_reg); - /* fbA = fbA >> 24 */ - spe_roti(f, fbA_reg, fbA_reg, -24); - break; - - case PIPE_FORMAT_B8G8R8A8_UNORM: - /* fbA = fbA & mask */ - spe_and(f, fbA_reg, fbRGBA_reg, mask_reg); - /* mask = mask << 8 */ - spe_roti(f, mask_reg, mask_reg, 8); - - /* fbR = fbRGBA & mask */ - spe_and(f, fbR_reg, fbRGBA_reg, mask_reg); - /* fbR = fbR >> 8 */ - spe_roti(f, fbR_reg, fbR_reg, -8); - /* mask = mask << 8 */ - spe_roti(f, mask_reg, mask_reg, 8); - - /* fbG = fbRGBA & mask */ - spe_and(f, fbG_reg, fbRGBA_reg, mask_reg); - /* fbG = fbG >> 16 */ - spe_roti(f, fbG_reg, fbG_reg, -16); - /* mask = mask << 8 */ - spe_roti(f, mask_reg, mask_reg, 8); - - /* fbB = fbRGBA & mask */ - spe_and(f, fbB_reg, fbRGBA_reg, mask_reg); - /* fbB = fbB >> 24 */ - spe_roti(f, fbB_reg, fbB_reg, -24); - break; + int one_reg = -1; + int constR_reg = -1, constG_reg = -1, constB_reg = -1, constA_reg = -1; - default: - ASSERT(0); - } - - /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */ - spe_cuflt(f, fbR_reg, fbR_reg, 8); - spe_cuflt(f, fbG_reg, fbG_reg, 8); - spe_cuflt(f, fbB_reg, fbB_reg, 8); - spe_cuflt(f, 
fbA_reg, fbA_reg, 8); - - spe_release_register(f, mask_reg); - } + ASSERT(blend->blend_enable); + /* packed RGBA -> float colors */ + unpack_colors(f, color_format, fbRGBA_reg, + fbR_reg, fbG_reg, fbB_reg, fbA_reg); /* - * Compute Src RGB terms + * Compute Src RGB terms. We're actually looking for the value + * of (the appropriate RGB factors) * (the incoming source RGB color), + * because in some cases (like PIPE_BLENDFACTOR_ONE and + * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math. */ switch (blend->rgb_src_factor) { case PIPE_BLENDFACTOR_ONE: + /* factors = (1,1,1), so term = (R,G,B) */ spe_move(f, term1R_reg, fragR_reg); spe_move(f, term1G_reg, fragG_reg); spe_move(f, term1B_reg, fragB_reg); break; case PIPE_BLENDFACTOR_ZERO: - spe_zero(f, term1R_reg); - spe_zero(f, term1G_reg); - spe_zero(f, term1B_reg); + /* factors = (0,0,0), so term = (0,0,0) */ + spe_load_float(f, term1R_reg, 0.0f); + spe_load_float(f, term1G_reg, 0.0f); + spe_load_float(f, term1B_reg, 0.0f); break; case PIPE_BLENDFACTOR_SRC_COLOR: + /* factors = (R,G,B), so term = (R*R, G*G, B*B) */ spe_fm(f, term1R_reg, fragR_reg, fragR_reg); spe_fm(f, term1G_reg, fragG_reg, fragG_reg); spe_fm(f, term1B_reg, fragB_reg, fragB_reg); break; case PIPE_BLENDFACTOR_SRC_ALPHA: + /* factors = (A,A,A), so term = (R*A, G*A, B*A) */ spe_fm(f, term1R_reg, fragR_reg, fragA_reg); spe_fm(f, term1G_reg, fragG_reg, fragA_reg); spe_fm(f, term1B_reg, fragB_reg, fragA_reg); break; - /* XXX more cases */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) + * or in other words term = (R-R*R, G-G*G, B-B*B) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ + spe_fm(f, term1R_reg, 
fragR_reg, fbR_reg); + spe_fm(f, term1G_reg, fragG_reg, fbG_reg); + spe_fm(f, term1B_reg, fragB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) + * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) + * or term = (R-R*A,G-G*A,B-B*A) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ + spe_fm(f, term1R_reg, fragR_reg, fbA_reg); + spe_fm(f, term1G_reg, fragG_reg, fbA_reg); + spe_fm(f, term1B_reg, fragB_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) + * or term = (R-R*Afb,G-G*Afb,b-B*Afb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg, blend_color->color[2]); + /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */ + spe_fm(f, term1R_reg, fragR_reg, constR_reg); + spe_fm(f, term1G_reg, fragG_reg, constG_reg); + spe_fm(f, term1B_reg, fragB_reg, constB_reg); + 
break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + /* we'll need the optional constant alpha register */ + setup_const_register(f, &constA_reg, blend_color->color[3]); + /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */ + spe_fm(f, term1R_reg, fragR_reg, constA_reg); + spe_fm(f, term1G_reg, fragG_reg, constA_reg); + spe_fm(f, term1B_reg, fragB_reg, constA_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg, blend_color->color[2]); + /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) + * or term = (R-R*Rc, G-G*Gc, B-B*Bc) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg, blend_color->color[2]); + /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) + * or term = (R-R*Ac,G-G*Ac,B-B*Ac) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + /* We'll need the optional {1,1,1,1} register */ + setup_const_register(f, &one_reg, 1.0f); + /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so + * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb)) + * We could expand the term (as a*min(b,c) == min(a*b,a*c) + * as long as a is positive), but then we'd have to 
do three + * spe_float_min() functions instead of one, so this is simpler. + */ + /* tmp = 1 - Afb */ + spe_fs(f, tmp_reg, one_reg, fbA_reg); + /* tmp = min(A,tmp) */ + spe_float_min(f, tmp_reg, fragA_reg, tmp_reg); + /* term = R*tmp */ + spe_fm(f, term1R_reg, fragR_reg, tmp_reg); + spe_fm(f, term1G_reg, fragG_reg, tmp_reg); + spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: ASSERT(0); } /* - * Compute Src Alpha term + * Compute Src Alpha term. Like the above, we're looking for + * the full term A*factor, not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. */ switch (blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ZERO: + /* factor = 0, so term = 0 */ + spe_load_float(f, term1A_reg, 0.0f); + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */ case PIPE_BLENDFACTOR_ONE: + /* factor = 1, so term = A */ spe_move(f, term1A_reg, fragA_reg); break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factor = A, so term = A*A */ spe_fm(f, term1A_reg, fragA_reg, fragA_reg); break; case PIPE_BLENDFACTOR_SRC_ALPHA: spe_fm(f, term1A_reg, fragA_reg, fragA_reg); break; - /* XXX more cases */ + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factor = 1-A, so term = A*(1-A) = A-A*A */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_DST_COLOR: + /* factor = Afb, so term = A*Afb */ + spe_fm(f, term1A_reg, fragA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ + case 
PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg, blend_color->color[3]); + /* factor = Ac, so term = A*Ac */ + spe_fm(f, term1A_reg, fragA_reg, constA_reg); + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg, blend_color->color[3]); + /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: default: ASSERT(0); } /* - * Compute Dest RGB terms + * Compute Dest RGB term. Like the above, we're looking for + * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. 
*/ switch (blend->rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: + /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */ spe_move(f, term2R_reg, fbR_reg); spe_move(f, term2G_reg, fbG_reg); spe_move(f, term2B_reg, fbB_reg); break; case PIPE_BLENDFACTOR_ZERO: - spe_zero(f, term2R_reg); - spe_zero(f, term2G_reg); - spe_zero(f, term2B_reg); + /* factors = (0,0,0), so term = (0,0,0) */ + spe_load_float(f, term2R_reg, 0.0f); + spe_load_float(f, term2G_reg, 0.0f); + spe_load_float(f, term2B_reg, 0.0f); break; case PIPE_BLENDFACTOR_SRC_COLOR: + /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */ spe_fm(f, term2R_reg, fbR_reg, fragR_reg); spe_fm(f, term2G_reg, fbG_reg, fragG_reg); spe_fm(f, term2B_reg, fbB_reg, fragB_reg); break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B)) + * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg); + break; case PIPE_BLENDFACTOR_SRC_ALPHA: + /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */ spe_fm(f, term2R_reg, fbR_reg, fragA_reg); spe_fm(f, term2G_reg, fbG_reg, fragA_reg); spe_fm(f, term2B_reg, fbB_reg, fragA_reg); break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - /* one = {1.0, 1.0, 1.0, 1.0} */ - if (!one_reg_set) { - spe_load_float(f, one_reg, 1.0f); - one_reg_set = true; - } - /* tmp = one - fragA */ - spe_fs(f, tmp_reg, one_reg, fragA_reg); - /* term = fb * tmp */ - spe_fm(f, term2R_reg, fbR_reg, tmp_reg); - spe_fm(f, term2G_reg, fbG_reg, tmp_reg); - spe_fm(f, term2B_reg, fbB_reg, tmp_reg); - break; - /* XXX more cases */ + /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg); +
spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */ + spe_fm(f, term2R_reg, fbR_reg, fbR_reg); + spe_fm(f, term2G_reg, fbG_reg, fbG_reg); + spe_fm(f, term2B_reg, fbB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb)) + * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */ + spe_fm(f, term2R_reg, fbR_reg, fbA_reg); + spe_fm(f, term2G_reg, fbG_reg, fbA_reg); + spe_fm(f, term2B_reg, fbB_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb)) + * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg, blend_color->color[2]); + /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */ + spe_fm(f, term2R_reg, fbR_reg, constR_reg); + spe_fm(f, term2G_reg, fbG_reg, constG_reg); + spe_fm(f, term2B_reg, fbB_reg, constB_reg); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + /* we'll need the optional constant alpha register */ + setup_const_register(f, &constA_reg, blend_color->color[3]); + /* 
factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */ + spe_fm(f, term2R_reg, fbR_reg, constA_reg); + spe_fm(f, term2G_reg, fbG_reg, constA_reg); + spe_fm(f, term2B_reg, fbB_reg, constA_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg, blend_color->color[2]); + /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc)) + * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg, blend_color->color[2]); + /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac)) + * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */ + ASSERT(0); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: ASSERT(0); } /* - * Compute Dest Alpha term + * Compute Dest Alpha term. 
Like the above, we're looking for + * the full term Afb*factor, not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. */ switch (blend->alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: + /* factor = 1, so term = Afb */ spe_move(f, term2A_reg, fbA_reg); break; case PIPE_BLENDFACTOR_ZERO: - spe_zero(f, term2A_reg); + /* factor = 0, so term = 0 */ + spe_load_float(f, term2A_reg, 0.0f); break; - case PIPE_BLENDFACTOR_SRC_ALPHA: + + case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factor = A, so term = Afb*A */ spe_fm(f, term2A_reg, fbA_reg, fragA_reg); break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - /* one = {1.0, 1.0, 1.0, 1.0} */ - if (!one_reg_set) { - spe_load_float(f, one_reg, 1.0f); - one_reg_set = true; - } - /* tmp = one - fragA */ - spe_fs(f, tmp_reg, one_reg, fragA_reg); - /* termA = fbA * tmp */ - spe_fm(f, term2A_reg, fbA_reg, tmp_reg); + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_DST_COLOR: + /* factor = Afb, so term = Afb*Afb */ + spe_fm(f, term2A_reg, fbA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg, blend_color->color[3]); + /* factor = Ac, so term = Afb*Ac */ + spe_fm(f, term2A_reg, fbA_reg, constA_reg); break; - /* XXX more cases */ + + case 
PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg, blend_color->color[3]); + /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */ + ASSERT(0); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: default: ASSERT(0); } /* - * Combine Src/Dest RGB terms + * Combine Src/Dest RGB terms as per the blend equation. */ switch (blend->rgb_func) { case PIPE_BLEND_ADD: @@ -479,7 +869,21 @@ gen_blend(const struct pipe_blend_state *blend, spe_fs(f, fragG_reg, term1G_reg, term2G_reg); spe_fs(f, fragB_reg, term1B_reg, term2B_reg); break; - /* XXX more cases */ + case PIPE_BLEND_REVERSE_SUBTRACT: + spe_fs(f, fragR_reg, term2R_reg, term1R_reg); + spe_fs(f, fragG_reg, term2G_reg, term1G_reg); + spe_fs(f, fragB_reg, term2B_reg, term1B_reg); + break; + case PIPE_BLEND_MIN: + spe_float_min(f, fragR_reg, term1R_reg, term2R_reg); + spe_float_min(f, fragG_reg, term1G_reg, term2G_reg); + spe_float_min(f, fragB_reg, term1B_reg, term2B_reg); + break; + case PIPE_BLEND_MAX: + spe_float_max(f, fragR_reg, term1R_reg, term2R_reg); + spe_float_max(f, fragG_reg, term1G_reg, term2G_reg); + spe_float_max(f, fragB_reg, term1B_reg, term2B_reg); + break; default: ASSERT(0); } @@ -494,7 +898,15 @@ gen_blend(const struct pipe_blend_state *blend, case PIPE_BLEND_SUBTRACT: spe_fs(f, fragA_reg, term1A_reg, term2A_reg); break; - /* XXX more cases */ + case PIPE_BLEND_REVERSE_SUBTRACT: + spe_fs(f, fragA_reg, term2A_reg, term1A_reg); + 
break; + case PIPE_BLEND_MIN: + spe_float_min(f, fragA_reg, term1A_reg, term2A_reg); + break; + case PIPE_BLEND_MAX: + spe_float_max(f, fragA_reg, term1A_reg, term2A_reg); + break; default: ASSERT(0); } @@ -514,8 +926,14 @@ gen_blend(const struct pipe_blend_state *blend, spe_release_register(f, fbB_reg); spe_release_register(f, fbA_reg); - spe_release_register(f, one_reg); spe_release_register(f, tmp_reg); + + /* Free any optional registers that actually got used */ + release_const_register(f, one_reg); + release_const_register(f, constR_reg); + release_const_register(f, constG_reg); + release_const_register(f, constB_reg); + release_const_register(f, constA_reg); } @@ -524,24 +942,74 @@ gen_logicop(const struct pipe_blend_state *blend, struct spe_function *f, int fragRGBA_reg, int fbRGBA_reg) { - /* XXX to-do */ - /* operate on 32-bit packed pixels, not float colors */ -} - - -static void -gen_colormask(uint colormask, - struct spe_function *f, - int fragRGBA_reg, int fbRGBA_reg) -{ - /* XXX to-do */ - /* operate on 32-bit packed pixels, not float colors */ + /* We've got four 32-bit RGBA packed pixels in each of + * fragRGBA_reg and fbRGBA_reg, not sets of floating-point + * reds, greens, blues, and alphas. 
+ * */ + ASSERT(blend->logicop_enable); + + switch(blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: /* 0 */ + spe_zero(f, fragRGBA_reg); + break; + case PIPE_LOGICOP_NOR: /* ~(s | d) */ + spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */ + /* andc R, A, B computes R = A & ~B */ + spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_COPY_INVERTED: /* ~s */ + spe_complement(f, fragRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */ + /* andc R, A, B computes R = A & ~B */ + spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_INVERT: /* ~d */ + /* Note that (A nor A) == ~(A|A) == ~A */ + spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_XOR: /* s ^ d */ + spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_NAND: /* ~(s & d) */ + spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_AND: /* s & d */ + spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */ + spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + spe_complement(f, fragRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_NOOP: /* d */ + spe_move(f, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */ + /* orc R, A, B computes R = A | ~B */ + spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_COPY: /* s */ + break; + case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */ + /* orc R, A, B computes R = A | ~B */ + spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_OR: /* s | d */ + spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_SET: /* 1 */ + spe_load_int(f, fragRGBA_reg, 0xffffffff); + break; + default: + ASSERT(0); + } } - /** - * Generate code to pack a quad of float colors into a four 32-bit integers. 
+ * Generate code to pack a quad of float colors into four 32-bit integers. * * \param f SPE function to append instruction onto. * \param color_format the dest color packing format @@ -557,13 +1025,16 @@ gen_pack_colors(struct spe_function *f, int r_reg, int g_reg, int b_reg, int a_reg, int rgba_reg) { + int rg_reg = spe_allocate_available_register(f); + int ba_reg = spe_allocate_available_register(f); + /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */ spe_cfltu(f, r_reg, r_reg, 32); spe_cfltu(f, g_reg, g_reg, 32); spe_cfltu(f, b_reg, b_reg, 32); spe_cfltu(f, a_reg, a_reg, 32); - /* Shift the most significant bytes to least the significant positions. + /* Shift the most significant bytes to the least significant positions. * I.e.: reg = reg >> 24 */ spe_rotmi(f, r_reg, r_reg, -24); @@ -595,12 +1066,936 @@ gen_pack_colors(struct spe_function *f, * OR-ing all those together gives us four packed colors: * RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699} */ - spe_or(f, rgba_reg, r_reg, g_reg); - spe_or(f, rgba_reg, rgba_reg, b_reg); - spe_or(f, rgba_reg, rgba_reg, a_reg); + spe_or(f, rg_reg, r_reg, g_reg); + spe_or(f, ba_reg, a_reg, b_reg); + spe_or(f, rgba_reg, rg_reg, ba_reg); + + spe_release_register(f, rg_reg); + spe_release_register(f, ba_reg); } +static void +gen_colormask(struct spe_function *f, + uint colormask, + enum pipe_format color_format, + int fragRGBA_reg, int fbRGBA_reg) +{ + /* We've got four 32-bit RGBA packed pixels in each of + * fragRGBA_reg and fbRGBA_reg, not sets of floating-point + * reds, greens, blues, and alphas. Further, the pixels + * are packed according to the given color format, not + * necessarily RGBA... + */ + uint r_mask; + uint g_mask; + uint b_mask; + uint a_mask; + + /* Calculate exactly where the bits for any particular color + * end up, so we can mask them correctly. 
+ */ + switch(color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + /* ARGB */ + a_mask = 0xff000000; + r_mask = 0x00ff0000; + g_mask = 0x0000ff00; + b_mask = 0x000000ff; + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + /* BGRA */ + b_mask = 0xff000000; + g_mask = 0x00ff0000; + r_mask = 0x0000ff00; + a_mask = 0x000000ff; + break; + default: + ASSERT(0); + } + + /* For each R, G, B, and A component we're supposed to mask out, + * clear its bits. Then our mask operation later will work + * as expected. + */ + if (!(colormask & PIPE_MASK_R)) { + r_mask = 0; + } + if (!(colormask & PIPE_MASK_G)) { + g_mask = 0; + } + if (!(colormask & PIPE_MASK_B)) { + b_mask = 0; + } + if (!(colormask & PIPE_MASK_A)) { + a_mask = 0; + } + + /* Get a temporary register to hold the mask that will be applied + * to the fragment + */ + int colormask_reg = spe_allocate_available_register(f); + + /* The actual mask we're going to use is an OR of the remaining R, G, B, + * and A masks. Load the result value into our temporary register. + */ + spe_load_uint(f, colormask_reg, r_mask | g_mask | b_mask | a_mask); + + /* Use the mask register to select between the fragment color + * values and the frame buffer color values. Wherever the + * mask has a 0 bit, the current frame buffer color should override + * the fragment color. Wherever the mask has a 1 bit, the + * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM) + * instruction will select bits from its first operand rA wherever + * the mask bits rM are 0, and from its second operand rB wherever the + * mask bits rM are 1. That means that the frame buffer color is the + * first operand, and the fragment color the second.
+ */ + spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg); + + /* Release the temporary register and we're done */ + spe_release_register(f, colormask_reg); +} + + +/** + * This function is annoyingly similar to gen_depth_test(), above, except + * that instead of comparing two varying values (i.e. fragment and buffer), + * we're comparing a varying value with a static value. As such, we have + * access to the Compare Immediate instructions where we don't in + * gen_depth_test(), which is what makes us very different. + * + * There's some added complexity if there's a non-trivial state->mask + * value; then stencil and reference both must be masked + * + * The return value in the stencil_pass_reg is a bitmask of valid + * fragments that also passed the stencil test. The bitmask of valid + * fragments that failed would be found in + * (fragment_mask_reg & ~stencil_pass_reg). + */ +static void +gen_stencil_test(struct spe_function *f, + const struct pipe_stencil_state *state, + uint stencil_max_value, + int fragment_mask_reg, + int fbS_reg, + int stencil_pass_reg) +{ + /* Generate code that puts the set of passing fragments into the + * stencil_pass_reg register, taking into account whether each fragment + * was active to begin with. 
+ */ + switch (state->func) { + case PIPE_FUNC_EQUAL: + if (state->valuemask == stencil_max_value) { + /* stencil_pass = fragment_mask & (s == reference) */ + spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + } + else { + /* stencil_pass = fragment_mask & ((s&mask) == (reference&mask)) */ + uint tmp_masked_stencil = spe_allocate_available_register(f); + spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); + spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, + state->valuemask & state->ref_value); + spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_masked_stencil); + } + break; + + case PIPE_FUNC_NOTEQUAL: + if (state->valuemask == stencil_max_value) { + /* stencil_pass = fragment_mask & ~(s == reference) */ + spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + } + else { + /* stencil_pass = fragment_mask & ~((s&mask) == (reference&mask)) */ + int tmp_masked_stencil = spe_allocate_available_register(f); + spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); + spe_compare_equal_uint(f, stencil_pass_reg, tmp_masked_stencil, + state->valuemask & state->ref_value); + spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_masked_stencil); + } + break; + + case PIPE_FUNC_LESS: + if (state->valuemask == stencil_max_value) { + /* stencil_pass = fragment_mask & (reference < s) */ + spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + } + else { + /* stencil_pass = fragment_mask & ((reference&mask) < (s & mask)) */ + int tmp_masked_stencil = spe_allocate_available_register(f); + spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); + spe_compare_greater_uint(f, 
stencil_pass_reg, tmp_masked_stencil, + state->valuemask & state->ref_value); + spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_masked_stencil); + } + break; + + case PIPE_FUNC_GREATER: + if (state->valuemask == stencil_max_value) { + /* stencil_pass = fragment_mask & (reference > s) */ + /* There's no convenient Compare Less Than Immediate instruction, so + * we'll have to do this one the harder way, by loading a register and + * comparing directly. Compare Logical Greater Than Word (clgt) + * treats its operands as unsigned - no sign extension. + */ + int tmp_reg = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value); + spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); + spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + } + else { + /* stencil_pass = fragment_mask & ((reference&mask) > (s&mask)) */ + int tmp_reg = spe_allocate_available_register(f); + int tmp_masked_stencil = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->valuemask & state->ref_value); + spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); + spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); + spe_and(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + spe_release_register(f, tmp_masked_stencil); + } + break; + + case PIPE_FUNC_GEQUAL: + if (state->valuemask == stencil_max_value) { + /* stencil_pass = fragment_mask & (reference >= s) + * = fragment_mask & ~(s > reference) */ + spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, + state->ref_value); + spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + } + else { + /* stencil_pass = fragment_mask & ~((s&mask) > (reference&mask)) */ + int tmp_masked_stencil = spe_allocate_available_register(f); + spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); + spe_compare_greater_uint(f, stencil_pass_reg, 
tmp_masked_stencil, + state->valuemask & state->ref_value); + spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_masked_stencil); + } + break; + + case PIPE_FUNC_LEQUAL: + if (state->valuemask == stencil_max_value) { + /* stencil_pass = fragment_mask & (reference <= s) + * = fragment_mask & ~(reference > s) */ + /* As above, we have to do this by loading a register */ + int tmp_reg = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value); + spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); + spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + } + else { + /* stencil_pass = fragment_mask & ~((reference&mask) > (s&mask)) */ + int tmp_reg = spe_allocate_available_register(f); + int tmp_masked_stencil = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value & state->valuemask); + spe_and_uint(f, tmp_masked_stencil, fbS_reg, state->valuemask); + spe_clgt(f, stencil_pass_reg, tmp_reg, tmp_masked_stencil); + spe_andc(f, stencil_pass_reg, fragment_mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + spe_release_register(f, tmp_masked_stencil); + } + break; + + case PIPE_FUNC_NEVER: + /* stencil_pass = fragment_mask & 0 = 0 */ + spe_load_uint(f, stencil_pass_reg, 0); + break; + + case PIPE_FUNC_ALWAYS: + /* stencil_pass = fragment_mask & 1 = fragment_mask */ + spe_move(f, stencil_pass_reg, fragment_mask_reg); + break; + } + + /* The fragments that passed the stencil test are now in stencil_pass_reg. + * The fragments that failed would be (fragment_mask_reg & ~stencil_pass_reg). + */ +} + + +/** + * This function generates code that calculates a set of new stencil values + * given the earlier values and the operation to apply. It does not + * apply any tests.
It is intended to be called up to 3 times + * (for the stencil fail operation, for the stencil pass-z fail operation, + * and for the stencil pass-z pass operation) to collect up to three + * possible sets of values, and for the caller to combine them based + * on the result of the tests. + * + * stencil_max_value should be (2^n - 1) where n is the number of bits + * in the stencil buffer - in other words, it should be usable as a mask. + */ +static void +gen_stencil_values(struct spe_function *f, + uint stencil_op, + uint stencil_ref_value, + uint stencil_max_value, + int fbS_reg, + int newS_reg) +{ + /* The code below assumes that newS_reg and fbS_reg are not the same + * register; if they can be, the calculations below will have to use + * an additional temporary register. For now, mark the assumption + * with an assertion that will fail if they are the same. + */ + ASSERT(fbS_reg != newS_reg); + + /* The code also assumes that the stencil_max_value is of the form + * 2^n-1 and can therefore be used as a mask for the valid bits in + * addition to a maximum. Make sure this is the case as well. + * The clever math below exploits the fact that incrementing a + * binary number serves to flip all the bits of a number starting at + * the LSB and continuing to (and including) the first zero bit + * found. That means that a number and its increment will always + * have at least one bit in common (the high order bit, if nothing + * else) *unless* the number is zero, *or* the number is of a form + * consisting of some number of 1s in the low-order bits followed + * by nothing but 0s in the high-order bits. The latter case + * implies it's of the form 2^n-1.
+ */ + ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0); + + switch(stencil_op) { + case PIPE_STENCIL_OP_KEEP: + /* newS = S */ + spe_move(f, newS_reg, fbS_reg); + break; + + case PIPE_STENCIL_OP_ZERO: + /* newS = 0 */ + spe_zero(f, newS_reg); + break; + + case PIPE_STENCIL_OP_REPLACE: + /* newS = stencil reference value */ + spe_load_uint(f, newS_reg, stencil_ref_value); + break; + + case PIPE_STENCIL_OP_INCR: { + /* newS = (s == max ? max : s + 1) */ + int equals_reg = spe_allocate_available_register(f); + + spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value); + /* Add Word Immediate computes rT = rA + 10-bit signed immediate */ + spe_ai(f, newS_reg, fbS_reg, 1); + /* Select from the current value or the new value based on the equality test */ + spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); + + spe_release_register(f, equals_reg); + break; + } + case PIPE_STENCIL_OP_DECR: { + /* newS = (s == 0 ? 0 : s - 1) */ + int equals_reg = spe_allocate_available_register(f); + + spe_compare_equal_uint(f, equals_reg, fbS_reg, 0); + /* Add Word Immediate with a (-1) value works */ + spe_ai(f, newS_reg, fbS_reg, -1); + /* Select from the current value or the new value based on the equality test */ + spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg); + + spe_release_register(f, equals_reg); + break; + } + case PIPE_STENCIL_OP_INCR_WRAP: + /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can + * do a normal add and mask off the correct bits + */ + spe_ai(f, newS_reg, fbS_reg, 1); + spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); + break; + + case PIPE_STENCIL_OP_DECR_WRAP: + /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */ + spe_ai(f, newS_reg, fbS_reg, -1); + spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); + break; + + case PIPE_STENCIL_OP_INVERT: + /* newS = ~s. 
We take advantage of the mask/max value to invert only + * the valid bits for the field so we don't have to do an extra "and". + */ + spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value); + break; + + default: + ASSERT(0); + } +} + + +/** + * This function generates code to get all the necessary possible + * stencil values. For each of the output registers (fail_reg, + * zfail_reg, and zpass_reg), it either allocates a new register + * and calculates a new set of values based on the stencil operation, + * or it reuses a register allocation and calculation done for an + * earlier (matching) operation, or it reuses the fbS_reg register + * (if the stencil operation is KEEP, which doesn't change the + * stencil buffer). + * + * Since this function allocates a variable number of registers, + * to avoid incurring complex logic to free them, they should + * be allocated after a spe_allocate_register_set() call + * and released by the corresponding spe_release_register_set() call. + */ +static void +gen_get_stencil_values(struct spe_function *f, + const struct pipe_stencil_state *stencil, + const uint depth_enabled, + int fbS_reg, + int *fail_reg, + int *zfail_reg, + int *zpass_reg) +{ + uint zfail_op; + + /* Stenciling had better be enabled here */ + ASSERT(stencil->enabled); + + /* If the depth test is not enabled, it is treated as though it always + * passes, which means that the zfail_op is not considered - a + * failing stencil test triggers the fail_op, and a passing one + * triggers the zpass_op + * + * As an optimization, override calculation of the zfail_op values + * if they aren't going to be used. By setting the value of + * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed + * to match the incoming stencil values, and no calculation will + * be done. 
+ */ + if (depth_enabled) { + zfail_op = stencil->zfail_op; + } + else { + zfail_op = PIPE_STENCIL_OP_KEEP; + } + + /* One-sided or front-facing stencil */ + if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) { + *fail_reg = fbS_reg; + } + else { + *fail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, stencil->fail_op, stencil->ref_value, + 0xff, fbS_reg, *fail_reg); + } + + /* Check the possibly overridden value, not the structure value */ + if (zfail_op == PIPE_STENCIL_OP_KEEP) { + *zfail_reg = fbS_reg; + } + else if (zfail_op == stencil->fail_op) { + *zfail_reg = *fail_reg; + } + else { + *zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, stencil->zfail_op, stencil->ref_value, + 0xff, fbS_reg, *zfail_reg); + } + + if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { + *zpass_reg = fbS_reg; + } + else if (stencil->zpass_op == stencil->fail_op) { + *zpass_reg = *fail_reg; + } + else if (stencil->zpass_op == zfail_op) { + *zpass_reg = *zfail_reg; + } + else { + *zpass_reg = spe_allocate_available_register(f); + gen_stencil_values(f, stencil->zpass_op, stencil->ref_value, + 0xff, fbS_reg, *zpass_reg); + } +} + +/** + * Note that fbZ_reg may *not* be set on entry, if in fact + * the depth test is not enabled. This function must not use + * the register if depth is not enabled. + */ +static boolean +gen_stencil_depth_test(struct spe_function *f, + const struct pipe_depth_stencil_alpha_state *dsa, + const uint facing, + const int mask_reg, const int fragZ_reg, + const int fbZ_reg, const int fbS_reg) +{ + /* True if we've generated code that could require writeback to the + * depth and/or stencil buffers + */ + boolean modified_buffers = FALSE; + + boolean need_to_calculate_stencil_values; + boolean need_to_writemask_stencil_values; + + struct pipe_stencil_state *stencil; + + /* Registers. We may or may not actually allocate these, depending + * on whether the state values indicate that we need them. 
+ */ + int stencil_pass_reg, stencil_fail_reg; + int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values; + int stencil_writemask_reg; + int zmask_reg; + int newS_reg; + + /* Stenciling is quite complex: up to six different configurable stencil + * operations/calculations can be required (three each for front-facing + * and back-facing fragments). Many of those operations will likely + * be identical, so there's good reason to try to avoid calculating + * the same values more than once (which unfortunately makes the code less + * straightforward). + * + * To make register management easier, we start a new + * register set; we can release all the registers in the set at + * once, and avoid having to keep track of exactly which registers + * we allocate. We can still allocate and free registers as + * desired (if we know we no longer need a register), but we don't + * have to spend the complexity to track the more difficult variant + * register usage scenarios. + */ + spe_comment(f, 0, "Allocating stencil register set"); + spe_allocate_register_set(f); + + /* The facing we're given is the fragment facing; it doesn't + * exactly match the stencil facing. If stencil is enabled, + * but two-sided stencil is *not* enabled, we use the same + * stencil settings for both front- and back-facing fragments. + * We only use the "back-facing" stencil for backfacing fragments + * if two-sided stenciling is enabled. + */ + if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) { + stencil = &dsa->stencil[1]; + } + else { + stencil = &dsa->stencil[0]; + } + + /* Calculate the writemask. If the writemask is trivial (either + * all 0s, meaning that we don't need to calculate any stencil values + * because they're not going to change the stencil anyway, or all 1s, + * meaning that we have to calculate the stencil values but do not + * need to mask them), we can avoid generating code. 
Don't forget + * that we need to consider backfacing stencil, if enabled. + * + * Note that if the backface stencil is *not* enabled, the backface + * stencil will have the same values as the frontface stencil. + */ + if (stencil->fail_op == PIPE_STENCIL_OP_KEEP && + stencil->zfail_op == PIPE_STENCIL_OP_KEEP && + stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { + need_to_calculate_stencil_values = FALSE; + need_to_writemask_stencil_values = FALSE; + } + else if (stencil->writemask == 0x0) { + /* All changes are writemasked out, so no need to calculate + * what those changes might be, and no need to write anything back. + */ + need_to_calculate_stencil_values = FALSE; + need_to_writemask_stencil_values = FALSE; + } + else if (stencil->writemask == 0xff) { + /* Still trivial, but a little less so. We need to write the stencil + * values, but we don't need to mask them. + */ + need_to_calculate_stencil_values = TRUE; + need_to_writemask_stencil_values = FALSE; + } + else { + /* The general case: calculate, mask, and write */ + need_to_calculate_stencil_values = TRUE; + need_to_writemask_stencil_values = TRUE; + + /* While we're here, generate code that calculates what the + * writemask should be. If backface stenciling is enabled, + * and the backface writemask is not the same as the frontface + * writemask, we'll have to generate code that merges the + * two masks into a single effective mask based on fragment facing. + */ + spe_comment(f, 0, "Computing stencil writemask"); + stencil_writemask_reg = spe_allocate_available_register(f); + spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].writemask); + } + + /* At least one-sided stenciling must be on. Generate code that + * runs the stencil test on the basic/front-facing stencil, leaving + * the mask of passing stencil bits in stencil_pass_reg. This mask will + * be used both to mask the set of active pixels, and also to + * determine how the stencil buffer changes. 
+ * + * This test will *not* change the value in mask_reg (because we don't + * yet know whether to apply the two-sided stencil or one-sided stencil). + */ + spe_comment(f, 0, "Running basic stencil test"); + stencil_pass_reg = spe_allocate_available_register(f); + gen_stencil_test(f, stencil, 0xff, mask_reg, fbS_reg, stencil_pass_reg); + + /* Generate code that, given the mask of valid fragments and the + * mask of valid fragments that passed the stencil test, computes + * the mask of valid fragments that failed the stencil test. We + * have to do this before we run a depth test (because the + * depth test should not be performed on fragments that failed the + * stencil test, and because the depth test will update the + * mask of valid fragments based on the results of the depth test). + */ + spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask"); + stencil_fail_reg = spe_allocate_available_register(f); + spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg); + /* Now remove the stenciled-out pixels from the valid fragment mask, + * so we can later use the valid fragment mask in the depth test. + */ + spe_and(f, mask_reg, mask_reg, stencil_pass_reg); + + /* We may not need to calculate stencil values, if the writemask is off */ + if (need_to_calculate_stencil_values) { + /* Generate code that calculates exactly which stencil values we need, + * without calculating the same value twice (say, if two different + * stencil ops have the same value). This code will work for one-sided + * and two-sided stenciling (so that we take into account that operations + * may match between front and back stencils), and will also take into + * account whether the depth test is enabled (if the depth test is off, + * we don't need any of the zfail results, because the depth test always + * is considered to pass if it is disabled). Any register value that + * does not need to be calculated will come back with the same value + * that's in fbS_reg. 
+ * + * This function will allocate a variant number of registers that + * will be released as part of the register set. + */ + spe_comment(f, 0, facing == CELL_FACING_FRONT + ? "Computing front-facing stencil values" + : "Computing back-facing stencil values"); + gen_get_stencil_values(f, stencil, dsa->depth.enabled, fbS_reg, + &stencil_fail_values, &stencil_pass_depth_fail_values, + &stencil_pass_depth_pass_values); + } + + /* We now have all the stencil values we need. We also need + * the results of the depth test to figure out which + * stencil values will become the new stencil values. (Even if + * we aren't actually calculating stencil values, we need to apply + * the depth test if it's enabled.) + * + * The code generated by gen_depth_test() returns the results of the + * test in the given register, but also alters the mask_reg based + * on the results of the test. + */ + if (dsa->depth.enabled) { + spe_comment(f, 0, "Running stencil depth test"); + zmask_reg = spe_allocate_available_register(f); + modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, + fbZ_reg, zmask_reg); + } + + if (need_to_calculate_stencil_values) { + + /* If we need to writemask the stencil values before going into + * the stencil buffer, we'll have to use a new register to + * hold the new values. If not, we can just keep using the + * current register. + */ + if (need_to_writemask_stencil_values) { + newS_reg = spe_allocate_available_register(f); + spe_comment(f, 0, "Saving current stencil values for writemasking"); + spe_move(f, newS_reg, fbS_reg); + } + else { + newS_reg = fbS_reg; + } + + /* Merge in the selected stencil fail values */ + if (stencil_fail_values != fbS_reg) { + spe_comment(f, 0, "Loading stencil fail values"); + spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg); + modified_buffers = TRUE; + } + + /* Same for the stencil pass/depth fail values. 
If this calculation + * is not needed (say, if depth test is off), then the + * stencil_pass_depth_fail_values register will be equal to fbS_reg + * and we'll skip the calculation. + */ + if (stencil_pass_depth_fail_values != fbS_reg) { + /* We don't actually have a stencil pass/depth fail mask yet. + * Calculate it here from the stencil passing mask and the + * depth passing mask. Note that zmask_reg *must* have been + * set above if we're here. + */ + uint stencil_pass_depth_fail_mask = + spe_allocate_available_register(f); + + spe_comment(f, 0, "Loading stencil pass/depth fail values"); + spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg); + + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, + stencil_pass_depth_fail_mask); + + spe_release_register(f, stencil_pass_depth_fail_mask); + modified_buffers = TRUE; + } + + /* Same for the stencil pass/depth pass mask. Note that we + * *can* get here with zmask_reg being unset (if the depth + * test is off but the stencil test is on). In this case, + * we assume the depth test passes, and don't need to mask + * the stencil pass mask with the Z mask. + */ + if (stencil_pass_depth_pass_values != fbS_reg) { + if (dsa->depth.enabled) { + uint stencil_pass_depth_pass_mask = spe_allocate_available_register(f); + /* We'll need a separate register */ + spe_comment(f, 0, "Loading stencil pass/depth pass values"); + spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); + spe_release_register(f, stencil_pass_depth_pass_mask); + } + else { + /* We can use the same stencil-pass register */ + spe_comment(f, 0, "Loading stencil pass values"); + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg); + } + modified_buffers = TRUE; + } + + /* Almost done. If we need to writemask, do it now, leaving the + * results in the fbS_reg register passed in. 
If we don't need + * to writemask, then the results are *already* in the fbS_reg, + * so there's nothing more to do. + */ + + if (need_to_writemask_stencil_values && modified_buffers) { + /* The Select Bytes command makes a fine writemask. Where + * the mask is 0, the first (original) values are retained, + * effectively masking out changes. Where the mask is 1, the + * second (new) values are retained, incorporating changes. + */ + spe_comment(f, 0, "Writemasking new stencil values"); + spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg); + } + + } /* done calculating stencil values */ + + /* The stencil and/or depth values have been applied, and the + * mask_reg, fbS_reg, and fbZ_reg values have been updated. + * We're all done, except that we've allocated a fair number + * of registers that we didn't bother tracking. Release all + * those registers as part of the register set, and go home. + */ + spe_comment(f, 0, "Releasing stencil register set"); + spe_release_register_set(f); + + /* Return TRUE if we could have modified the stencil and/or + * depth buffers. + */ + return modified_buffers; +} + + +/** + * Generate depth and/or stencil test code. 
+ * \param cell context + * \param dsa depth/stencil/alpha state + * \param f spe function to emit + * \param facing either CELL_FACING_FRONT or CELL_FACING_BACK + * \param mask_reg register containing the pixel alive/dead mask + * \param depth_tile_reg register containing address of z/stencil tile + * \param quad_offset_reg offset to quad from start of tile + * \param fragZ_reg register containg fragment Z values + */ +static void +gen_depth_stencil(struct cell_context *cell, + const struct pipe_depth_stencil_alpha_state *dsa, + struct spe_function *f, + uint facing, + int mask_reg, + int depth_tile_reg, + int quad_offset_reg, + int fragZ_reg) + +{ + const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; + boolean write_depth_stencil; + + /* framebuffer's combined z/stencil values register */ + int fbZS_reg = spe_allocate_available_register(f); + + /* Framebufer Z values register */ + int fbZ_reg = spe_allocate_available_register(f); + + /* Framebuffer stencil values register (may not be used) */ + int fbS_reg = spe_allocate_available_register(f); + + /* 24-bit mask register (may not be used) */ + int zmask_reg = spe_allocate_available_register(f); + + /** + * The following code: + * 1. fetch quad of packed Z/S values from the framebuffer tile. + * 2. extract the separate the Z and S values from packed values + * 3. convert fragment Z values from float in [0,1] to 32/24/16-bit ints + * + * The instructions for doing this are interleaved for better performance. 
+ */ + spe_comment(f, 0, "Fetch Z/stencil quad from tile"); + + switch(zs_format) { + case PIPE_FORMAT_S8Z24_UNORM: /* fall through */ + case PIPE_FORMAT_X8Z24_UNORM: + /* prepare mask to extract Z vals from ZS vals */ + spe_load_uint(f, zmask_reg, 0x00ffffff); + + /* convert fragment Z from [0,1] to 32-bit ints */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + + /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ + spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + + /* right shift 32-bit fragment Z to 24 bits */ + spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + + /* extract 24-bit Z values from ZS values by masking */ + spe_and(f, fbZ_reg, fbZS_reg, zmask_reg); + + /* extract 8-bit stencil values by shifting */ + spe_rotmi(f, fbS_reg, fbZS_reg, -24); + break; + + case PIPE_FORMAT_Z24S8_UNORM: /* fall through */ + case PIPE_FORMAT_Z24X8_UNORM: + /* convert fragment Z from [0,1] to 32-bit ints */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + + /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ + spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + + /* right shift 32-bit fragment Z to 24 bits */ + spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + + /* extract 24-bit Z values from ZS values by shifting */ + spe_rotmi(f, fbZ_reg, fbZS_reg, -8); + + /* extract 8-bit stencil values by masking */ + spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff); + break; + + case PIPE_FORMAT_Z32_UNORM: + /* Load: fbZ_reg = memory[depth_tile_reg + offset_reg] */ + spe_lqx(f, fbZ_reg, depth_tile_reg, quad_offset_reg); + + /* convert fragment Z from [0,1] to 32-bit ints */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + + /* No stencil, so can't do anything there */ + break; + + case PIPE_FORMAT_Z16_UNORM: + /* XXX This code for 16bpp Z is broken! 
*/ + + /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ + spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + + /* Copy over 4 32-bit values */ + spe_move(f, fbZ_reg, fbZS_reg); + + /* convert Z from [0,1] to 16-bit ints */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -16); + /* No stencil */ + break; + + default: + ASSERT(0); /* invalid format */ + } + + /* If stencil is enabled, use the stencil-specific code + * generator to generate both the stencil and depth (if needed) + * tests. Otherwise, if only depth is enabled, generate + * a quick depth test. The test generators themselves will + * report back whether the depth/stencil buffer has to be + * written back. + */ + if (dsa->stencil[0].enabled) { + /* This will perform the stencil and depth tests, and update + * the mask_reg, fbZ_reg, and fbS_reg as required by the + * tests. + */ + ASSERT(fbS_reg >= 0); + spe_comment(f, 0, "Perform stencil test"); + + /* Note that fbZ_reg may not be set on entry, if stenciling + * is enabled but there's no Z-buffer. The + * gen_stencil_depth_test() function must ignore the + * fbZ_reg register if depth is not enabled. + */ + write_depth_stencil = gen_stencil_depth_test(f, dsa, facing, + mask_reg, fragZ_reg, + fbZ_reg, fbS_reg); + } + else if (dsa->depth.enabled) { + int zmask_reg = spe_allocate_available_register(f); + ASSERT(fbZ_reg >= 0); + spe_comment(f, 0, "Perform depth test"); + write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, + fbZ_reg, zmask_reg); + spe_release_register(f, zmask_reg); + } + else { + write_depth_stencil = FALSE; + } + + if (write_depth_stencil) { + /* Merge latest Z and Stencil values into fbZS_reg. + * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. + * fbS_reg has four 8-bit Z values in bits [7..0]. 
+ */ + spe_comment(f, 0, "Store quad's depth/stencil values in tile"); + if (zs_format == PIPE_FORMAT_S8Z24_UNORM || + zs_format == PIPE_FORMAT_X8Z24_UNORM) { + spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } + else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || + zs_format == PIPE_FORMAT_Z24X8_UNORM) { + spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } + else if (zs_format == PIPE_FORMAT_Z32_UNORM) { + spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + } + else if (zs_format == PIPE_FORMAT_Z16_UNORM) { + spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + } + else if (zs_format == PIPE_FORMAT_S8_UNORM) { + ASSERT(0); /* XXX to do */ + } + else { + ASSERT(0); /* bad zs_format */ + } + + /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ + spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); + } + + /* Don't need these any more */ + spe_release_register(f, fbZS_reg); + spe_release_register(f, fbZ_reg); + spe_release_register(f, fbS_reg); + spe_release_register(f, zmask_reg); +} + /** @@ -621,14 +2016,21 @@ gen_pack_colors(struct spe_function *f, * should be much faster. * * \param cell the rendering context (in) - * \param f the generated function (out) + * \param facing whether the generated code is for front-facing or + * back-facing fragments + * \param f the generated function (in/out); on input, the function + * must already have been initialized. On exit, whatever + * instructions within the generated function have had + * the fragment ops appended. 
*/ void -cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) +cell_gen_fragment_function(struct cell_context *cell, + const uint facing, + struct spe_function *f) { - const struct pipe_depth_stencil_alpha_state *dsa = - &cell->depth_stencil->base; - const struct pipe_blend_state *blend = &cell->blend->base; + const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil; + const struct pipe_blend_state *blend = cell->blend; + const struct pipe_blend_color *blend_color = &cell->blend_color; const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format; /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */ @@ -643,15 +2045,23 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) const int fragA_reg = 11; /* vector float */ const int mask_reg = 12; /* vector uint */ + ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK); + /* offset of quad from start of tile * XXX assuming 4-byte pixels for color AND Z/stencil!!!! */ int quad_offset_reg; int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ - int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */ - spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + if (cell->debug_flags & CELL_DEBUG_ASM) { + spe_print_code(f, TRUE); + spe_indent(f, 8); + spe_comment(f, -4, facing == CELL_FACING_FRONT + ? 
"Begin front-facing per-fragment ops" + : "Begin back-facing per-fragment ops"); + } + spe_allocate_register(f, x_reg); spe_allocate_register(f, y_reg); spe_allocate_register(f, color_tile_reg); @@ -665,7 +2075,6 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) quad_offset_reg = spe_allocate_available_register(f); fbRGBA_reg = spe_allocate_available_register(f); - fbZS_reg = spe_allocate_available_register(f); /* compute offset of quad from start of tile, in bytes */ { @@ -674,8 +2083,9 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) ASSERT(TILE_SIZE == 32); - spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ + spe_comment(f, 0, "Compute quad offset within tile"); spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ + spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ spe_a(f, quad_offset_reg, y2_reg, x2_reg); /* offset = y2 + x2 */ spe_shli(f, quad_offset_reg, quad_offset_reg, 4); /* offset *= 16 */ @@ -684,139 +2094,33 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_release_register(f, y2_reg); } - + /* Generate the alpha test, if needed. 
*/ if (dsa->alpha.enabled) { gen_alpha_test(dsa, f, mask_reg, fragA_reg); } + /* generate depth and/or stencil test code */ if (dsa->depth.enabled || dsa->stencil[0].enabled) { - const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; - boolean write_depth_stencil; - - int fbZ_reg = spe_allocate_available_register(f); /* Z values */ - int fbS_reg = spe_allocate_available_register(f); /* Stencil values */ - - /* fetch quad of depth/stencil values from tile at (x,y) */ - /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ - spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - - if (dsa->depth.enabled) { - /* Extract Z bits from fbZS_reg into fbZ_reg */ - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - int mask_reg = spe_allocate_available_register(f); - spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */ - spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */ - spe_release_register(f, mask_reg); - /* OK, fbZ_reg has four 24-bit Z values now */ - } - else { - /* XXX handle other z/stencil formats */ - ASSERT(0); - } - - /* Convert fragZ values from float[4] to uint[4] */ - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM || - zs_format == PIPE_FORMAT_Z24S8_UNORM || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - /* 24-bit Z values */ - int scale_reg = spe_allocate_available_register(f); - - /* scale_reg[0,1,2,3] = float(2^24-1) */ - spe_load_float(f, scale_reg, (float) 0xffffff); - - /* XXX these two instructions might be combined */ - spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 0); /* fragZ = (int) fragZ */ - - spe_release_register(f, scale_reg); - } - else { - /* XXX handle 16-bit Z format */ - ASSERT(0); - } - } - - if (dsa->stencil[0].enabled) { - /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */ - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - /* 
XXX extract with a shift */ - ASSERT(0); - } - else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - /* XXX extract with a mask */ - ASSERT(0); - } - } - - - if (dsa->stencil[0].enabled) { - /* XXX this may involve depth testing too */ - // gen_stencil_test(dsa, f, ... ); - ASSERT(0); - } - else if (dsa->depth.enabled) { - int zmask_reg = spe_allocate_available_register(f); - gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); - spe_release_register(f, zmask_reg); - } - - /* do we need to write Z and/or Stencil back into framebuffer? */ - write_depth_stencil = (dsa->depth.writemask | - dsa->stencil[0].write_mask | - dsa->stencil[1].write_mask); - - if (write_depth_stencil) { - /* Merge latest Z and Stencil values into fbZS_reg. - * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. - * fbS_reg has four 8-bit Z values in bits [7..0]. - */ - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ - spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ - } - else if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - /* XXX to do */ - ASSERT(0); - } - else if (zs_format == PIPE_FORMAT_Z16_UNORM) { - /* XXX to do */ - ASSERT(0); - } - else if (zs_format == PIPE_FORMAT_S8_UNORM) { - /* XXX to do */ - ASSERT(0); - } - else { - /* bad zs_format */ - ASSERT(0); - } - - /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */ - spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - } - - spe_release_register(f, fbZ_reg); - spe_release_register(f, fbS_reg); + gen_depth_stencil(cell, dsa, f, + facing, + mask_reg, + depth_tile_reg, + quad_offset_reg, + fragZ_reg); } - /* Get framebuffer quad/colors. We'll need these for blending, * color masking, and to obey the quad/pixel mask. 
* Load: fbRGBA_reg = memory[color_tile + quad_offset] * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking * we could skip this load. */ + spe_comment(f, 0, "Fetch quad colors from tile"); spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); - if (blend->blend_enable) { - gen_blend(blend, f, color_format, + spe_comment(f, 0, "Perform blending"); + gen_blend(blend, blend_color, f, color_format, fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); } @@ -829,19 +2133,21 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) int rgba_reg = spe_allocate_available_register(f); /* Pack four float colors as four 32-bit int colors */ + spe_comment(f, 0, "Convert float quad colors to packed int framebuffer colors"); gen_pack_colors(f, color_format, fragR_reg, fragG_reg, fragB_reg, fragA_reg, rgba_reg); if (blend->logicop_enable) { + spe_comment(f, 0, "Compute logic op"); gen_logicop(blend, f, rgba_reg, fbRGBA_reg); } - if (blend->colormask != 0xf) { - gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg); + if (blend->colormask != PIPE_MASK_RGBA) { + spe_comment(f, 0, "Compute color mask"); + gen_colormask(f, blend->colormask, color_format, rgba_reg, fbRGBA_reg); } - /* Mix fragment colors with framebuffer colors using the quad/pixel mask: * if (mask[i]) * rgba[i] = rgba[i]; @@ -853,6 +2159,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) /* Store updated quad in tile: * memory[color_tile + quad_offset] = rgba_reg; */ + spe_comment(f, 0, "Store quad colors into color tile"); spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); spe_release_register(f, rgba_reg); @@ -862,9 +2169,13 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_bi(f, SPE_REG_RA, 0, 0); /* return from function call */ - spe_release_register(f, fbRGBA_reg); - spe_release_register(f, fbZS_reg); spe_release_register(f, quad_offset_reg); -} + if (cell->debug_flags & CELL_DEBUG_ASM) { + 
char buffer[1024]; + sprintf(buffer, "End %s-facing per-fragment ops: %d instructions", + facing == CELL_FACING_FRONT ? "front" : "back", f->num_inst); + spe_comment(f, -4, buffer); + } +} diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h index b59de198dc..21b35d1faf 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h @@ -31,7 +31,7 @@ extern void -cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f); +cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f); #endif /* CELL_GEN_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 475c6ef0ce..facd9551fe 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -35,9 +35,9 @@ #include "draw/draw_context.h" #include "cell_context.h" #include "cell_flush.h" +#include "cell_pipe_state.h" #include "cell_state.h" #include "cell_texture.h" -#include "cell_state_per_fragment.h" @@ -45,24 +45,18 @@ static void * cell_create_blend_state(struct pipe_context *pipe, const struct pipe_blend_state *blend) { - struct cell_blend_state *cb = MALLOC(sizeof(struct cell_blend_state)); - - (void) memcpy(cb, blend, sizeof(*blend)); -#if 0 - cell_generate_alpha_blend(cb); -#endif - return cb; + return mem_dup(blend, sizeof(*blend)); } static void -cell_bind_blend_state(struct pipe_context *pipe, void *state) +cell_bind_blend_state(struct pipe_context *pipe, void *blend) { struct cell_context *cell = cell_context(pipe); draw_flush(cell->draw); - cell->blend = (struct cell_blend_state *) state; + cell->blend = (struct pipe_blend_state *) blend; cell->dirty |= CELL_NEW_BLEND; } @@ -70,10 +64,7 @@ cell_bind_blend_state(struct pipe_context *pipe, void *state) static void cell_delete_blend_state(struct pipe_context *pipe, 
void *blend) { - struct cell_blend_state *cb = (struct cell_blend_state *) blend; - - spe_release_func(& cb->code); - FREE(cb); + FREE(blend); } @@ -95,41 +86,29 @@ cell_set_blend_color(struct pipe_context *pipe, static void * cell_create_depth_stencil_alpha_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *depth_stencil) + const struct pipe_depth_stencil_alpha_state *dsa) { - struct cell_depth_stencil_alpha_state *cdsa = - MALLOC(sizeof(struct cell_depth_stencil_alpha_state)); - - (void) memcpy(cdsa, depth_stencil, sizeof(*depth_stencil)); -#if 0 - cell_generate_depth_stencil_test(cdsa); -#endif - return cdsa; + return mem_dup(dsa, sizeof(*dsa)); } static void cell_bind_depth_stencil_alpha_state(struct pipe_context *pipe, - void *depth_stencil) + void *dsa) { struct cell_context *cell = cell_context(pipe); draw_flush(cell->draw); - cell->depth_stencil = - (struct cell_depth_stencil_alpha_state *) depth_stencil; + cell->depth_stencil = (struct pipe_depth_stencil_alpha_state *) dsa; cell->dirty |= CELL_NEW_DEPTH_STENCIL; } static void -cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *depth) +cell_delete_depth_stencil_alpha_state(struct pipe_context *pipe, void *dsa) { - struct cell_depth_stencil_alpha_state *cdsa = - (struct cell_depth_stencil_alpha_state *) depth; - - spe_release_func(& cdsa->code); - FREE(cdsa); + FREE(dsa); } @@ -191,24 +170,23 @@ cell_set_polygon_stipple( struct pipe_context *pipe, static void * cell_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *setup) + const struct pipe_rasterizer_state *rasterizer) { - struct pipe_rasterizer_state *state - = MALLOC(sizeof(struct pipe_rasterizer_state)); - memcpy(state, setup, sizeof(struct pipe_rasterizer_state)); - return state; + return mem_dup(rasterizer, sizeof(*rasterizer)); } static void -cell_bind_rasterizer_state(struct pipe_context *pipe, void *setup) +cell_bind_rasterizer_state(struct pipe_context *pipe, 
void *rast) { + struct pipe_rasterizer_state *rasterizer = + (struct pipe_rasterizer_state *) rast; struct cell_context *cell = cell_context(pipe); /* pass-through to draw module */ - draw_set_rasterizer_state(cell->draw, setup); + draw_set_rasterizer_state(cell->draw, rasterizer); - cell->rasterizer = (struct pipe_rasterizer_state *)setup; + cell->rasterizer = rasterizer; cell->dirty |= CELL_NEW_RASTERIZER; } @@ -235,17 +213,24 @@ cell_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **samplers) { struct cell_context *cell = cell_context(pipe); + uint i, changed = 0x0; assert(num <= CELL_MAX_SAMPLERS); draw_flush(cell->draw); - memcpy(cell->sampler, samplers, num * sizeof(void *)); - memset(&cell->sampler[num], 0, (CELL_MAX_SAMPLERS - num) * - sizeof(void *)); - cell->num_samplers = num; + for (i = 0; i < CELL_MAX_SAMPLERS; i++) { + struct pipe_sampler_state *new_samp = i < num ? samplers[i] : NULL; + if (cell->sampler[i] != new_samp) { + cell->sampler[i] = new_samp; + changed |= (1 << i); + } + } - cell->dirty |= CELL_NEW_SAMPLER; + if (changed) { + cell->dirty |= CELL_NEW_SAMPLER; + cell->dirty_samplers |= changed; + } } @@ -263,30 +248,101 @@ cell_set_sampler_textures(struct pipe_context *pipe, unsigned num, struct pipe_texture **texture) { struct cell_context *cell = cell_context(pipe); - uint i; + uint i, changed = 0x0; assert(num <= CELL_MAX_SAMPLERS); - /* Check for no-op */ - if (num == cell->num_textures && - !memcmp(cell->texture, texture, num * sizeof(struct pipe_texture *))) - return; - - draw_flush(cell->draw); - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - struct pipe_texture *tex = i < num ? texture[i] : NULL; + struct cell_texture *new_tex = cell_texture(i < num ? 
texture[i] : NULL); + struct cell_texture *old_tex = cell->texture[i]; + if (old_tex != new_tex) { + + pipe_texture_reference((struct pipe_texture **) &cell->texture[i], + (struct pipe_texture *) new_tex); - pipe_texture_reference((struct pipe_texture **) &cell->texture[i], tex); + changed |= (1 << i); + } } + cell->num_textures = num; - cell_update_texture_mapping(cell); + if (changed) { + cell->dirty |= CELL_NEW_TEXTURE; + cell->dirty_textures |= changed; + } +} + - cell->dirty |= CELL_NEW_TEXTURE; +/** + * Map color and z/stencil framebuffer surfaces. + */ +static void +cell_map_surfaces(struct cell_context *cell) +{ + struct pipe_screen *screen = cell->pipe.screen; + uint i; + + for (i = 0; i < 1; i++) { + struct pipe_surface *ps = cell->framebuffer.cbufs[i]; + if (ps) { + cell->cbuf_transfer[i] = + screen->get_tex_transfer(screen, ps->texture, ps->face, + ps->level, ps->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, ps->width, ps->height); + + cell->cbuf_map[i] = + screen->transfer_map(screen, cell->cbuf_transfer[i]); + } + } + + { + struct pipe_surface *ps = cell->framebuffer.zsbuf; + if (ps) { + cell->zsbuf_transfer = + screen->get_tex_transfer(screen, ps->texture, ps->face, + ps->level, ps->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, ps->width, ps->height); + + cell->zsbuf_map = + screen->transfer_map(screen, cell->zsbuf_transfer); + } + } } +/** + * Unmap color and z/stencil framebuffer surfaces. 
+ */ +static void +cell_unmap_surfaces(struct cell_context *cell) +{ + struct pipe_screen *screen = cell->pipe.screen; + uint i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + if (cell->cbuf_transfer[i] && cell->cbuf_map[i]) { + /* unmap color buffer/surface [i] */ + screen->transfer_unmap(screen, cell->cbuf_transfer[i]); + cell->cbuf_map[i] = NULL; + + /* get rid of transfer object [i] */ + screen->tex_transfer_release(screen, &cell->cbuf_transfer[i]); + assert(cell->cbuf_transfer[i] == NULL); + } + } + + if (cell->zsbuf_transfer && cell->zsbuf_map) { + screen->transfer_unmap(screen, cell->zsbuf_transfer); + cell->zsbuf_map = NULL; + + /* get rid of transfer object */ + screen->tex_transfer_release(screen, &cell->zsbuf_transfer); + assert(cell->zsbuf_transfer == NULL); + } +} + static void cell_set_framebuffer_state(struct pipe_context *pipe, @@ -295,24 +351,10 @@ cell_set_framebuffer_state(struct pipe_context *pipe, struct cell_context *cell = cell_context(pipe); if (1 /*memcmp(&cell->framebuffer, fb, sizeof(*fb))*/) { - struct pipe_surface *csurf = fb->cbufs[0]; - struct pipe_surface *zsurf = fb->zsbuf; uint i; - uint flags = (PIPE_BUFFER_USAGE_GPU_WRITE | - PIPE_BUFFER_USAGE_GPU_READ); /* unmap old surfaces */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { - if (cell->framebuffer.cbufs[i] && cell->cbuf_map[i]) { - pipe_surface_unmap(cell->framebuffer.cbufs[i]); - cell->cbuf_map[i] = NULL; - } - } - - if (cell->framebuffer.zsbuf && cell->zsbuf_map) { - pipe_surface_unmap(cell->framebuffer.zsbuf); - cell->zsbuf_map = NULL; - } + cell_unmap_surfaces(cell); /* Finish any pending rendering to the current surface before * installing a new surface! 
@@ -324,18 +366,14 @@ cell_set_framebuffer_state(struct pipe_context *pipe, */ cell->framebuffer.width = fb->width; cell->framebuffer.height = fb->height; - cell->framebuffer.num_cbufs = fb->num_cbufs; + cell->framebuffer.nr_cbufs = fb->nr_cbufs; for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]); } pipe_surface_reference(&cell->framebuffer.zsbuf, fb->zsbuf); /* map new surfaces */ - if (csurf) - cell->cbuf_map[0] = pipe_surface_map(csurf, flags); - - if (zsurf) - cell->zsbuf_map = pipe_surface_map(zsurf, flags); + cell_map_surfaces(cell); cell->dirty |= CELL_NEW_FRAMEBUFFER; } diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c index dd25ae880e..79cb8df82f 100644 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -152,6 +152,7 @@ cell_flush_prim_buffer(struct cell_context *cell) struct cell_command_render *render = &cell_global.command[i].render; render->prim_type = PIPE_PRIM_TRIANGLES; render->num_verts = cell->prim_buffer.num_verts; + render->front_winding = cell->rasterizer->front_winding; render->vertex_size = cell->vertex_info->size * 4; render->xmin = cell->prim_buffer.xmin; render->ymin = cell->prim_buffer.ymin; diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 139b3719b6..512d85d352 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -27,7 +27,8 @@ #include "util/u_memory.h" -#include "pipe/p_winsys.h" +#include "util/u_simple_screen.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -58,9 +59,9 @@ cell_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: return CELL_MAX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: - return 0; + return 1; case PIPE_CAP_TWO_SIDED_STENCIL: - return 0; + return 1; case 
PIPE_CAP_GLSL: return 1; case PIPE_CAP_S3TC: @@ -68,19 +69,23 @@ cell_get_param(struct pipe_screen *screen, int param) case PIPE_CAP_ANISOTROPIC_FILTER: return 0; case PIPE_CAP_POINT_SPRITE: - return 0; + return 1; case PIPE_CAP_MAX_RENDER_TARGETS: return 1; case PIPE_CAP_OCCLUSION_QUERY: - return 0; + return 1; case PIPE_CAP_TEXTURE_SHADOW_MAP: - return 0; + return 10; case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: - return 12; /* max 2Kx2K */ + return CELL_MAX_TEXTURE_LEVELS; case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: return 8; /* max 128x128x128 */ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: - return 12; /* max 2Kx2K */ + return CELL_MAX_TEXTURE_LEVELS; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; /* XXX not really true */ + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 0; /* XXX to do */ default: return 0; } @@ -165,6 +170,7 @@ cell_create_screen(struct pipe_winsys *winsys) screen->is_format_supported = cell_is_format_supported; cell_init_screen_texture_funcs(screen); + u_simple_screen_init(screen); return screen; } diff --git a/src/gallium/drivers/cell/ppu/cell_spu.c b/src/gallium/drivers/cell/ppu/cell_spu.c index 9508227e29..28e5e6d706 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.c +++ b/src/gallium/drivers/cell/ppu/cell_spu.c @@ -36,6 +36,7 @@ #include "cell_spu.h" #include "pipe/p_format.h" #include "pipe/p_state.h" +#include "util/u_memory.h" #include "cell/common.h" @@ -52,6 +53,35 @@ struct cell_global_info cell_global; /** + * Scan /proc/cpuinfo to determine the timebase for the system. + * This is used by the SPUs to convert 'decrementer' ticks to seconds. + * There may be a better way to get this value... 
+ */ +static unsigned +get_timebase(void) +{ + FILE *f = fopen("/proc/cpuinfo", "r"); + unsigned timebase; + + assert(f); + while (!feof(f)) { + char line[80]; + fgets(line, sizeof(line), f); + if (strncmp(line, "timebase", 8) == 0) { + char *colon = strchr(line, ':'); + if (colon) { + timebase = atoi(colon + 2); + break; + } + } + } + fclose(f); + + return timebase; +} + + +/** * Write a 1-word message to the given SPE mailbox. */ void @@ -114,6 +144,7 @@ cell_start_spus(struct cell_context *cell) { static boolean one_time_init = FALSE; uint i, j; + uint timebase = get_timebase(); if (one_time_init) { fprintf(stderr, "PPU: Multiple rendering contexts not yet supported " @@ -123,24 +154,29 @@ cell_start_spus(struct cell_context *cell) one_time_init = TRUE; - assert(cell->num_spus <= MAX_SPUS); - - ASSERT_ALIGN16(&cell_global.command[0]); - ASSERT_ALIGN16(&cell_global.command[1]); + assert(cell->num_spus <= CELL_MAX_SPUS); ASSERT_ALIGN16(&cell_global.inits[0]); ASSERT_ALIGN16(&cell_global.inits[1]); + /* + * Initialize the global 'inits' structure for each SPU. + * A pointer to the init struct will be passed to each SPU. + * The SPUs will then each grab their init info with mfc_get(). 
+ */ for (i = 0; i < cell->num_spus; i++) { cell_global.inits[i].id = i; cell_global.inits[i].num_spus = cell->num_spus; cell_global.inits[i].debug_flags = cell->debug_flags; - cell_global.inits[i].cmd = &cell_global.command[i]; + cell_global.inits[i].inv_timebase = 1000.0f / timebase; + for (j = 0; j < CELL_NUM_BUFFERS; j++) { cell_global.inits[i].buffers[j] = cell->buffer[j]; } cell_global.inits[i].buffer_status = &cell->buffer_status[0][0][0]; + cell_global.inits[i].spu_functions = &cell->spu_functions; + cell_global.spe_contexts[i] = spe_context_create(0, NULL); if (!cell_global.spe_contexts[i]) { fprintf(stderr, "spe_context_create() failed\n"); diff --git a/src/gallium/drivers/cell/ppu/cell_spu.h b/src/gallium/drivers/cell/ppu/cell_spu.h index 137f26612e..c93958a9ed 100644 --- a/src/gallium/drivers/cell/ppu/cell_spu.h +++ b/src/gallium/drivers/cell/ppu/cell_spu.h @@ -30,14 +30,12 @@ #include <libspe2.h> -#include <libmisc.h> +#include <pthread.h> #include "cell/common.h" #include "cell_context.h" -#define MAX_SPUS 8 - /** * Global vars, for now anyway. 
*/ @@ -46,14 +44,13 @@ struct cell_global_info /** * SPU/SPE handles, etc */ - spe_context_ptr_t spe_contexts[MAX_SPUS]; - pthread_t spe_threads[MAX_SPUS]; + spe_context_ptr_t spe_contexts[CELL_MAX_SPUS]; + pthread_t spe_threads[CELL_MAX_SPUS]; /** - * Data sent to SPUs + * Data sent to SPUs at start-up */ - struct cell_init_info inits[MAX_SPUS]; - struct cell_command command[MAX_SPUS]; + struct cell_init_info inits[CELL_MAX_SPUS]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h index a7771a55a3..b193170f9c 100644 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -44,8 +44,9 @@ #define CELL_NEW_TEXTURE 0x800 #define CELL_NEW_VERTEX 0x1000 #define CELL_NEW_VS 0x2000 -#define CELL_NEW_CONSTANTS 0x4000 -#define CELL_NEW_VERTEX_INFO 0x8000 +#define CELL_NEW_VS_CONSTANTS 0x4000 +#define CELL_NEW_FS_CONSTANTS 0x8000 +#define CELL_NEW_VERTEX_INFO 0x10000 extern void diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 2da3097983..ff529fe22c 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -25,26 +25,163 @@ * **************************************************************************/ +#include "pipe/p_inlines.h" #include "util/u_memory.h" #include "cell_context.h" #include "cell_gen_fragment.h" #include "cell_state.h" #include "cell_state_emit.h" -#include "cell_state_per_fragment.h" #include "cell_batch.h" #include "cell_texture.h" #include "draw/draw_context.h" #include "draw/draw_private.h" +/** + * Find/create a cell_command_fragment_ops object corresponding to the + * current blend/stencil/z/colormask/etc. state. 
+ */ +static struct cell_command_fragment_ops * +lookup_fragment_ops(struct cell_context *cell) +{ + struct cell_fragment_ops_key key; + struct cell_command_fragment_ops *ops; + + /* + * Build key + */ + memset(&key, 0, sizeof(key)); + key.blend = *cell->blend; + key.blend_color = cell->blend_color; + key.dsa = *cell->depth_stencil; + + if (cell->framebuffer.cbufs[0]) + key.color_format = cell->framebuffer.cbufs[0]->format; + else + key.color_format = PIPE_FORMAT_NONE; + + if (cell->framebuffer.zsbuf) + key.zs_format = cell->framebuffer.zsbuf->format; + else + key.zs_format = PIPE_FORMAT_NONE; + + /* + * Look up key in cache. + */ + ops = (struct cell_command_fragment_ops *) + util_keymap_lookup(cell->fragment_ops_cache, &key); + + /* + * If not found, create/save new fragment ops command. + */ + if (!ops) { + struct spe_function spe_code_front, spe_code_back; + unsigned int facing_dependent, total_code_size; + + if (0) + debug_printf("**** Create New Fragment Ops\n"); + + /* Prepare the buffer that will hold the generated code. The + * "0" passed in for the size means that the SPE code will + * use a default size. + */ + spe_init_func(&spe_code_front, 0); + spe_init_func(&spe_code_back, 0); + + /* Generate new code. Always generate new code for both front-facing + * and back-facing fragments, even if it's the same code in both + * cases. + */ + cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front); + cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back); + + /* Make sure the code is a multiple of 8 bytes long; this is + * required to ensure that the dual pipe instruction alignment + * is correct. It's also important for the SPU unpacking, + * which assumes 8-byte boundaries. 
+ */ + unsigned int front_code_size = spe_code_size(&spe_code_front); + while (front_code_size % 8 != 0) { + spe_lnop(&spe_code_front); + front_code_size = spe_code_size(&spe_code_front); + } + unsigned int back_code_size = spe_code_size(&spe_code_back); + while (back_code_size % 8 != 0) { + spe_lnop(&spe_code_back); + back_code_size = spe_code_size(&spe_code_back); + } + + /* Determine whether the code we generated is facing-dependent, by + * determining whether the generated code is different for the front- + * and back-facing fragments. + */ + if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) { + /* Code is identical; only need one copy. */ + facing_dependent = 0; + total_code_size = front_code_size; + } + else { + /* Code is different for front-facing and back-facing fragments. + * Need to send both copies. + */ + facing_dependent = 1; + total_code_size = front_code_size + back_code_size; + } + + /* alloc new fragment ops command. Note that this structure + * has variant length based on the total code size required. + */ + ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size); + /* populate the new cell_command_fragment_ops object */ + ops->opcode[0] = CELL_CMD_STATE_FRAGMENT_OPS; + ops->total_code_size = total_code_size; + ops->front_code_index = 0; + memcpy(ops->code, spe_code_front.store, front_code_size); + if (facing_dependent) { + /* We have separate front- and back-facing code. Append the + * back-facing code to the buffer. Be careful because the code + * size is in bytes, but the buffer is of unsigned elements. + */ + ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]); + memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size); + } + else { + /* Use the same code for front- and back-facing fragments */ + ops->back_code_index = ops->front_code_index; + } + + /* Set the fields for the fallback case. 
Note that these fields + * (and the whole fallback case) will eventually go away. + */ + ops->dsa = *cell->depth_stencil; + ops->blend = *cell->blend; + ops->blend_color = cell->blend_color; + + /* insert cell_command_fragment_ops object into keymap/cache */ + util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL); + + /* release rtasm buffer */ + spe_release_func(&spe_code_front); + spe_release_func(&spe_code_back); + } + else { + if (0) + debug_printf("**** Re-use Fragment Ops\n"); + } + + return ops; +} + + + static void emit_state_cmd(struct cell_context *cell, uint cmd, const void *state, uint state_size) { - uint64_t *dst = (uint64_t *) - cell_batch_alloc(cell, ROUNDUP8(sizeof(uint64_t) + state_size)); + uint32_t *dst = (uint32_t *) + cell_batch_alloc16(cell, ROUNDUP16(sizeof(opcode_t) + state_size)); *dst = cmd; - memcpy(dst + 1, state, state_size); + memcpy(dst + 4, state, state_size); } @@ -58,9 +195,10 @@ cell_emit_state(struct cell_context *cell) if (cell->dirty & CELL_NEW_FRAMEBUFFER) { struct pipe_surface *cbuf = cell->framebuffer.cbufs[0]; struct pipe_surface *zbuf = cell->framebuffer.zsbuf; + STATIC_ASSERT(sizeof(struct cell_command_framebuffer) % 16 == 0); struct cell_command_framebuffer *fb - = cell_batch_alloc(cell, sizeof(*fb)); - fb->opcode = CELL_CMD_STATE_FRAMEBUFFER; + = cell_batch_alloc16(cell, sizeof(*fb)); + fb->opcode[0] = CELL_CMD_STATE_FRAMEBUFFER; fb->color_start = cell->cbuf_map[0]; fb->color_format = cbuf->format; fb->depth_start = cell->zsbuf_map; @@ -73,11 +211,20 @@ cell_emit_state(struct cell_context *cell) #endif } + if (cell->dirty & (CELL_NEW_RASTERIZER)) { + STATIC_ASSERT(sizeof(struct cell_command_rasterizer) % 16 == 0); + struct cell_command_rasterizer *rast = + cell_batch_alloc16(cell, sizeof(*rast)); + rast->opcode[0] = CELL_CMD_STATE_RASTERIZER; + rast->rasterizer = *cell->rasterizer; + } + if (cell->dirty & (CELL_NEW_FS)) { /* Send new fragment program to SPUs */ + STATIC_ASSERT(sizeof(struct 
cell_command_fragment_program) % 16 == 0); struct cell_command_fragment_program *fp - = cell_batch_alloc(cell, sizeof(*fp)); - fp->opcode = CELL_CMD_STATE_FRAGMENT_PROGRAM; + = cell_batch_alloc16(cell, sizeof(*fp)); + fp->opcode[0] = CELL_CMD_STATE_FRAGMENT_PROGRAM; fp->num_inst = cell->fs->code.num_inst; memcpy(&fp->code, cell->fs->code.store, SPU_MAX_FRAGMENT_PROGRAM_INSTS * SPE_INST_SIZE); @@ -90,59 +237,83 @@ cell_emit_state(struct cell_context *cell) } } + if (cell->dirty & (CELL_NEW_FS_CONSTANTS)) { + const uint shader = PIPE_SHADER_FRAGMENT; + const uint num_const = cell->constants[shader].buffer->size / sizeof(float); + uint i, j; + float *buf = cell_batch_alloc16(cell, ROUNDUP16(32 + num_const * sizeof(float))); + uint32_t *ibuf = (uint32_t *) buf; + const float *constants = pipe_buffer_map(cell->pipe.screen, + cell->constants[shader].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + ibuf[0] = CELL_CMD_STATE_FS_CONSTANTS; + ibuf[4] = num_const; + j = 8; + for (i = 0; i < num_const; i++) { + buf[j++] = constants[i]; + } + pipe_buffer_unmap(cell->pipe.screen, cell->constants[shader].buffer); + } + if (cell->dirty & (CELL_NEW_FRAMEBUFFER | CELL_NEW_DEPTH_STENCIL | CELL_NEW_BLEND)) { - /* XXX we don't want to always do codegen here. We should have - * a hash/lookup table to cache previous results... 
- */ - struct cell_command_fragment_ops *fops - = cell_batch_alloc(cell, sizeof(*fops)); - struct spe_function spe_code; - - /* generate new code */ - cell_gen_fragment_function(cell, &spe_code); - /* put the new code into the batch buffer */ - fops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; - memcpy(&fops->code, spe_code.store, - SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); - fops->dsa = cell->depth_stencil->base; - fops->blend = cell->blend->base; - /* free codegen buffer */ - spe_release_func(&spe_code); + struct cell_command_fragment_ops *fops, *fops_cmd; + /* Note that cell_command_fragment_ops is a variant-sized record */ + fops = lookup_fragment_ops(cell); + fops_cmd = cell_batch_alloc16(cell, ROUNDUP16(sizeof(*fops_cmd) + fops->total_code_size)); + memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size); } if (cell->dirty & CELL_NEW_SAMPLER) { uint i; for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - if (cell->sampler[i]) { - struct cell_command_sampler *sampler - = cell_batch_alloc(cell, sizeof(*sampler)); - sampler->opcode = CELL_CMD_STATE_SAMPLER; - sampler->unit = i; - sampler->state = *cell->sampler[i]; + if (cell->dirty_samplers & (1 << i)) { + if (cell->sampler[i]) { + STATIC_ASSERT(sizeof(struct cell_command_sampler) % 16 == 0); + struct cell_command_sampler *sampler + = cell_batch_alloc16(cell, sizeof(*sampler)); + sampler->opcode[0] = CELL_CMD_STATE_SAMPLER; + sampler->unit = i; + sampler->state = *cell->sampler[i]; + } } } + cell->dirty_samplers = 0x0; } if (cell->dirty & CELL_NEW_TEXTURE) { uint i; for (i = 0;i < CELL_MAX_SAMPLERS; i++) { - struct cell_command_texture *texture - = cell_batch_alloc(cell, sizeof(*texture)); - texture->opcode = CELL_CMD_STATE_TEXTURE; - texture->unit = i; - if (cell->texture[i]) { - texture->start = cell->texture[i]->tiled_data; - texture->width = cell->texture[i]->base.width[0]; - texture->height = cell->texture[i]->base.height[0]; - } - else { - texture->start = NULL; - texture->width = 1; - texture->height = 1; + if 
(cell->dirty_textures & (1 << i)) { + STATIC_ASSERT(sizeof(struct cell_command_texture) % 16 == 0); + struct cell_command_texture *texture + = (struct cell_command_texture *)cell_batch_alloc16(cell, sizeof(*texture)); + texture->opcode[0] = CELL_CMD_STATE_TEXTURE; + texture->unit = i; + if (cell->texture[i]) { + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + texture->start[level] = cell->texture[i]->tiled_mapped[level]; + texture->width[level] = cell->texture[i]->base.width[level]; + texture->height[level] = cell->texture[i]->base.height[level]; + texture->depth[level] = cell->texture[i]->base.depth[level]; + } + texture->target = cell->texture[i]->base.target; + } + else { + uint level; + for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) { + texture->start[level] = NULL; + texture->width[level] = 0; + texture->height[level] = 0; + texture->depth[level] = 0; + } + texture->target = 0; + } } } + cell->dirty_textures = 0x0; } if (cell->dirty & CELL_NEW_VERTEX_INFO) { diff --git a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c index 78cb446c14..d97c22b2ef 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_state_per_fragment.c @@ -297,7 +297,7 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, int face_stencil = spe_allocate_available_register(f); int stencil_src = stencil; const unsigned ref = (dsa->stencil[face].ref_value - & dsa->stencil[face].value_mask); + & dsa->stencil[face].valuemask); boolean complement = FALSE; int stored; int tmp = spe_allocate_available_register(f); @@ -305,9 +305,9 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, if ((dsa->stencil[face].func != PIPE_FUNC_NEVER) && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS) - && (dsa->stencil[face].value_mask != 0x0ff)) { + && (dsa->stencil[face].valuemask != 0x0ff)) { stored = spe_allocate_available_register(f); - 
spe_andi(f, stored, stencil, dsa->stencil[face].value_mask); + spe_andi(f, stored, stencil, dsa->stencil[face].valuemask); } else { stored = stencil; } @@ -395,7 +395,7 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, * - For depth-pass if the stencil test is NEVER * - Any of the 3 conditions if the operation is KEEP */ - if (dsa->stencil[face].write_mask != 0) { + if (dsa->stencil[face].writemask != 0) { if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS) && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) { if (complement) { @@ -449,10 +449,10 @@ emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa, */ if (stencil_src == stencil) { spe_release_register(f, face_stencil); - } else if (dsa->stencil[face].write_mask != 0x0ff) { + } else if (dsa->stencil[face].writemask != 0x0ff) { int tmp = spe_allocate_available_register(f); - spe_il(f, tmp, dsa->stencil[face].write_mask); + spe_il(f, tmp, dsa->stencil[face].writemask); spe_selb(f, stencil_src, stencil, stencil_src, tmp); spe_release_register(f, tmp); @@ -580,8 +580,8 @@ cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa) dsa->stencil[i].zpass_op); printf("# ref value / value mask / write mask: %02x %02x %02x\n", dsa->stencil[i].ref_value, - dsa->stencil[i].value_mask, - dsa->stencil[i].write_mask); + dsa->stencil[i].valuemask, + dsa->stencil[i].writemask); } printf("\t.text\n"); diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 3a0d066da2..bf517ea563 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -28,7 +28,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "draw/draw_context.h" #include "tgsi/tgsi_parse.h" @@ -186,18 +186,21 @@ cell_set_constant_buffer(struct pipe_context *pipe, const struct 
pipe_constant_buffer *buf) { struct cell_context *cell = cell_context(pipe); - struct pipe_winsys *ws = pipe->winsys; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); - /* note: reference counting */ - winsys_buffer_reference(ws, - &cell->constants[shader].buffer, - buf->buffer); - cell->constants[shader].size = buf->size; + draw_flush(cell->draw); - cell->dirty |= CELL_NEW_CONSTANTS; + /* note: reference counting */ + pipe_buffer_reference(pipe->screen, + &cell->constants[shader].buffer, + buf->buffer); + + if (shader == PIPE_SHADER_VERTEX) + cell->dirty |= CELL_NEW_VS_CONSTANTS; + else if (shader == PIPE_SHADER_FRAGMENT) + cell->dirty |= CELL_NEW_FS_CONSTANTS; } diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index 732c64082e..c9203fee08 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -27,6 +27,7 @@ #include "util/u_rect.h" #include "cell_context.h" +#include "cell_surface.h" void diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index b6590dfb86..fa52e2cbea 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -28,12 +28,13 @@ * Authors: * Keith Whitwell <keith@tungstengraphics.com> * Michel Dänzer <michel@tungstengraphics.com> + * Brian Paul */ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -42,30 +43,31 @@ #include "cell_texture.h" -/* Simple, maximally packed layout. 
- */ -static unsigned minify( unsigned d ) +static unsigned +minify(unsigned d) { return MAX2(1, d>>1); } static void -cell_texture_layout(struct cell_texture * spt) +cell_texture_layout(struct cell_texture *ct) { - struct pipe_texture *pt = &spt->base; + struct pipe_texture *pt = &ct->base; unsigned level; unsigned width = pt->width[0]; unsigned height = pt->height[0]; unsigned depth = pt->depth[0]; - spt->buffer_size = 0; + ct->buffer_size = 0; for ( level = 0 ; level <= pt->last_level ; level++ ) { unsigned size; unsigned w_tile, h_tile; + assert(level < CELL_MAX_TEXTURE_LEVELS); + /* width, height, rounded up to tile size */ w_tile = align(width, TILE_SIZE); h_tile = align(height, TILE_SIZE); @@ -76,9 +78,9 @@ cell_texture_layout(struct cell_texture * spt) pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile); pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile); - spt->stride[level] = pt->nblocksx[level] * pt->block.size; + ct->stride[level] = pt->nblocksx[level] * pt->block.size; - spt->level_offset[level] = spt->buffer_size; + ct->level_offset[level] = ct->buffer_size; size = pt->nblocksx[level] * pt->nblocksy[level] * pt->block.size; if (pt->target == PIPE_TEXTURE_CUBE) @@ -86,7 +88,7 @@ cell_texture_layout(struct cell_texture * spt) else size *= depth; - spt->buffer_size += size; + ct->buffer_size += size; width = minify(width); height = minify(height); @@ -100,26 +102,25 @@ cell_texture_create(struct pipe_screen *screen, const struct pipe_texture *templat) { struct pipe_winsys *ws = screen->winsys; - struct cell_texture *spt = CALLOC_STRUCT(cell_texture); - if (!spt) + struct cell_texture *ct = CALLOC_STRUCT(cell_texture); + if (!ct) return NULL; - spt->base = *templat; - spt->base.refcount = 1; - spt->base.screen = screen; + ct->base = *templat; + ct->base.refcount = 1; + ct->base.screen = screen; - cell_texture_layout(spt); + cell_texture_layout(ct); - spt->buffer = ws->buffer_create(ws, 32, - PIPE_BUFFER_USAGE_PIXEL, - spt->buffer_size); + 
ct->buffer = ws->buffer_create(ws, 32, PIPE_BUFFER_USAGE_PIXEL, + ct->buffer_size); - if (!spt->buffer) { - FREE(spt); + if (!ct->buffer) { + FREE(ct); return NULL; } - return &spt->base; + return &ct->base; } @@ -135,244 +136,514 @@ cell_texture_release(struct pipe_screen *screen, __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); */ if (--(*pt)->refcount <= 0) { - struct cell_texture *spt = cell_texture(*pt); + /* Delete this texture now. + * But note that the underlying pipe_buffer may linger... + */ + struct cell_texture *ct = cell_texture(*pt); + uint i; /* - DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); + DBG("%s deleting %p\n", __FUNCTION__, (void *) ct); */ - pipe_buffer_reference(screen, &spt->buffer, NULL); + pipe_buffer_reference(screen, &ct->buffer, NULL); + + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + /* Unreference the tiled image buffer. + * It may not actually be deleted until a fence is hit. + */ + if (ct->tiled_buffer[i]) { + ct->tiled_mapped[i] = NULL; + pipe_buffer_reference(screen, &ct->tiled_buffer[i], NULL); + } + } - FREE(spt); + FREE(ct); } *pt = NULL; } -#if 0 + +/** + * Convert image from linear layout to tiled layout. 4-byte pixels. + */ static void -cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, - uint face, uint levelsMask) +twiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, + uint src_stride, const uint *src) { - /* XXX TO DO: re-tile the texture data ... 
*/ + const uint tile_size2 = tile_size * tile_size; + const uint h_t = (h + tile_size - 1) / tile_size; + const uint w_t = (w + tile_size - 1) / tile_size; -} -#endif + uint it, jt; /* tile counters */ + uint i, j; /* intra-tile counters */ + src_stride /= 4; /* convert from bytes to pixels */ -static struct pipe_surface * -cell_get_tex_surface(struct pipe_screen *screen, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice, - unsigned usage) -{ - struct pipe_winsys *ws = screen->winsys; - struct cell_texture *spt = cell_texture(pt); - struct pipe_surface *ps; + /* loop over dest tiles */ + for (it = 0; it < h_t; it++) { + for (jt = 0; jt < w_t; jt++) { + /* start of dest tile: */ + uint *tdst = dst + (it * w_t + jt) * tile_size2; - ps = ws->surface_alloc(ws); - if (ps) { - assert(ps->refcount); - assert(ps->winsys); - winsys_buffer_reference(ws, &ps->buffer, spt->buffer); - ps->format = pt->format; - ps->block = pt->block; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->nblocksx = pt->nblocksx[level]; - ps->nblocksy = pt->nblocksy[level]; - ps->stride = spt->stride[level]; - ps->offset = spt->level_offset[level]; - ps->usage = usage; + /* compute size of this tile (may be smaller than tile_size) */ + /* XXX note: a compiler bug was found here. That's why the code + * looks as it does. 
+ */ + uint tile_width = w - jt * tile_size; + tile_width = MIN2(tile_width, tile_size); + uint tile_height = h - it * tile_size; + tile_height = MIN2(tile_height, tile_size); - /* XXX may need to override usage flags (see sp_texture.c) */ + /* loop over texels in the tile */ + for (i = 0; i < tile_height; i++) { + for (j = 0; j < tile_width; j++) { + const uint srci = it * tile_size + i; + const uint srcj = jt * tile_size + j; + ASSERT(srci < h); + ASSERT(srcj < w); + tdst[i * tile_size + j] = src[srci * src_stride + srcj]; + } + } + } + } +} - pipe_texture_reference(&ps->texture, pt); - ps->face = face; - ps->level = level; - ps->zslice = zslice; - if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { - ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * - ps->nblocksy * - ps->stride; - } - else { - assert(face == 0); - assert(zslice == 0); +/** + * For Cell. Basically, rearrange the pixels/quads from this layout: + * +--+--+--+--+ + * |p0|p1|p2|p3|.... + * +--+--+--+--+ + * + * to this layout: + * +--+--+ + * |p0|p1|.... + * +--+--+ + * |p2|p3| + * +--+--+ + */ +static void +twiddle_tile(const uint *tileIn, uint *tileOut) +{ + int y, x; + + for (y = 0; y < TILE_SIZE; y+=2) { + for (x = 0; x < TILE_SIZE; x+=2) { + int k = 4 * (y/2 * TILE_SIZE/2 + x/2); + tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k]; + tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1]; + tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2]; + tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3]; } } - return ps; } - /** - * Copy tile data from linear layout to tiled layout. - * XXX this should be rolled into the future surface-creation code. - * XXX also need "untile" code... + * Convert image from tiled layout to linear layout. 4-byte pixels. 
*/ static void -tile_copy_data(uint w, uint h, uint tile_size, uint *dst, const uint *src) +untwiddle_image_uint(uint w, uint h, uint tile_size, uint *dst, + uint dst_stride, const uint *src) { const uint tile_size2 = tile_size * tile_size; - const uint h_t = h / tile_size, w_t = w / tile_size; - + const uint h_t = (h + tile_size - 1) / tile_size; + const uint w_t = (w + tile_size - 1) / tile_size; + uint *tile_buf; uint it, jt; /* tile counters */ uint i, j; /* intra-tile counters */ - /* loop over dest tiles */ + dst_stride /= 4; /* convert from bytes to pixels */ + + tile_buf = align_malloc(tile_size * tile_size * 4, 16); + + /* loop over src tiles */ for (it = 0; it < h_t; it++) { for (jt = 0; jt < w_t; jt++) { - /* start of dest tile: */ - uint *tdst = dst + (it * w_t + jt) * tile_size2; + /* start of src tile: */ + const uint *tsrc = src + (it * w_t + jt) * tile_size2; + + twiddle_tile(tsrc, tile_buf); + tsrc = tile_buf; + + /* compute size of this tile (may be smaller than tile_size) */ + /* XXX note: a compiler bug was found here. That's why the code + * looks as it does. + */ + uint tile_width = w - jt * tile_size; + tile_width = MIN2(tile_width, tile_size); + uint tile_height = h - it * tile_size; + tile_height = MIN2(tile_height, tile_size); + /* loop over texels in the tile */ - for (i = 0; i < tile_size; i++) { - for (j = 0; j < tile_size; j++) { - const uint srci = it * tile_size + i; - const uint srcj = jt * tile_size + j; - *tdst++ = src[srci * w + srcj]; + for (i = 0; i < tile_height; i++) { + for (j = 0; j < tile_width; j++) { + uint dsti = it * tile_size + i; + uint dstj = jt * tile_size + j; + ASSERT(dsti < h); + ASSERT(dstj < w); + dst[dsti * dst_stride + dstj] = tsrc[i * tile_size + j]; } } } } -} + align_free(tile_buf); +} /** * Convert linear texture image data to tiled format for SPU usage. - * XXX recast this in terms of pipe_surfaces (aka texture views). 
*/ static void -cell_tile_texture(struct cell_context *cell, - struct cell_texture *texture) +cell_twiddle_texture(struct pipe_screen *screen, + struct pipe_surface *surface) { - struct pipe_screen *screen = cell->pipe.screen; - uint face = 0, level = 0, zslice = 0; - struct pipe_surface *surf; - const uint w = texture->base.width[0], h = texture->base.height[0]; - const uint *src; - - /* temporary restrictions: */ - assert(w >= TILE_SIZE); - assert(h >= TILE_SIZE); - assert(w % TILE_SIZE == 0); - assert(h % TILE_SIZE == 0); - - surf = screen->get_tex_surface(screen, &texture->base, face, level, zslice, - PIPE_BUFFER_USAGE_CPU_WRITE); - ASSERT(surf); - - src = (const uint *) pipe_surface_map(surf, PIPE_BUFFER_USAGE_CPU_WRITE); - - if (texture->tiled_data) { - align_free(texture->tiled_data); +#if 0 // XXX fix me + struct cell_texture *ct = cell_texture(surface->texture); + const uint level = surface->level; + const uint texWidth = ct->base.width[level]; + const uint texHeight = ct->base.height[level]; + const uint bufWidth = align(texWidth, TILE_SIZE); + const uint bufHeight = align(texHeight, TILE_SIZE); + const void *map = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_READ); + const uint *src = (const uint *) map; + + switch (ct->base.format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + { + int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 
6 : 1; + int offset = bufWidth * bufHeight * 4 * surface->face; + uint *dst; + + if (!ct->tiled_buffer[level]) { + /* allocate buffer for tiled data now */ + struct pipe_winsys *ws = screen->winsys; + uint bytes = bufWidth * bufHeight * 4 * numFaces; + ct->tiled_buffer[level] = + ws->buffer_create(ws, 16, PIPE_BUFFER_USAGE_PIXEL, bytes); + /* and map it */ + ct->tiled_mapped[level] = + ws->buffer_map(ws, ct->tiled_buffer[level], + PIPE_BUFFER_USAGE_GPU_READ); + } + dst = (uint *) ((ubyte *) ct->tiled_mapped[level] + offset); + + twiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, + surface->stride, src); + } + break; + default: + printf("Cell: twiddle unsupported texture format %s\n", + pf_name(ct->base.format)); } - texture->tiled_data = align_malloc(w * h * 4, 16); - tile_copy_data(w, h, TILE_SIZE, texture->tiled_data, src); + screen->surface_unmap(screen, surface); +#endif +} + - pipe_surface_unmap(surf); +/** + * Convert SPU tiled texture image data to linear format for app usage. + */ +static void +cell_untwiddle_texture(struct pipe_screen *screen, + struct pipe_surface *surface) +{ +#if 0 // XXX fix me + struct cell_texture *ct = cell_texture(surface->texture); + const uint level = surface->level; + const uint texWidth = ct->base.width[level]; + const uint texHeight = ct->base.height[level]; + const void *map = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_READ); + const uint *src = (const uint *) ((const ubyte *) map + surface->offset); + + switch (ct->base.format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + { + int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 
6 : 1; + int offset = surface->stride * texHeight * 4 * surface->face; + uint *dst; + + if (!ct->untiled_data[level]) { + ct->untiled_data[level] = + align_malloc(surface->stride * texHeight * 4 * numFaces, 16); + } - pipe_surface_reference(&surf, NULL); + dst = (uint *) ((ubyte *) ct->untiled_data[level] + offset); + + untwiddle_image_uint(texWidth, texHeight, TILE_SIZE, dst, + surface->stride, src); + } + break; + default: + { + ct->untiled_data[level] = NULL; + printf("Cell: untwiddle unsupported texture format %s\n", + pf_name(ct->base.format)); + } + } + + screen->surface_unmap(screen, surface); +#endif } -void -cell_update_texture_mapping(struct cell_context *cell) +static struct pipe_surface * +cell_get_tex_surface(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned usage) { -#if 0 - uint face = 0, level = 0, zslice = 0; + struct cell_texture *ct = cell_texture(pt); + struct pipe_surface *ps; + + ps = CALLOC_STRUCT(pipe_surface); + if (ps) { + ps->refcount = 1; + pipe_texture_reference(&ps->texture, pt); + ps->format = pt->format; + //ps->block = pt->block; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + //ps->nblocksx = pt->nblocksx[level]; + //ps->nblocksy = pt->nblocksy[level]; + //ps->stride = ct->stride[level]; + ps->offset = ct->level_offset[level]; + ps->usage = usage; + + /* XXX may need to override usage flags (see sp_texture.c) */ + + pipe_texture_reference(&ps->texture, pt); + ps->face = face; + ps->level = level; + ps->zslice = zslice; + + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { +#if 0 // XXX fix me + ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? 
face : zslice) * + ps->nblocksy * + ps->stride; #endif - uint i; + } + else { + assert(face == 0); + assert(zslice == 0); + } - for (i = 0; i < CELL_MAX_SAMPLERS; i++) { - if (cell->texture[i]) - cell_tile_texture(cell, cell->texture[i]); + if (ps->usage & PIPE_BUFFER_USAGE_CPU_READ) { + /* convert from tiled to linear layout */ + cell_untwiddle_texture(screen, ps); + } } + return ps; +} -#if 0 - if (cell->tex_surf && cell->tex_map) { - pipe_surface_unmap(cell->tex_surf); - cell->tex_map = NULL; + +static void +cell_tex_surface_release(struct pipe_screen *screen, + struct pipe_surface **s) +{ + struct cell_texture *ct = cell_texture((*s)->texture); + const uint level = (*s)->level; + struct pipe_surface *surf = *s; + + if ((surf->usage & PIPE_BUFFER_USAGE_CPU_READ) && (ct->untiled_data[level])) + { + align_free(ct->untiled_data[level]); + ct->untiled_data[level] = NULL; } - /* XXX free old surface */ + if ((ct->base.tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) && + (surf->usage & PIPE_BUFFER_USAGE_CPU_WRITE)) { + /* convert from linear to tiled layout */ + cell_twiddle_texture(screen, surf); + } - cell->tex_surf = cell_get_tex_surface(&cell->pipe, - &cell->texture[0]->base, - face, level, zslice); + /* XXX if done rendering to teximage, re-tile */ - cell->tex_map = pipe_surface_map(cell->tex_surf); -#endif + if (--surf->refcount == 0) { + pipe_texture_reference(&surf->texture, NULL); + FREE(surf); + } + *s = NULL; +} + + +static struct pipe_transfer * +cell_get_tex_transfer(struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + struct cell_texture *ct = cell_texture(texture); + struct cell_transfer *ctrans; + struct pipe_transfer *pt; + + assert(texture); + assert(level <= texture->last_level); + + ctrans = CALLOC_STRUCT(cell_transfer); + pt = &ctrans->base; + if (ctrans) { + pt->refcount = 1; + 
pipe_texture_reference(&pt->texture, texture); + pt->format = texture->format; + pt->block = texture->block; + pt->x = x; + pt->y = y; + pt->width = w; + pt->height = h; + pt->nblocksx = texture->nblocksx[level]; + pt->nblocksy = texture->nblocksy[level]; + pt->stride = ct->stride[level]; + ctrans->offset = ct->level_offset[level]; + pt->usage = usage; + pt->face = face; + pt->level = level; + pt->zslice = zslice; + + if (texture->target == PIPE_TEXTURE_CUBE || + texture->target == PIPE_TEXTURE_3D) { + ctrans->offset += ((texture->target == PIPE_TEXTURE_CUBE) ? face : + zslice) * pt->nblocksy * pt->stride; + } + else { + assert(face == 0); + assert(zslice == 0); + } + } + return pt; } static void -cell_tex_surface_release(struct pipe_screen *screen, - struct pipe_surface **s) +cell_tex_transfer_release(struct pipe_screen *screen, + struct pipe_transfer **t) { + struct cell_transfer *transfer = cell_transfer(*t); /* Effectively do the texture_update work here - if texture images * needed post-processing to put them into hardware layout, this is - * where it would happen. For softpipe, nothing to do. + * where it would happen. For cell, nothing to do. 
*/ - assert ((*s)->texture); - pipe_texture_reference(&(*s)->texture, NULL); - - screen->winsys->surface_release(screen->winsys, s); + assert (transfer->base.texture); + if (--transfer->base.refcount == 0) { + pipe_texture_reference(&transfer->base.texture, NULL); + FREE(transfer); + } + *t = NULL; } static void * -cell_surface_map( struct pipe_screen *screen, - struct pipe_surface *surface, - unsigned flags ) +cell_transfer_map( struct pipe_screen *screen, + struct pipe_transfer *transfer ) { ubyte *map; + struct cell_texture *spt; + unsigned flags = 0; - if (flags & ~surface->usage) { - assert(0); - return NULL; + assert(transfer->texture); + spt = cell_texture(transfer->texture); + + if (transfer->usage != PIPE_TRANSFER_READ) { + flags |= PIPE_BUFFER_USAGE_CPU_WRITE; } - map = pipe_buffer_map( screen, surface->buffer, flags ); + if (transfer->usage != PIPE_TRANSFER_WRITE) { + flags |= PIPE_BUFFER_USAGE_CPU_READ; + } + + map = pipe_buffer_map(screen, spt->buffer, flags); if (map == NULL) return NULL; /* May want to different things here depending on read/write nature * of the map: */ - if (surface->texture && - (flags & PIPE_BUFFER_USAGE_CPU_WRITE)) + if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) { /* Do something to notify sharing contexts of a texture change. - * In softpipe, that would mean flushing the texture cache. + * In cell, that would mean flushing the texture cache. 
*/ -#if 0 +#if 00 cell_screen(screen)->timestamp++; #endif } - return map + surface->offset; + return map + cell_transfer(transfer)->offset + + transfer->y / transfer->block.height * transfer->stride + + transfer->x / transfer->block.width * transfer->block.size; } static void -cell_surface_unmap(struct pipe_screen *screen, - struct pipe_surface *surface) +cell_transfer_unmap(struct pipe_screen *screen, + struct pipe_transfer *transfer) { - pipe_buffer_unmap( screen, surface->buffer ); + struct cell_texture *spt; + + assert(transfer->texture); + spt = cell_texture(transfer->texture); + + pipe_buffer_unmap( screen, spt->buffer ); } -void -cell_init_texture_functions(struct cell_context *cell) +static void * +cell_surface_map(struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags) { - /*cell->pipe.texture_update = cell_texture_update;*/ + ubyte *map; + struct cell_texture *ct = cell_texture(surface->texture); + const uint level = surface->level; + + assert(ct); + +#if 0 + if (flags & ~surface->usage) { + assert(0); + return NULL; + } +#endif + + map = pipe_buffer_map( screen, ct->buffer, flags ); + if (map == NULL) { + return NULL; + } + else { + if ((surface->usage & PIPE_BUFFER_USAGE_CPU_READ) && + (ct->untiled_data[level])) { + return (void *) ((ubyte *) ct->untiled_data[level] + surface->offset); + } + else { + return (void *) (map + surface->offset); + } + } } +static void +cell_surface_unmap(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + struct cell_texture *ct = cell_texture(surface->texture); + + assert(ct); + + pipe_buffer_unmap( screen, ct->buffer ); +} + + + void cell_init_screen_texture_funcs(struct pipe_screen *screen) { @@ -382,6 +653,8 @@ cell_init_screen_texture_funcs(struct pipe_screen *screen) screen->get_tex_surface = cell_get_tex_surface; screen->tex_surface_release = cell_tex_surface_release; - screen->surface_map = cell_surface_map; - screen->surface_unmap = cell_surface_unmap; + screen->get_tex_transfer = 
cell_get_tex_transfer; + screen->tex_transfer_release = cell_tex_transfer_release; + screen->transfer_map = cell_transfer_map; + screen->transfer_unmap = cell_transfer_unmap; } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.h b/src/gallium/drivers/cell/ppu/cell_texture.h index 6d37e95ebc..fc6486adbe 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.h +++ b/src/gallium/drivers/cell/ppu/cell_texture.h @@ -40,15 +40,31 @@ struct cell_texture { struct pipe_texture base; - unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long level_offset[CELL_MAX_TEXTURE_LEVELS]; + unsigned long stride[CELL_MAX_TEXTURE_LEVELS]; /* The data is held here: */ struct pipe_buffer *buffer; unsigned long buffer_size; - void *tiled_data; /* XXX this may be temporary */ /*ALIGN16*/ + /** Texture data in tiled layout is held here */ + struct pipe_buffer *tiled_buffer[CELL_MAX_TEXTURE_LEVELS]; + /** Mapped, tiled texture data */ + void *tiled_mapped[CELL_MAX_TEXTURE_LEVELS]; + + struct pipe_transfer *transfer; + + /** The original, linear texture data */ + void *untiled_data[CELL_MAX_TEXTURE_LEVELS]; +}; + + +struct cell_transfer +{ + struct pipe_transfer base; + + unsigned long offset; }; @@ -60,13 +76,12 @@ cell_texture(struct pipe_texture *pt) } - -extern void -cell_update_texture_mapping(struct cell_context *cell); - - -extern void -cell_init_texture_functions(struct cell_context *cell); +/** cast wrapper */ +static INLINE struct cell_transfer * +cell_transfer(struct pipe_transfer *pt) +{ + return (struct cell_transfer *) pt; +} extern void diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index aa63435b93..cfaffb52a8 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -38,6 +38,7 @@ #include "cell_batch.h" #include "cell_context.h" +#include "cell_fence.h" #include "cell_flush.h" #include "cell_spu.h" #include 
"cell_vbuf.h" @@ -61,6 +62,7 @@ struct cell_vbuf_render uint vertex_size; /**< in bytes */ void *vertex_buffer; /**< just for debug, really */ uint vertex_buf; /**< in [0, CELL_NUM_BUFFERS-1] */ + uint vertex_buffer_size; /**< size in bytes */ }; @@ -81,24 +83,26 @@ cell_vbuf_get_vertex_info(struct vbuf_render *vbr) } -static void * +static boolean cell_vbuf_allocate_vertices(struct vbuf_render *vbr, ushort vertex_size, ushort nr_vertices) { struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + unsigned size = vertex_size * nr_vertices; /*printf("Alloc verts %u * %u\n", vertex_size, nr_vertices);*/ assert(cvbr->vertex_buf == ~0); cvbr->vertex_buf = cell_get_empty_buffer(cvbr->cell); cvbr->vertex_buffer = cvbr->cell->buffer[cvbr->vertex_buf]; + cvbr->vertex_buffer_size = size; cvbr->vertex_size = vertex_size; - return cvbr->vertex_buffer; + + return cvbr->vertex_buffer != NULL; } static void -cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, - unsigned vertex_size, unsigned vertices_used) +cell_vbuf_release_vertices(struct vbuf_render *vbr) { struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); struct cell_context *cell = cvbr->cell; @@ -108,23 +112,47 @@ cell_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, __FUNCTION__, cvbr->vertex_buf, vertices_used); */ + /* Make sure texture buffers aren't released until we're done rendering + * with them. 
+ */ + cell_add_fenced_textures(cell); + /* Tell SPUs they can release the vert buf */ if (cvbr->vertex_buf != ~0U) { + STATIC_ASSERT(sizeof(struct cell_command_release_verts) % 16 == 0); struct cell_command_release_verts *release = (struct cell_command_release_verts *) - cell_batch_alloc(cell, sizeof(struct cell_command_release_verts)); - release->opcode = CELL_CMD_RELEASE_VERTS; + cell_batch_alloc16(cell, sizeof(struct cell_command_release_verts)); + release->opcode[0] = CELL_CMD_RELEASE_VERTS; release->vertex_buf = cvbr->vertex_buf; } cvbr->vertex_buf = ~0; cell_flush_int(cell, 0x0); - assert(vertices == cvbr->vertex_buffer); cvbr->vertex_buffer = NULL; } +static void * +cell_vbuf_map_vertices(struct vbuf_render *vbr) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + return cvbr->vertex_buffer; +} + + +static void +cell_vbuf_unmap_vertices(struct vbuf_render *vbr, + ushort min_index, + ushort max_index ) +{ + struct cell_vbuf_render *cvbr = cell_vbuf_render(vbr); + assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); + /* do nothing */ +} + + static boolean cell_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) @@ -204,15 +232,16 @@ cell_vbuf_draw(struct vbuf_render *vbr, /* build/insert batch RENDER command */ { - const uint index_bytes = ROUNDUP8(nr_indices * 2); - const uint vertex_bytes = nr_vertices * 4 * cell->vertex_info.size; + const uint index_bytes = ROUNDUP16(nr_indices * 2); + const uint vertex_bytes = ROUNDUP16(nr_vertices * 4 * cell->vertex_info.size); + STATIC_ASSERT(sizeof(struct cell_command_render) % 16 == 0); const uint batch_size = sizeof(struct cell_command_render) + index_bytes; struct cell_command_render *render = (struct cell_command_render *) - cell_batch_alloc(cell, batch_size); + cell_batch_alloc16(cell, batch_size); - render->opcode = CELL_CMD_RENDER; + render->opcode[0] = CELL_CMD_RENDER; render->prim_type = cvbr->prim; render->num_indexes = nr_indices; @@ -230,7 +259,7 @@ 
cell_vbuf_draw(struct vbuf_render *vbr, min_index == 0 && vertex_bytes + 16 <= cell_batch_free_space(cell)) { /* vertex data inlined, after indices, at 16-byte boundary */ - void *dst = cell_batch_alloc_aligned(cell, vertex_bytes, 16); + void *dst = cell_batch_alloc16(cell, vertex_bytes); memcpy(dst, vertices, vertex_bytes); render->inline_verts = TRUE; render->vertex_buf = ~0; @@ -287,6 +316,8 @@ cell_init_vbuf(struct cell_context *cell) cell->vbuf_render->base.get_vertex_info = cell_vbuf_get_vertex_info; cell->vbuf_render->base.allocate_vertices = cell_vbuf_allocate_vertices; + cell->vbuf_render->base.map_vertices = cell_vbuf_map_vertices; + cell->vbuf_render->base.unmap_vertices = cell_vbuf_unmap_vertices; cell->vbuf_render->base.set_primitive = cell_vbuf_set_primitive; cell->vbuf_render->base.draw = cell_vbuf_draw; cell->vbuf_render->base.release_vertices = cell_vbuf_release_vertices; diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 566df7f59e..9cba537d9e 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -73,8 +73,8 @@ emit_matrix_transpose(struct spe_function *p, int col3; - spe_lqd(p, shuf_hi, shuf_ptr, 3); - spe_lqd(p, shuf_lo, shuf_ptr, 4); + spe_lqd(p, shuf_hi, shuf_ptr, 3*16); + spe_lqd(p, shuf_lo, shuf_ptr, 4*16); spe_shufb(p, t1, row0, row2, shuf_hi); spe_shufb(p, t2, row0, row2, shuf_lo); @@ -122,13 +122,13 @@ emit_matrix_transpose(struct spe_function *p, */ switch (count) { case 4: - spe_stqd(p, col3, dest_ptr, 3); + spe_stqd(p, col3, dest_ptr, 3 * 16); case 3: - spe_stqd(p, col2, dest_ptr, 2); + spe_stqd(p, col2, dest_ptr, 2 * 16); case 2: - spe_stqd(p, col1, dest_ptr, 1); + spe_stqd(p, col1, dest_ptr, 1 * 16); case 1: - spe_stqd(p, col0, dest_ptr, 0); + spe_stqd(p, col0, dest_ptr, 0 * 16); } @@ -145,6 +145,8 @@ emit_matrix_transpose(struct spe_function *p, } +#if 0 +/* This appears to not be used currently */ 
static void emit_fetch(struct spe_function *p, unsigned in_ptr, unsigned *offset, @@ -166,17 +168,17 @@ emit_fetch(struct spe_function *p, float scale_signed = 0.0; float scale_unsigned = 0.0; - spe_lqd(p, v0, in_ptr, 0 + offset[0]); - spe_lqd(p, v1, in_ptr, 1 + offset[0]); - spe_lqd(p, v2, in_ptr, 2 + offset[0]); - spe_lqd(p, v3, in_ptr, 3 + offset[0]); + spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16); + spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16); + spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16); + spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16); offset[0] += 4; switch (bytes) { case 1: scale_signed = 1.0f / 127.0f; scale_unsigned = 1.0f / 255.0f; - spe_lqd(p, tmp, shuf_ptr, 1); + spe_lqd(p, tmp, shuf_ptr, 1 * 16); spe_shufb(p, v0, v0, v0, tmp); spe_shufb(p, v1, v1, v1, tmp); spe_shufb(p, v2, v2, v2, tmp); @@ -185,7 +187,7 @@ emit_fetch(struct spe_function *p, case 2: scale_signed = 1.0f / 32767.0f; scale_unsigned = 1.0f / 65535.0f; - spe_lqd(p, tmp, shuf_ptr, 2); + spe_lqd(p, tmp, shuf_ptr, 2 * 16); spe_shufb(p, v0, v0, v0, tmp); spe_shufb(p, v1, v1, v1, tmp); spe_shufb(p, v2, v2, v2, tmp); @@ -241,11 +243,11 @@ emit_fetch(struct spe_function *p, switch (count) { case 1: - spe_stqd(p, float_zero, out_ptr, 1); + spe_stqd(p, float_zero, out_ptr, 1 * 16); case 2: - spe_stqd(p, float_zero, out_ptr, 2); + spe_stqd(p, float_zero, out_ptr, 2 * 16); case 3: - spe_stqd(p, float_one, out_ptr, 3); + spe_stqd(p, float_one, out_ptr, 3 * 16); } if (float_zero != -1) { @@ -256,6 +258,7 @@ emit_fetch(struct spe_function *p, spe_release_register(p, float_one); } } +#endif void cell_update_vertex_fetch(struct draw_context *draw) diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 2b10c116fa..403cf6d50f 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -31,7 +31,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include 
"pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/spu/.gitignore b/src/gallium/drivers/cell/spu/.gitignore new file mode 100644 index 0000000000..2be9a2d324 --- /dev/null +++ b/src/gallium/drivers/cell/spu/.gitignore @@ -0,0 +1 @@ +g3d_spu diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index 1ae0dfb8c1..3cc52301da 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -16,8 +16,10 @@ PROG_SPU_EMBED_O = $(PROG)_spu-embed.o SOURCES = \ - spu_main.c \ + spu_command.c \ spu_dcache.c \ + spu_funcs.c \ + spu_main.c \ spu_per_fragment_op.c \ spu_render.c \ spu_texture.c \ @@ -31,9 +33,10 @@ OLD_SOURCES = \ spu_vertex_shader.c -SPU_OBJECTS = $(SOURCES:.c=.o) \ +SPU_OBJECTS = $(SOURCES:.c=.o) + +SPU_ASM_OUT = $(SOURCES:.c=.s) -SPU_ASM_OUT = $(SOURCES:.c=.s) \ INCLUDE_DIRS = \ -I$(TOP)/src/mesa \ diff --git a/src/gallium/drivers/cell/spu/spu_colorpack.h b/src/gallium/drivers/cell/spu/spu_colorpack.h index fd8dc6ded3..d7ce005524 100644 --- a/src/gallium/drivers/cell/spu/spu_colorpack.h +++ b/src/gallium/drivers/cell/spu/spu_colorpack.h @@ -31,6 +31,7 @@ #define SPU_COLORPACK_H +#include <transpose_matrix4x4.h> #include <spu_intrinsics.h> @@ -84,10 +85,10 @@ spu_unpack_B8G8R8A8(uint color) vector unsigned int color_u4 = spu_splats(color); color_u4 = spu_shuffle(color_u4, color_u4, ((vector unsigned char) { - 10, 10, 10, 10, - 5, 5, 5, 5, + 2, 2, 2, 2, + 1, 1, 1, 1, 0, 0, 0, 0, - 15, 15, 15, 15}) ); + 3, 3, 3, 3}) ); return spu_convtf(color_u4, 32); } @@ -98,13 +99,47 @@ spu_unpack_A8R8G8B8(uint color) vector unsigned int color_u4 = spu_splats(color); color_u4 = spu_shuffle(color_u4, color_u4, ((vector unsigned char) { - 5, 5, 5, 5, - 10, 10, 10, 10, - 15, 15, 15, 15, + 1, 1, 1, 1, + 2, 2, 2, 2, + 3, 3, 3, 3, 0, 0, 0, 0}) ); - return spu_convtf(color_u4, 32); } +/** + * \param color_in - array 
of 32-bit packed ARGB colors + * \param color_out - returns float colors in RRRR, GGGG, BBBB, AAAA order + */ +static INLINE void +spu_unpack_A8R8G8B8_transpose4(const vector unsigned int color_in[4], + vector float color_out[4]) +{ + vector unsigned int c0; + + c0 = spu_shuffle(color_in[0], color_in[0], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[0] = spu_convtf(c0, 32); + + c0 = spu_shuffle(color_in[1], color_in[1], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[1] = spu_convtf(c0, 32); + + c0 = spu_shuffle(color_in[2], color_in[2], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[2] = spu_convtf(c0, 32); + + c0 = spu_shuffle(color_in[3], color_in[3], + ((vector unsigned char) { + 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 0, 0}) ); + color_out[3] = spu_convtf(c0, 32); + + _transpose_matrix4x4(color_out, color_out); +} + + + #endif /* SPU_COLORPACK_H */ diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c new file mode 100644 index 0000000000..5c0179d954 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -0,0 +1,815 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + + +/** + * SPU command processing code + */ + + +#include <stdio.h> +#include <libmisc.h> + +#include "pipe/p_defines.h" + +#include "spu_command.h" +#include "spu_main.h" +#include "spu_render.h" +#include "spu_per_fragment_op.h" +#include "spu_texture.h" +#include "spu_tile.h" +#include "spu_vertex_shader.h" +#include "spu_dcache.h" +#include "cell/common.h" + + +struct spu_vs_context draw; + + +/** + * Buffers containing dynamically generated SPU code: + */ +static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] + ALIGN16_ATTRIB; + + + +static INLINE int +align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + + + +/** + * Tell the PPU that this SPU has finished copying a buffer to + * local store and that it may be reused by the PPU. + * This is done by writting a 16-byte batch-buffer-status block back into + * main memory (in cell_context->buffer_status[]). + */ +static void +release_buffer(uint buffer) +{ + /* Evidently, using less than a 16-byte status doesn't work reliably */ + static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE, + CELL_BUFFER_STATUS_FREE}; + const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); + uint *dst = spu.init.buffer_status + index; + + ASSERT(buffer < CELL_NUM_BUFFERS); + + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_MISC, /* tag is unimportant */ + 0, /* tid */ + 0 /* rid */); +} + + +/** + * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory. + * There's a qword of status per SPU. 
+ */ +static void +cmd_fence(struct cell_command_fence *fence_cmd) +{ + static const vector unsigned int status = {CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED, + CELL_FENCE_SIGNALLED}; + uint *dst = (uint *) fence_cmd->fence; + dst += 4 * spu.init.id; /* main store/memory address, not local store */ + ASSERT_ALIGN16(dst); + mfc_put((void *) &status, /* src in local memory */ + (unsigned int) dst, /* dst in main memory */ + sizeof(status), /* size */ + TAG_FENCE, /* tag */ + 0, /* tid */ + 0 /* rid */); +} + + +static void +cmd_clear_surface(const struct cell_command_clear_surface *clear) +{ + D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value); + + if (clear->surface == 0) { + spu.fb.color_clear_value = clear->value; + if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { + uint x = (spu.init.id << 4) | (spu.init.id << 12) | + (spu.init.id << 20) | (spu.init.id << 28); + spu.fb.color_clear_value ^= x; + } + } + else { + spu.fb.depth_clear_value = clear->value; + } + +#define CLEAR_OPT 1 +#if CLEAR_OPT + + /* Simply set all tiles' status to CLEAR. + * When we actually begin rendering into a tile, we'll initialize it to + * the clear value. If any tiles go untouched during the frame, + * really_clear_tiles() will set them to the clear value. + */ + if (clear->surface == 0) { + memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); + } + else { + memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); + } + +#else + + /* + * This path clears the whole framebuffer to the clear color right now. 
+ */ + + /* + printf("SPU: %s num=%d w=%d h=%d\n", + __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); + */ + + /* init a single tile to the clear value */ + if (clear->surface == 0) { + clear_c_tile(&spu.ctile); + } + else { + clear_z_tile(&spu.ztile); + } + + /* walk over my tiles, writing the 'clear' tile's data */ + { + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (clear->surface == 0) + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + else + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); + } + } + + if (spu.init.debug_flags & CELL_DEBUG_SYNC) { + wait_on_mask(1 << TAG_SURFACE_CLEAR); + } + +#endif /* CLEAR_OPT */ + + D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n"); +} + + +static void +cmd_release_verts(const struct cell_command_release_verts *release) +{ + D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf); + ASSERT(release->vertex_buf != ~0U); + release_buffer(release->vertex_buf); +} + + +/** + * Process a CELL_CMD_STATE_FRAGMENT_OPS command. + * This involves installing new fragment ops SPU code. + * If this function is never called, we'll use a regular C fallback function + * for fragment processing. + */ +static void +cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) +{ + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n"); + + /* Copy state info (for fallback case only - this will eventually + * go away when the fallback case goes away) + */ + memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); + memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); + memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color)); + + /* Make sure the SPU knows which buffers it's expected to read when + * it's told to pull tiles. 
+ */ + spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled); + + /* If we're forcing the fallback code to be used (for debug purposes), + * install that. Otherwise install the incoming SPU code. + */ + if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) { + static unsigned int warned = 0; + if (!warned) { + fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); + warned = 1; + } + /* The following two lines aren't really necessary if you + * know the debug flags won't change during a run, and if you + * know that the function pointers are initialized correctly. + * We set them here to allow a person to change the debug + * flags during a run (from inside a debugger). + */ + spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; + return; + } + + /* Make sure the SPU code buffer is large enough to hold the incoming code. + * Note that we *don't* use align_malloc() and align_free(), because + * those utility functions are *not* available in SPU code. + * */ + if (spu.fragment_ops_code_size < fops->total_code_size) { + if (spu.fragment_ops_code != NULL) { + free(spu.fragment_ops_code); + } + spu.fragment_ops_code_size = fops->total_code_size; + spu.fragment_ops_code = malloc(fops->total_code_size); + if (spu.fragment_ops_code == NULL) { + /* Whoops. 
*/ + fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size); + spu.fragment_ops_code = NULL; + spu.fragment_ops_code_size = 0; + spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; + return; + } + } + + /* Copy the SPU code from the command buffer to the spu buffer */ + memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size); + + /* Set the pointers for the front-facing and back-facing fragments + * to the specified offsets within the code. Note that if the + * front-facing and back-facing code are the same, they'll have + * the same offset. + */ + spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index]; + spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index]; +} + +static void +cmd_state_fragment_program(const struct cell_command_fragment_program *fp) +{ + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n"); + /* Copy SPU code from batch buffer to spu buffer */ + memcpy(spu.fragment_program_code, fp->code, + SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); +#if 01 + /* Point function pointer at new code */ + spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; +#endif +} + + +static uint +cmd_state_fs_constants(const qword *buffer, uint pos) +{ + const uint num_const = spu_extract((vector unsigned int)buffer[pos+1], 0); + const float *constants = (const float *) &buffer[pos+2]; + uint i; + + D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const); + + /* Expand each float to float[4] for SOA execution */ + for (i = 0; i < num_const; i++) { + D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]); + spu.constants[i] = spu_splats(constants[i]); + } + + /* return new buffer pos (in 16-byte words) */ + return pos + 2 + (ROUNDUP16(num_const * sizeof(float)) / 16); +} + + 
+static void +cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) +{ + D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", + cmd->width, + cmd->height, + cmd->color_start, + cmd->color_format, + cmd->depth_format); + + ASSERT_ALIGN16(cmd->color_start); + ASSERT_ALIGN16(cmd->depth_start); + + spu.fb.color_start = cmd->color_start; + spu.fb.depth_start = cmd->depth_start; + spu.fb.color_format = cmd->color_format; + spu.fb.depth_format = cmd->depth_format; + spu.fb.width = cmd->width; + spu.fb.height = cmd->height; + spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; + spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; + + switch (spu.fb.depth_format) { + case PIPE_FORMAT_Z32_UNORM: + spu.fb.zsize = 4; + spu.fb.zscale = (float) 0xffffffffu; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + spu.fb.zsize = 4; + spu.fb.zscale = (float) 0x00ffffffu; + break; + case PIPE_FORMAT_Z16_UNORM: + spu.fb.zsize = 2; + spu.fb.zscale = (float) 0xffffu; + break; + default: + spu.fb.zsize = 0; + break; + } +} + + +/** + * Tex texture mask_s/t and scale_s/t fields depend on the texture size and + * sampler wrap modes. 
+ */ +static void +update_tex_masks(struct spu_texture *texture, + const struct pipe_sampler_state *sampler) +{ + uint i; + + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + int width = texture->level[i].width; + int height = texture->level[i].height; + + if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT) + texture->level[i].mask_s = spu_splats(width - 1); + else + texture->level[i].mask_s = spu_splats(~0); + + if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT) + texture->level[i].mask_t = spu_splats(height - 1); + else + texture->level[i].mask_t = spu_splats(~0); + + if (sampler->normalized_coords) { + texture->level[i].scale_s = spu_splats((float) width); + texture->level[i].scale_t = spu_splats((float) height); + } + else { + texture->level[i].scale_s = spu_splats(1.0f); + texture->level[i].scale_t = spu_splats(1.0f); + } + } +} + + +static void +cmd_state_sampler(const struct cell_command_sampler *sampler) +{ + uint unit = sampler->unit; + + D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit); + + spu.sampler[unit] = sampler->state; + + switch (spu.sampler[unit].min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear; + break; + case PIPE_TEX_FILTER_ANISO: + /* fall-through, for now */ + case PIPE_TEX_FILTER_NEAREST: + spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest; + break; + default: + ASSERT(0); + } + + switch (spu.sampler[sampler->unit].mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear; + break; + case PIPE_TEX_FILTER_ANISO: + /* fall-through, for now */ + case PIPE_TEX_FILTER_NEAREST: + spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest; + break; + default: + ASSERT(0); + } + + switch (spu.sampler[sampler->unit].min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + case PIPE_TEX_MIPFILTER_LINEAR: + spu.sample_texture_2d[unit] = sample_texture_2d_lod; + break; + case PIPE_TEX_MIPFILTER_NONE: + spu.sample_texture_2d[unit] = 
spu.mag_sample_texture_2d[unit]; + break; + default: + ASSERT(0); + } + + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); +} + + +static void +cmd_state_texture(const struct cell_command_texture *texture) +{ + const uint unit = texture->unit; + uint i; + + D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit); + + spu.texture[unit].max_level = 0; + spu.texture[unit].target = texture->target; + + for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) { + uint width = texture->width[i]; + uint height = texture->height[i]; + uint depth = texture->depth[i]; + + D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i, + texture->start[i], texture->width[i], texture->height[i]); + + spu.texture[unit].level[i].start = texture->start[i]; + spu.texture[unit].level[i].width = width; + spu.texture[unit].level[i].height = height; + spu.texture[unit].level[i].depth = depth; + + spu.texture[unit].level[i].tiles_per_row = + (width + TILE_SIZE - 1) / TILE_SIZE; + + spu.texture[unit].level[i].bytes_per_image = + 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth; + + spu.texture[unit].level[i].max_s = spu_splats((int) width - 1); + spu.texture[unit].level[i].max_t = spu_splats((int) height - 1); + + if (texture->start[i]) + spu.texture[unit].max_level = i; + } + + update_tex_masks(&spu.texture[unit], &spu.sampler[unit]); +} + + +static void +cmd_state_vertex_info(const struct vertex_info *vinfo) +{ + D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs); + ASSERT(vinfo->num_attribs >= 1); + ASSERT(vinfo->num_attribs <= 8); + memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); +} + + +static void +cmd_state_vs_array_info(const struct cell_array_info *vs_info) +{ + const unsigned attr = vs_info->attr; + + ASSERT(attr < PIPE_MAX_ATTRIBS); + draw.vertex_fetch.src_ptr[attr] = vs_info->base; + draw.vertex_fetch.pitch[attr] = vs_info->pitch; + draw.vertex_fetch.size[attr] = vs_info->size; + draw.vertex_fetch.code_offset[attr] = 
vs_info->function_offset; + draw.vertex_fetch.dirty = 1; +} + + +static void +cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) +{ + mfc_get(attribute_fetch_code_buffer, + (unsigned int) code->base, /* src */ + code->size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + draw.vertex_fetch.code = attribute_fetch_code_buffer; +} + + +static void +cmd_finish(void) +{ + D_PRINTF(CELL_DEBUG_CMD, "FINISH\n"); + really_clear_tiles(0); + /* wait for all outstanding DMAs to finish */ + mfc_write_tag_mask(~0); + mfc_read_tag_status_all(); + /* send mbox message to PPU */ + spu_write_out_mbox(CELL_CMD_FINISH); +} + + +/** + * Execute a batch of commands which was sent to us by the PPU. + * See the cell_emit_state.c code to see where the commands come from. + * + * The opcode param encodes the location of the buffer and its size. + */ +static void +cmd_batch(uint opcode) +{ + const uint buf = (opcode >> 8) & 0xff; + uint size = (opcode >> 16); + qword buffer[CELL_BUFFER_SIZE / 16] ALIGN16_ATTRIB; + const unsigned usize = ROUNDUP16(size) / sizeof(buffer[0]); + uint pos; + + D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n", + buf, size, spu.init.buffers[buf]); + + ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + size = ROUNDUP16(size); + + ASSERT_ALIGN16(spu.init.buffers[buf]); + + mfc_get(buffer, /* dest */ + (unsigned int) spu.init.buffers[buf], /* src */ + size, + TAG_BATCH_BUFFER, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << TAG_BATCH_BUFFER); + + /* Tell PPU we're done copying the buffer to local store */ + D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf); + release_buffer(buf); + + /* + * Loop over commands in the batch buffer + */ + for (pos = 0; pos < usize; /* no incr */) { + switch (si_to_uint(buffer[pos])) { + /* + * rendering commands + */ + case CELL_CMD_CLEAR_SURFACE: + { + struct cell_command_clear_surface *clr + = 
(struct cell_command_clear_surface *) &buffer[pos]; + cmd_clear_surface(clr); + pos += sizeof(*clr) / 16; + } + break; + case CELL_CMD_RENDER: + { + struct cell_command_render *render + = (struct cell_command_render *) &buffer[pos]; + uint pos_incr; + cmd_render(render, &pos_incr); + pos += ((pos_incr+1)&~1) / 2; // should 'fix' cmd_render return + } + break; + /* + * state-update commands + */ + case CELL_CMD_STATE_FRAMEBUFFER: + { + struct cell_command_framebuffer *fb + = (struct cell_command_framebuffer *) &buffer[pos]; + cmd_state_framebuffer(fb); + pos += sizeof(*fb) / 16; + } + break; + case CELL_CMD_STATE_FRAGMENT_OPS: + { + struct cell_command_fragment_ops *fops + = (struct cell_command_fragment_ops *) &buffer[pos]; + cmd_state_fragment_ops(fops); + /* This is a variant-sized command */ + pos += ROUNDUP16(sizeof(*fops) + fops->total_code_size) / 16; + } + break; + case CELL_CMD_STATE_FRAGMENT_PROGRAM: + { + struct cell_command_fragment_program *fp + = (struct cell_command_fragment_program *) &buffer[pos]; + cmd_state_fragment_program(fp); + pos += sizeof(*fp) / 16; + } + break; + case CELL_CMD_STATE_FS_CONSTANTS: + pos = cmd_state_fs_constants(buffer, pos); + break; + case CELL_CMD_STATE_RASTERIZER: + { + struct cell_command_rasterizer *rast = + (struct cell_command_rasterizer *) &buffer[pos]; + spu.rasterizer = rast->rasterizer; + pos += sizeof(*rast) / 16; + } + break; + case CELL_CMD_STATE_SAMPLER: + { + struct cell_command_sampler *sampler + = (struct cell_command_sampler *) &buffer[pos]; + cmd_state_sampler(sampler); + pos += sizeof(*sampler) / 16; + } + break; + case CELL_CMD_STATE_TEXTURE: + { + struct cell_command_texture *texture + = (struct cell_command_texture *) &buffer[pos]; + cmd_state_texture(texture); + pos += sizeof(*texture) / 16; + } + break; + case CELL_CMD_STATE_VERTEX_INFO: + cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); + pos += 1 + ROUNDUP16(sizeof(struct vertex_info)) / 16; + break; + case CELL_CMD_STATE_VIEWPORT: + 
(void) memcpy(& draw.viewport, &buffer[pos+1], + sizeof(struct pipe_viewport_state)); + pos += 1 + ROUNDUP16(sizeof(struct pipe_viewport_state)) / 16; + break; + case CELL_CMD_STATE_UNIFORMS: + draw.constants = (const float (*)[4]) (uintptr_t)spu_extract((vector unsigned int)buffer[pos+1],0); + pos += 2; + break; + case CELL_CMD_STATE_VS_ARRAY_INFO: + cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); + pos += 1 + ROUNDUP16(sizeof(struct cell_array_info)) / 16; + break; + case CELL_CMD_STATE_BIND_VS: +#if 0 + spu_bind_vertex_shader(&draw, + (struct cell_shader_info *) &buffer[pos+1]); +#endif + pos += 1 + ROUNDUP16(sizeof(struct cell_shader_info)) / 16; + break; + case CELL_CMD_STATE_ATTRIB_FETCH: + cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) + &buffer[pos+1]); + pos += 1 + ROUNDUP16(sizeof(struct cell_attribute_fetch_code)) / 16; + break; + /* + * misc commands + */ + case CELL_CMD_FINISH: + cmd_finish(); + pos += 1; + break; + case CELL_CMD_FENCE: + { + struct cell_command_fence *fence_cmd = + (struct cell_command_fence *) &buffer[pos]; + cmd_fence(fence_cmd); + pos += sizeof(*fence_cmd) / 16; + } + break; + case CELL_CMD_RELEASE_VERTS: + { + struct cell_command_release_verts *release + = (struct cell_command_release_verts *) &buffer[pos]; + cmd_release_verts(release); + pos += sizeof(*release) / 16; + } + break; + case CELL_CMD_FLUSH_BUFFER_RANGE: { + struct cell_buffer_range *br = (struct cell_buffer_range *) + &buffer[pos+1]; + + spu_dcache_mark_dirty((unsigned) br->base, br->size); + pos += 1 + ROUNDUP16(sizeof(struct cell_buffer_range)) / 16; + break; + } + default: + printf("SPU %u: bad opcode: 0x%x\n", spu.init.id, si_to_uint(buffer[pos])); + ASSERT(0); + break; + } + } + + D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n"); +} + + +#define PERF 0 + + +/** + * Main loop for SPEs: Get a command, execute it, repeat. 
+ */ +void +command_loop(void) +{ + int exitFlag = 0; + uint t0, t1; + + D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n"); + + while (!exitFlag) { + unsigned opcode; + + D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n"); + + if (PERF) + spu_write_decrementer(~0); + + /* read/wait from mailbox */ + opcode = (unsigned int) spu_read_in_mbox(); + D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode); + + if (PERF) + t0 = spu_read_decrementer(); + + switch (opcode & CELL_CMD_OPCODE_MASK) { + case CELL_CMD_EXIT: + D_PRINTF(CELL_DEBUG_CMD, "EXIT\n"); + exitFlag = 1; + break; + case CELL_CMD_VS_EXECUTE: +#if 0 + spu_execute_vertex_shader(&draw, &cmd.vs); +#endif + break; + case CELL_CMD_BATCH: + cmd_batch(opcode); + break; + default: + printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK); + } + + if (PERF) { + t1 = spu_read_decrementer(); + printf("wait mbox time: %gms batch time: %gms\n", + (~0u - t0) * spu.init.inv_timebase, + (t0 - t1) * spu.init.inv_timebase); + } + } + + D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n"); + + if (spu.init.debug_flags & CELL_DEBUG_CACHE) + spu_dcache_report(); +} + +/* Initialize this module; we manage the fragment ops buffer here. */ +void +spu_command_init(void) +{ + /* Install default/fallback fragment processing function. + * This will normally be overriden by a code-gen'd function + * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. + */ + spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; + + /* Set up the basic empty buffer for code-gen'ed fragment ops */ + spu.fragment_ops_code = NULL; + spu.fragment_ops_code_size = 0; +} + +void +spu_command_close(void) +{ + /* Deallocate the code-gen buffer for fragment ops, and reset the + * fragment ops functions to their initial setting (just to leave + * things in a good state). 
+ */ + if (spu.fragment_ops_code != NULL) { + free(spu.fragment_ops_code); + } + spu_command_init(); +} diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h new file mode 100644 index 0000000000..83dcdade28 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_command.h @@ -0,0 +1,35 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +extern void +command_loop(void); + +extern void +spu_command_init(void); + +extern void +spu_command_close(void); diff --git a/src/gallium/drivers/cell/spu/spu_dcache.c b/src/gallium/drivers/cell/spu/spu_dcache.c index 167404cdc5..a6d67634fd 100644 --- a/src/gallium/drivers/cell/spu/spu_dcache.c +++ b/src/gallium/drivers/cell/spu/spu_dcache.c @@ -36,7 +36,9 @@ #define CACHE_SET_TAGID(set) (((set) & 0x03) + TAG_DCACHE0) #define CACHE_LOG2NNWAY 2 #define CACHE_LOG2NSETS 6 -/*#define CACHE_STATS 1*/ +#ifdef DEBUG +#define CACHE_STATS 1 +#endif #include <cache-api.h> /* Yes folks, this is ugly. diff --git a/src/gallium/drivers/cell/spu/spu_funcs.c b/src/gallium/drivers/cell/spu/spu_funcs.c new file mode 100644 index 0000000000..ff3d609d25 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_funcs.c @@ -0,0 +1,173 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * SPU functions accessed by shaders. + * + * Authors: Brian Paul + */ + + +#include <string.h> +#include <libmisc.h> +#include <math.h> +#include <cos14_v.h> +#include <sin14_v.h> +#include <simdmath/exp2f4.h> +#include <simdmath/log2f4.h> +#include <simdmath/powf4.h> + +#include "cell/common.h" +#include "spu_main.h" +#include "spu_funcs.h" +#include "spu_texture.h" + + +/** For "return"-ing four vectors */ +struct vec_4x4 +{ + vector float v[4]; +}; + + +static vector float +spu_cos(vector float x) +{ + return _cos14_v(x); +} + +static vector float +spu_sin(vector float x) +{ + return _sin14_v(x); +} + +static vector float +spu_pow(vector float x, vector float y) +{ + return _powf4(x, y); +} + +static vector float +spu_exp2(vector float x) +{ + return _exp2f4(x); +} + +static vector float +spu_log2(vector float x) +{ + return _log2f4(x); +} + + +static struct vec_4x4 +spu_tex_2d(vector float s, vector float t, vector float r, vector float q, + unsigned unit) +{ + struct vec_4x4 colors; + (void) r; + (void) q; + spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); + return colors; +} + +static struct vec_4x4 +spu_tex_3d(vector float s, vector float t, vector float r, vector float q, + unsigned unit) +{ + struct vec_4x4 colors; + (void) r; + (void) q; + spu.sample_texture_2d[unit](s, t, unit, 0, 0, colors.v); + return colors; +} + +static struct vec_4x4 +spu_tex_cube(vector float s, vector float t, vector float r, vector float q, + unsigned unit) +{ + struct vec_4x4 colors; + (void) q; + sample_texture_cube(s, t, r, unit, colors.v); + return colors; +} + + +/** + * Add named function to list of 
"exported" functions that will be + * made available to the PPU-hosted code generator. + */ +static void +export_func(struct cell_spu_function_info *spu_functions, + const char *name, void *addr) +{ + uint n = spu_functions->num; + ASSERT(strlen(name) < 16); + strcpy(spu_functions->names[n], name); + spu_functions->addrs[n] = (uint) addr; + spu_functions->num++; + ASSERT(spu_functions->num <= 16); +} + + +/** + * Return info about the SPU's function to the PPU / main memory. + * The PPU needs to know the address of some SPU-side functions so + * that we can generate shader code with function calls. + */ +void +return_function_info(void) +{ + struct cell_spu_function_info funcs ALIGN16_ATTRIB; + int tag = TAG_MISC; + + ASSERT(sizeof(funcs) == 256); /* must be multiple of 16 bytes */ + + funcs.num = 0; + export_func(&funcs, "spu_cos", &spu_cos); + export_func(&funcs, "spu_sin", &spu_sin); + export_func(&funcs, "spu_pow", &spu_pow); + export_func(&funcs, "spu_exp2", &spu_exp2); + export_func(&funcs, "spu_log2", &spu_log2); + export_func(&funcs, "spu_tex_2d", &spu_tex_2d); + export_func(&funcs, "spu_tex_3d", &spu_tex_3d); + export_func(&funcs, "spu_tex_cube", &spu_tex_cube); + + /* Send the function info back to the PPU / main memory */ + mfc_put((void *) &funcs, /* src in local store */ + (unsigned int) spu.init.spu_functions, /* dst in main memory */ + sizeof(funcs), /* bytes */ + tag, + 0, /* tid */ + 0 /* rid */); + wait_on_mask(1 << tag); +} + + + diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.c b/src/gallium/drivers/cell/spu/spu_funcs.h index d570bbd2f9..3adb6ae99f 100644 --- a/src/gallium/drivers/cell/ppu/cell_winsys.c +++ b/src/gallium/drivers/cell/spu/spu_funcs.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,16 +25,11 @@ * **************************************************************************/ +#ifndef SPU_FUNCS_H +#define SPU_FUNCS_H -#include "util/u_memory.h" -#include "cell_winsys.h" +extern void +return_function_info(void); +#endif -struct cell_winsys * -cell_get_winsys(uint format) -{ - struct cell_winsys *cws = CALLOC_STRUCT(cell_winsys); - if (cws) - cws->preferredFormat = format; - return cws; -} diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 78260c4259..97c86d194d 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -32,16 +32,15 @@ #include <stdio.h> #include <libmisc.h> +#include "pipe/p_defines.h" + +#include "spu_funcs.h" +#include "spu_command.h" #include "spu_main.h" -#include "spu_render.h" #include "spu_per_fragment_op.h" #include "spu_texture.h" -#include "spu_tile.h" //#include "spu_test.h" -#include "spu_vertex_shader.h" -#include "spu_dcache.h" #include "cell/common.h" -#include "pipe/p_defines.h" /* @@ -50,600 +49,8 @@ helpful headers: /opt/cell/sdk/usr/include/libmisc.h */ -boolean Debug = FALSE; - struct spu_global spu; -struct spu_vs_context draw; - - -/** - * Buffers containing dynamically generated SPU code: - */ -static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] - ALIGN16_ATTRIB; - - - -/** - * Tell the PPU that this SPU has finished copying a buffer to - * local store and that it may be reused by the PPU. - * This is done by writting a 16-byte batch-buffer-status block back into - * main memory (in cell_context->buffer_status[]). 
- */ -static void -release_buffer(uint buffer) -{ - /* Evidently, using less than a 16-byte status doesn't work reliably */ - static const uint status[4] ALIGN16_ATTRIB - = {CELL_BUFFER_STATUS_FREE, 0, 0, 0}; - - const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer); - uint *dst = spu.init.buffer_status + index; - - ASSERT(buffer < CELL_NUM_BUFFERS); - - mfc_put((void *) &status, /* src in local memory */ - (unsigned int) dst, /* dst in main memory */ - sizeof(status), /* size */ - TAG_MISC, /* tag is unimportant */ - 0, /* tid */ - 0 /* rid */); -} - - -/** - * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled - * tiles back to the main framebuffer. - */ -static void -really_clear_tiles(uint surfaceIndex) -{ - const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - uint i; - - if (surfaceIndex == 0) { - clear_c_tile(&spu.ctile); - - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); - } - } - } - else { - clear_z_tile(&spu.ztile); - - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 1); - } - } - -#if 0 - wait_on_mask(1 << TAG_SURFACE_CLEAR); -#endif -} - - -static void -cmd_clear_surface(const struct cell_command_clear_surface *clear) -{ - if (Debug) - printf("SPU %u: CLEAR SURF %u to 0x%08x\n", spu.init.id, - clear->surface, clear->value); - - if (clear->surface == 0) { - spu.fb.color_clear_value = clear->value; - if (spu.init.debug_flags & CELL_DEBUG_CHECKER) { - uint x = (spu.init.id << 4) | (spu.init.id << 12) | - (spu.init.id << 20) | (spu.init.id << 28); - spu.fb.color_clear_value ^= x; - } - } - else { - spu.fb.depth_clear_value = clear->value; - 
} - -#define CLEAR_OPT 1 -#if CLEAR_OPT - - /* Simply set all tiles' status to CLEAR. - * When we actually begin rendering into a tile, we'll initialize it to - * the clear value. If any tiles go untouched during the frame, - * really_clear_tiles() will set them to the clear value. - */ - if (clear->surface == 0) { - memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status)); - } - else { - memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status)); - } - -#else - - /* - * This path clears the whole framebuffer to the clear color right now. - */ - - /* - printf("SPU: %s num=%d w=%d h=%d\n", - __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles); - */ - - /* init a single tile to the clear value */ - if (clear->surface == 0) { - clear_c_tile(&spu.ctile); - } - else { - clear_z_tile(&spu.ztile); - } - - /* walk over my tiles, writing the 'clear' tile's data */ - { - const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; - uint i; - for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { - uint tx = i % spu.fb.width_tiles; - uint ty = i / spu.fb.width_tiles; - if (clear->surface == 0) - put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); - else - put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); - } - } - - if (spu.init.debug_flags & CELL_DEBUG_SYNC) { - wait_on_mask(1 << TAG_SURFACE_CLEAR); - } - -#endif /* CLEAR_OPT */ - - if (Debug) - printf("SPU %u: CLEAR SURF done\n", spu.init.id); -} - - -static void -cmd_release_verts(const struct cell_command_release_verts *release) -{ - if (Debug) - printf("SPU %u: RELEASE VERTS %u\n", - spu.init.id, release->vertex_buf); - ASSERT(release->vertex_buf != ~0U); - release_buffer(release->vertex_buf); -} - - -/** - * Process a CELL_CMD_STATE_FRAGMENT_OPS command. - * This involves installing new fragment ops SPU code. - * If this function is never called, we'll use a regular C fallback function - * for fragment processing. 
- */ -static void -cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) -{ - if (Debug) - printf("SPU %u: CMD_STATE_FRAGMENT_OPS\n", spu.init.id); - /* Copy SPU code from batch buffer to spu buffer */ - memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); - /* Copy state info (for fallback case only) */ - memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); - memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); - - /* Point function pointer at new code */ - spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; - - spu.read_depth = spu.depth_stencil_alpha.depth.enabled; - spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled; -} - - -static void -cmd_state_fragment_program(const struct cell_command_fragment_program *fp) -{ - if (Debug) - printf("SPU %u: CMD_STATE_FRAGMENT_PROGRAM\n", spu.init.id); - /* Copy SPU code from batch buffer to spu buffer */ - memcpy(spu.fragment_program_code, fp->code, - SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4); -#if 01 - /* Point function pointer at new code */ - spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code; -#endif -} - - -static void -cmd_state_framebuffer(const struct cell_command_framebuffer *cmd) -{ - if (Debug) - printf("SPU %u: FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n", - spu.init.id, - cmd->width, - cmd->height, - cmd->color_start, - cmd->color_format, - cmd->depth_format); - - ASSERT_ALIGN16(cmd->color_start); - ASSERT_ALIGN16(cmd->depth_start); - - spu.fb.color_start = cmd->color_start; - spu.fb.depth_start = cmd->depth_start; - spu.fb.color_format = cmd->color_format; - spu.fb.depth_format = cmd->depth_format; - spu.fb.width = cmd->width; - spu.fb.height = cmd->height; - spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE; - spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE; - - switch (spu.fb.depth_format) { - case PIPE_FORMAT_Z32_UNORM: - spu.fb.zsize = 4; - spu.fb.zscale = (float) 
0xffffffffu; - break; - case PIPE_FORMAT_Z24S8_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_Z24X8_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - spu.fb.zsize = 4; - spu.fb.zscale = (float) 0x00ffffffu; - break; - case PIPE_FORMAT_Z16_UNORM: - spu.fb.zsize = 2; - spu.fb.zscale = (float) 0xffffu; - break; - default: - spu.fb.zsize = 0; - break; - } -} - - -static void -cmd_state_sampler(const struct cell_command_sampler *sampler) -{ - if (Debug) - printf("SPU %u: SAMPLER [%u]\n", - spu.init.id, sampler->unit); - - spu.sampler[sampler->unit] = sampler->state; - if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) - spu.sample_texture[sampler->unit] = sample_texture_bilinear; - else - spu.sample_texture[sampler->unit] = sample_texture_nearest; -} - - -static void -cmd_state_texture(const struct cell_command_texture *texture) -{ - const uint unit = texture->unit; - const uint width = texture->width; - const uint height = texture->height; - - if (Debug) { - printf("SPU %u: TEXTURE [%u] at %p size %u x %u\n", spu.init.id, - texture->unit, texture->start, - texture->width, texture->height); - } - - spu.texture[unit].start = texture->start; - spu.texture[unit].width = width; - spu.texture[unit].height = height; - - spu.texture[unit].tiles_per_row = width / TILE_SIZE; - - spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0}; - spu.texture[unit].tex_size_mask = (vector unsigned int) - { width - 1, height - 1, 0, 0 }; - spu.texture[unit].tex_size_x_mask = spu_splats(width - 1); - spu.texture[unit].tex_size_y_mask = spu_splats(height - 1); -} - - -static void -cmd_state_vertex_info(const struct vertex_info *vinfo) -{ - if (Debug) { - printf("SPU %u: VERTEX_INFO num_attribs=%u\n", spu.init.id, - vinfo->num_attribs); - } - ASSERT(vinfo->num_attribs >= 1); - ASSERT(vinfo->num_attribs <= 8); - memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo)); -} - - -static void -cmd_state_vs_array_info(const struct cell_array_info *vs_info) -{ - const 
unsigned attr = vs_info->attr; - - ASSERT(attr < PIPE_MAX_ATTRIBS); - draw.vertex_fetch.src_ptr[attr] = vs_info->base; - draw.vertex_fetch.pitch[attr] = vs_info->pitch; - draw.vertex_fetch.size[attr] = vs_info->size; - draw.vertex_fetch.code_offset[attr] = vs_info->function_offset; - draw.vertex_fetch.dirty = 1; -} - - -static void -cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code) -{ - mfc_get(attribute_fetch_code_buffer, - (unsigned int) code->base, /* src */ - code->size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - draw.vertex_fetch.code = attribute_fetch_code_buffer; -} - - -static void -cmd_finish(void) -{ - if (Debug) - printf("SPU %u: FINISH\n", spu.init.id); - really_clear_tiles(0); - /* wait for all outstanding DMAs to finish */ - mfc_write_tag_mask(~0); - mfc_read_tag_status_all(); - /* send mbox message to PPU */ - spu_write_out_mbox(CELL_CMD_FINISH); -} - - -/** - * Execute a batch of commands which was sent to us by the PPU. - * See the cell_emit_state.c code to see where the commands come from. - * - * The opcode param encodes the location of the buffer and its size. 
- */ -static void -cmd_batch(uint opcode) -{ - const uint buf = (opcode >> 8) & 0xff; - uint size = (opcode >> 16); - uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB; - const unsigned usize = size / sizeof(buffer[0]); - uint pos; - - if (Debug) - printf("SPU %u: BATCH buffer %u, len %u, from %p\n", - spu.init.id, buf, size, spu.init.buffers[buf]); - - ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH); - - ASSERT_ALIGN16(spu.init.buffers[buf]); - - size = ROUNDUP16(size); - - ASSERT_ALIGN16(spu.init.buffers[buf]); - - mfc_get(buffer, /* dest */ - (unsigned int) spu.init.buffers[buf], /* src */ - size, - TAG_BATCH_BUFFER, - 0, /* tid */ - 0 /* rid */); - wait_on_mask(1 << TAG_BATCH_BUFFER); - - /* Tell PPU we're done copying the buffer to local store */ - if (Debug) - printf("SPU %u: release batch buf %u\n", spu.init.id, buf); - release_buffer(buf); - - /* - * Loop over commands in the batch buffer - */ - for (pos = 0; pos < usize; /* no incr */) { - switch (buffer[pos]) { - /* - * rendering commands - */ - case CELL_CMD_CLEAR_SURFACE: - { - struct cell_command_clear_surface *clr - = (struct cell_command_clear_surface *) &buffer[pos]; - cmd_clear_surface(clr); - pos += sizeof(*clr) / 8; - } - break; - case CELL_CMD_RENDER: - { - struct cell_command_render *render - = (struct cell_command_render *) &buffer[pos]; - uint pos_incr; - cmd_render(render, &pos_incr); - pos += pos_incr; - } - break; - /* - * state-update commands - */ - case CELL_CMD_STATE_FRAMEBUFFER: - { - struct cell_command_framebuffer *fb - = (struct cell_command_framebuffer *) &buffer[pos]; - cmd_state_framebuffer(fb); - pos += sizeof(*fb) / 8; - } - break; - case CELL_CMD_STATE_FRAGMENT_OPS: - { - struct cell_command_fragment_ops *fops - = (struct cell_command_fragment_ops *) &buffer[pos]; - cmd_state_fragment_ops(fops); - pos += sizeof(*fops) / 8; - } - break; - case CELL_CMD_STATE_FRAGMENT_PROGRAM: - { - struct cell_command_fragment_program *fp - = (struct cell_command_fragment_program 
*) &buffer[pos]; - cmd_state_fragment_program(fp); - pos += sizeof(*fp) / 8; - } - break; - case CELL_CMD_STATE_SAMPLER: - { - struct cell_command_sampler *sampler - = (struct cell_command_sampler *) &buffer[pos]; - cmd_state_sampler(sampler); - pos += sizeof(*sampler) / 8; - } - break; - case CELL_CMD_STATE_TEXTURE: - { - struct cell_command_texture *texture - = (struct cell_command_texture *) &buffer[pos]; - cmd_state_texture(texture); - pos += sizeof(*texture) / 8; - } - break; - case CELL_CMD_STATE_VERTEX_INFO: - cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8); - break; - case CELL_CMD_STATE_VIEWPORT: - (void) memcpy(& draw.viewport, &buffer[pos+1], - sizeof(struct pipe_viewport_state)); - pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8); - break; - case CELL_CMD_STATE_UNIFORMS: - draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1]; - pos += 2; - break; - case CELL_CMD_STATE_VS_ARRAY_INFO: - cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8); - break; - case CELL_CMD_STATE_BIND_VS: -#if 0 - spu_bind_vertex_shader(&draw, - (struct cell_shader_info *) &buffer[pos+1]); -#endif - pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8); - break; - case CELL_CMD_STATE_ATTRIB_FETCH: - cmd_state_attrib_fetch((struct cell_attribute_fetch_code *) - &buffer[pos+1]); - pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8); - break; - /* - * misc commands - */ - case CELL_CMD_FINISH: - cmd_finish(); - pos += 1; - break; - case CELL_CMD_RELEASE_VERTS: - { - struct cell_command_release_verts *release - = (struct cell_command_release_verts *) &buffer[pos]; - cmd_release_verts(release); - pos += sizeof(*release) / 8; - } - break; - case CELL_CMD_FLUSH_BUFFER_RANGE: { - struct cell_buffer_range *br = (struct cell_buffer_range *) - &buffer[pos+1]; - - spu_dcache_mark_dirty((unsigned) 
br->base, br->size); - pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8); - break; - } - default: - printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]); - ASSERT(0); - break; - } - } - - if (Debug) - printf("SPU %u: BATCH complete\n", spu.init.id); -} - - -/** - * Temporary/simple main loop for SPEs: Get a command, execute it, repeat. - */ -static void -main_loop(void) -{ - struct cell_command cmd; - int exitFlag = 0; - - if (Debug) - printf("SPU %u: Enter main loop\n", spu.init.id); - - ASSERT((sizeof(struct cell_command) & 0xf) == 0); - ASSERT_ALIGN16(&cmd); - - while (!exitFlag) { - unsigned opcode; - int tag = 0; - - if (Debug) - printf("SPU %u: Wait for cmd...\n", spu.init.id); - - /* read/wait from mailbox */ - opcode = (unsigned int) spu_read_in_mbox(); - - if (Debug) - printf("SPU %u: got cmd 0x%x\n", spu.init.id, opcode); - - /* command payload */ - mfc_get(&cmd, /* dest */ - (unsigned int) spu.init.cmd, /* src */ - sizeof(struct cell_command), /* bytes */ - tag, - 0, /* tid */ - 0 /* rid */); - wait_on_mask( 1 << tag ); - - /* - * NOTE: most commands should be contained in a batch buffer - */ - - switch (opcode & CELL_CMD_OPCODE_MASK) { - case CELL_CMD_EXIT: - if (Debug) - printf("SPU %u: EXIT\n", spu.init.id); - exitFlag = 1; - break; - case CELL_CMD_VS_EXECUTE: -#if 0 - spu_execute_vertex_shader(&draw, &cmd.vs); -#endif - break; - case CELL_CMD_BATCH: - cmd_batch(opcode); - break; - default: - printf("Bad opcode!\n"); - } - - } - - if (Debug) - printf("SPU %u: Exit main loop\n", spu.init.id); - - spu_dcache_report(); -} - - static void one_time_init(void) @@ -651,15 +58,8 @@ one_time_init(void) memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); invalidate_tex_cache(); - - /* Install default/fallback fragment processing function. - * This will normally be overriden by a code-gen'd function. 
- */ - spu.fragment_ops = spu_fallback_fragment_ops; } - - /* In some versions of the SDK the SPE main takes 'unsigned long' as a * parameter. In others it takes 'unsigned long long'. Use a define to * select between the two. @@ -682,12 +82,16 @@ main(main_param_t speid, main_param_t argp) ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); ASSERT(sizeof(struct cell_command_render) % 8 == 0); + ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0); + ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0); one_time_init(); + spu_command_init(); - if (Debug) - printf("SPU: main() speid=%lu\n", (unsigned long) speid); + D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid); + D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); + /* get initialization data */ mfc_get(&spu.init, /* dest */ (unsigned int) argp, /* src */ sizeof(struct cell_init_info), /* bytes */ @@ -696,12 +100,18 @@ main(main_param_t speid, main_param_t argp) 0 /* rid */); wait_on_mask( 1 << tag ); + if (spu.init.id == 0) { + return_function_info(); + } + #if 0 if (spu.init.id==0) - spu_test_misc(); + spu_test_misc(spu.init.id); #endif - main_loop(); + command_loop(); + + spu_command_close(); return 0; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 2c7b625840..33767e7c51 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -36,9 +36,18 @@ #include "pipe/p_state.h" - -#define MAX_WIDTH 1024 -#define MAX_HEIGHT 1024 +#if DEBUG +/* These debug macros use the unusual construction ", ##__VA_ARGS__" + * which expands to the expected comma + args if variadic arguments + * are supplied, but swallows the comma if there are no variadic + * arguments (which avoids syntax errors that would otherwise occur). + */ +#define D_PRINTF(flag, format,...) 
\ + if (spu.init.debug_flags & (flag)) \ + printf("SPU %u: " format, spu.init.id, ##__VA_ARGS__) +#else +#define D_PRINTF(...) +#endif /** @@ -61,8 +70,11 @@ typedef union { /** Function for sampling textures */ -typedef vector float (*spu_sample_texture_func)(uint unit, - vector float texcoord); +typedef void (*spu_sample_texture_2d_func)(vector float s, + vector float t, + uint unit, uint level, uint face, + vector float colors[4]); + /** Function for performing per-fragment ops */ typedef void (*spu_fragment_ops_func)(uint x, uint y, @@ -76,9 +88,9 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y, vector unsigned int mask); /** Function for running fragment program */ -typedef void (*spu_fragment_program_func)(vector float *inputs, - vector float *outputs, - vector float *constants); +typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, + vector float *outputs, + vector float *constants); struct spu_framebuffer @@ -98,15 +110,27 @@ struct spu_framebuffer } ALIGN16_ATTRIB; -struct spu_texture +/** per-texture level info */ +struct spu_texture_level { void *start; - ushort width, height; + ushort width, height, depth; ushort tiles_per_row; - vector float tex_size; - vector unsigned int tex_size_mask; /**< == int(size - 1) */ - vector unsigned int tex_size_x_mask; /**< == int(size - 1) */ - vector unsigned int tex_size_y_mask; /**< == int(size - 1) */ + uint bytes_per_image; + /** texcoord scale factors */ + vector float scale_s, scale_t, scale_r; + /** texcoord masks (if REPEAT then size-1, else ~0) */ + vector signed int mask_s, mask_t, mask_r; + /** texcoord clamp limits */ + vector signed int max_s, max_t, max_r; +} ALIGN16_ATTRIB; + + +struct spu_texture +{ + struct spu_texture_level level[CELL_MAX_TEXTURE_LEVELS]; + uint max_level; + uint target; /**< PIPE_TEXTURE_x */ } ALIGN16_ATTRIB; @@ -124,7 +148,9 @@ struct spu_global struct spu_framebuffer fb; struct pipe_depth_stencil_alpha_state depth_stencil_alpha; struct 
pipe_blend_state blend; + struct pipe_blend_color blend_color; struct pipe_sampler_state sampler[PIPE_MAX_SAMPLERS]; + struct pipe_rasterizer_state rasterizer; struct spu_texture texture[PIPE_MAX_SAMPLERS]; struct vertex_info vertex_info; @@ -133,39 +159,38 @@ struct spu_global tile_t ztile ALIGN16_ATTRIB; /** Read depth/stencil tiles? */ - boolean read_depth; - boolean read_stencil; + boolean read_depth_stencil; /** Current tiles' status */ ubyte cur_ctile_status, cur_ztile_status; /** Status of all tiles in framebuffer */ - ubyte ctile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - ubyte ztile_status[MAX_HEIGHT/TILE_SIZE][MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + ubyte ctile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; + ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; - /** Current fragment ops machine code */ - uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS]; - /** Current fragment ops function */ - spu_fragment_ops_func fragment_ops; + /** Current fragment ops machine code, at 8-byte boundary */ + uint *fragment_ops_code; + uint fragment_ops_code_size; + /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */ + spu_fragment_ops_func fragment_ops[2]; - /** Current fragment program machine code */ - uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS]; + /** Current fragment program machine code, at 8-byte boundary */ + uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; /** Current fragment ops function */ spu_fragment_program_func fragment_program; /** Current texture sampler function */ - spu_sample_texture_func sample_texture[CELL_MAX_SAMPLERS]; + spu_sample_texture_2d_func sample_texture_2d[CELL_MAX_SAMPLERS]; + spu_sample_texture_2d_func min_sample_texture_2d[CELL_MAX_SAMPLERS]; + spu_sample_texture_2d_func mag_sample_texture_2d[CELL_MAX_SAMPLERS]; - /** Fragment program constants (XXX preliminary/used) */ -#define 
MAX_CONSTANTS 32 - vector float constants[MAX_CONSTANTS]; + /** Fragment program constants */ + vector float constants[4 * CELL_MAX_CONSTANTS]; } ALIGN16_ATTRIB; extern struct spu_global spu; -extern boolean Debug; - @@ -184,7 +209,7 @@ extern boolean Debug; #define TAG_DCACHE1 21 #define TAG_DCACHE2 22 #define TAG_DCACHE3 23 - +#define TAG_FENCE 24 static INLINE void diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index 03dd547845..eba9f95cf1 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -40,6 +40,24 @@ #define LINEAR_QUAD_LAYOUT 1 +static INLINE vector float +spu_min(vector float a, vector float b) +{ + vector unsigned int m; + m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */ + return spu_sel(a, b, m); +} + + +static INLINE vector float +spu_max(vector float a, vector float b) +{ + vector unsigned int m; + m = spu_cmpgt(a, b); /* m = a > b ? ~0 : 0 */ + return spu_sel(b, a, m); +} + + /** * Called by rasterizer for each quad after the shader has run. Do * all the per-fragment operations including alpha test, z test, @@ -60,11 +78,14 @@ spu_fallback_fragment_ops(uint x, uint y, vector unsigned int mask) { vector float frag_aos[4]; - unsigned int c0, c1, c2, c3; + unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */ + unsigned int fragc0, fragc1, fragc2, fragc3; /* fragment colors */ - /* do alpha test */ + /* + * Do alpha test + */ if (spu.depth_stencil_alpha.alpha.enabled) { - vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref); + vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref_value); vector unsigned int amask; switch (spu.depth_stencil_alpha.alpha.func) { @@ -102,7 +123,10 @@ spu_fallback_fragment_ops(uint x, uint y, mask = spu_and(mask, amask); } - /* Z and/or stencil testing... */ + + /* + * Z and/or stencil testing... 
+ */ if (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled) { @@ -178,6 +202,32 @@ spu_fallback_fragment_ops(uint x, uint y, } } + + /* + * If we'll need the current framebuffer/tile colors for blending + * or logicop or colormask, fetch them now. + */ + if (spu.blend.blend_enable || + spu.blend.logicop_enable || + spu.blend.colormask != 0xf) { + +#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ + fbc0 = colorTile->ui[y][x*2+0]; + fbc1 = colorTile->ui[y][x*2+1]; + fbc2 = colorTile->ui[y][x*2+2]; + fbc3 = colorTile->ui[y][x*2+3]; +#else + fbc0 = colorTile->ui[y+0][x+0]; + fbc1 = colorTile->ui[y+0][x+1]; + fbc2 = colorTile->ui[y+1][x+0]; + fbc3 = colorTile->ui[y+1][x+1]; +#endif + } + + + /* + * Do blending + */ if (spu.blend.blend_enable) { /* blending terms, misc regs */ vector float term1r, term1g, term1b, term1a; @@ -186,43 +236,30 @@ spu_fallback_fragment_ops(uint x, uint y, vector float fbRGBA[4]; /* current framebuffer colors */ - /* get colors from framebuffer/tile */ + /* convert framebuffer colors from packed int to vector float */ { - vector float fc[4]; - uint c0, c1, c2, c3; - -#if LINEAR_QUAD_LAYOUT /* See comments/diagram below */ - c0 = colorTile->ui[y][x*2+0]; - c1 = colorTile->ui[y][x*2+1]; - c2 = colorTile->ui[y][x*2+2]; - c3 = colorTile->ui[y][x*2+3]; -#else - c0 = colorTile->ui[y+0][x+0]; - c1 = colorTile->ui[y+0][x+1]; - c2 = colorTile->ui[y+1][x+0]; - c3 = colorTile->ui[y+1][x+1]; -#endif + vector float temp[4]; /* float colors in AOS form */ switch (spu.fb.color_format) { case PIPE_FORMAT_B8G8R8A8_UNORM: - fc[0] = spu_unpack_B8G8R8A8(c0); - fc[1] = spu_unpack_B8G8R8A8(c1); - fc[2] = spu_unpack_B8G8R8A8(c2); - fc[3] = spu_unpack_B8G8R8A8(c3); + temp[0] = spu_unpack_B8G8R8A8(fbc0); + temp[1] = spu_unpack_B8G8R8A8(fbc1); + temp[2] = spu_unpack_B8G8R8A8(fbc2); + temp[3] = spu_unpack_B8G8R8A8(fbc3); break; case PIPE_FORMAT_A8R8G8B8_UNORM: - fc[0] = spu_unpack_A8R8G8B8(c0); - fc[1] = spu_unpack_A8R8G8B8(c1); 
- fc[2] = spu_unpack_A8R8G8B8(c2); - fc[3] = spu_unpack_A8R8G8B8(c3); + temp[0] = spu_unpack_A8R8G8B8(fbc0); + temp[1] = spu_unpack_A8R8G8B8(fbc1); + temp[2] = spu_unpack_A8R8G8B8(fbc2); + temp[3] = spu_unpack_A8R8G8B8(fbc3); break; default: ASSERT(0); } - _transpose_matrix4x4(fbRGBA, fc); + _transpose_matrix4x4(fbRGBA, temp); /* fbRGBA = transpose(temp) */ } /* - * Compute Src RGB terms + * Compute Src RGB terms (fragment color * factor) */ switch (spu.blend.rgb_src_factor) { case PIPE_BLENDFACTOR_ONE: @@ -245,13 +282,33 @@ spu_fallback_fragment_ops(uint x, uint y, term1g = spu_mul(fragG, fragA); term1b = spu_mul(fragB, fragA); break; + case PIPE_BLENDFACTOR_DST_COLOR: + term1r = spu_mul(fragR, fbRGBA[0]); + term1g = spu_mul(fragG, fbRGBA[1]); + term1b = spu_mul(fragB, fbRGBA[1]); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + term1r = spu_mul(fragR, fbRGBA[3]); + term1g = spu_mul(fragG, fbRGBA[3]); + term1b = spu_mul(fragB, fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[0])); + term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[1])); + term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[2])); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + term1r = spu_mul(fragR, spu_splats(spu.blend_color.color[3])); + term1g = spu_mul(fragG, spu_splats(spu.blend_color.color[3])); + term1b = spu_mul(fragB, spu_splats(spu.blend_color.color[3])); + break; /* XXX more cases */ default: ASSERT(0); } /* - * Compute Src Alpha term + * Compute Src Alpha term (fragment alpha * factor) */ switch (spu.blend.alpha_src_factor) { case PIPE_BLENDFACTOR_ONE: @@ -263,19 +320,29 @@ spu_fallback_fragment_ops(uint x, uint y, case PIPE_BLENDFACTOR_SRC_ALPHA: term1a = spu_mul(fragA, fragA); break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_DST_ALPHA: + term1a = spu_mul(fragA, fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case 
PIPE_BLENDFACTOR_CONST_ALPHA: + term1a = spu_mul(fragR, spu_splats(spu.blend_color.color[3])); + break; /* XXX more cases */ default: ASSERT(0); } /* - * Compute Dest RGB terms + * Compute Dest RGB terms (framebuffer color * factor) */ switch (spu.blend.rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: - term2r = fragR; - term2g = fragG; - term2b = fragB; + term2r = fbRGBA[0]; + term2g = fbRGBA[1]; + term2b = fbRGBA[2]; break; case PIPE_BLENDFACTOR_ZERO: term2r = @@ -299,17 +366,37 @@ spu_fallback_fragment_ops(uint x, uint y, term2g = spu_mul(fbRGBA[1], tmp); term2b = spu_mul(fbRGBA[2], tmp); break; - /* XXX more cases */ + case PIPE_BLENDFACTOR_DST_COLOR: + term2r = spu_mul(fbRGBA[0], fbRGBA[0]); + term2g = spu_mul(fbRGBA[1], fbRGBA[1]); + term2b = spu_mul(fbRGBA[2], fbRGBA[2]); + break; + case PIPE_BLENDFACTOR_DST_ALPHA: + term2r = spu_mul(fbRGBA[0], fbRGBA[3]); + term2g = spu_mul(fbRGBA[1], fbRGBA[3]); + term2b = spu_mul(fbRGBA[2], fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[0])); + term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[1])); + term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[2])); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + term2r = spu_mul(fbRGBA[0], spu_splats(spu.blend_color.color[3])); + term2g = spu_mul(fbRGBA[1], spu_splats(spu.blend_color.color[3])); + term2b = spu_mul(fbRGBA[2], spu_splats(spu.blend_color.color[3])); + break; + /* XXX more cases */ default: ASSERT(0); } /* - * Compute Dest Alpha term + * Compute Dest Alpha term (framebuffer alpha * factor) */ switch (spu.blend.alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: - term2a = fragA; + term2a = fbRGBA[3]; break; case PIPE_BLENDFACTOR_SRC_COLOR: term2a = spu_splats(0.0f); @@ -322,6 +409,16 @@ spu_fallback_fragment_ops(uint x, uint y, tmp = spu_sub(one, fragA); term2a = spu_mul(fbRGBA[3], tmp); break; + case PIPE_BLENDFACTOR_DST_COLOR: + /* fall-through */ + case 
PIPE_BLENDFACTOR_DST_ALPHA: + term2a = spu_mul(fbRGBA[3], fbRGBA[3]); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* fall-through */ + case PIPE_BLENDFACTOR_CONST_ALPHA: + term2a = spu_mul(fbRGBA[3], spu_splats(spu.blend_color.color[3])); + break; /* XXX more cases */ default: ASSERT(0); @@ -341,7 +438,21 @@ spu_fallback_fragment_ops(uint x, uint y, fragG = spu_sub(term1g, term2g); fragB = spu_sub(term1b, term2b); break; - /* XXX more cases */ + case PIPE_BLEND_REVERSE_SUBTRACT: + fragR = spu_sub(term2r, term1r); + fragG = spu_sub(term2g, term1g); + fragB = spu_sub(term2b, term1b); + break; + case PIPE_BLEND_MIN: + fragR = spu_min(term1r, term2r); + fragG = spu_min(term1g, term2g); + fragB = spu_min(term1b, term2b); + break; + case PIPE_BLEND_MAX: + fragR = spu_max(term1r, term2r); + fragG = spu_max(term1g, term2g); + fragB = spu_max(term1b, term2b); + break; default: ASSERT(0); } @@ -356,7 +467,15 @@ spu_fallback_fragment_ops(uint x, uint y, case PIPE_BLEND_SUBTRACT: fragA = spu_sub(term1a, term2a); break; - /* XXX more cases */ + case PIPE_BLEND_REVERSE_SUBTRACT: + fragA = spu_sub(term2a, term1a); + break; + case PIPE_BLEND_MIN: + fragA = spu_min(term1a, term2a); + break; + case PIPE_BLEND_MAX: + fragA = spu_max(term1a, term2a); + break; default: ASSERT(0); } @@ -384,21 +503,20 @@ spu_fallback_fragment_ops(uint x, uint y, #endif /* - * Pack float colors into 32-bit RGBA words. + * Pack fragment float colors into 32-bit RGBA words. 
*/ switch (spu.fb.color_format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - c0 = spu_pack_A8R8G8B8(frag_aos[0]); - c1 = spu_pack_A8R8G8B8(frag_aos[1]); - c2 = spu_pack_A8R8G8B8(frag_aos[2]); - c3 = spu_pack_A8R8G8B8(frag_aos[3]); + fragc0 = spu_pack_A8R8G8B8(frag_aos[0]); + fragc1 = spu_pack_A8R8G8B8(frag_aos[1]); + fragc2 = spu_pack_A8R8G8B8(frag_aos[2]); + fragc3 = spu_pack_A8R8G8B8(frag_aos[3]); break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - c0 = spu_pack_B8G8R8A8(frag_aos[0]); - c1 = spu_pack_B8G8R8A8(frag_aos[1]); - c2 = spu_pack_B8G8R8A8(frag_aos[2]); - c3 = spu_pack_B8G8R8A8(frag_aos[3]); + fragc0 = spu_pack_B8G8R8A8(frag_aos[0]); + fragc1 = spu_pack_B8G8R8A8(frag_aos[1]); + fragc2 = spu_pack_B8G8R8A8(frag_aos[2]); + fragc3 = spu_pack_B8G8R8A8(frag_aos[3]); break; default: fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n"); @@ -407,20 +525,57 @@ spu_fallback_fragment_ops(uint x, uint y, /* - * Color masking + * Do color masking */ if (spu.blend.colormask != 0xf) { - /* XXX to do */ - /* apply color mask to 32-bit packed colors */ + uint cmask = 0x0; /* each byte corresponds to a color channel */ + + /* Form bitmask depending on color buffer format and colormask bits */ + switch (spu.fb.color_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + if (spu.blend.colormask & PIPE_MASK_R) + cmask |= 0x00ff0000; /* red */ + if (spu.blend.colormask & PIPE_MASK_G) + cmask |= 0x0000ff00; /* green */ + if (spu.blend.colormask & PIPE_MASK_B) + cmask |= 0x000000ff; /* blue */ + if (spu.blend.colormask & PIPE_MASK_A) + cmask |= 0xff000000; /* alpha */ + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + if (spu.blend.colormask & PIPE_MASK_R) + cmask |= 0x0000ff00; /* red */ + if (spu.blend.colormask & PIPE_MASK_G) + cmask |= 0x00ff0000; /* green */ + if (spu.blend.colormask & PIPE_MASK_B) + cmask |= 0xff000000; /* blue */ + if (spu.blend.colormask & PIPE_MASK_A) + cmask |= 0x000000ff; /* alpha */ + break; + default: + ASSERT(0); + } + + /* + * Apply color mask to the 32-bit 
packed colors. + * if (cmask[i]) + * frag color[i] = frag color[i]; + * else + * frag color[i] = framebuffer color[i]; + */ + fragc0 = (fragc0 & cmask) | (fbc0 & ~cmask); + fragc1 = (fragc1 & cmask) | (fbc1 & ~cmask); + fragc2 = (fragc2 & cmask) | (fbc2 & ~cmask); + fragc3 = (fragc3 & cmask) | (fbc3 & ~cmask); } /* - * Logic Ops + * Do logic ops */ if (spu.blend.logicop_enable) { /* XXX to do */ - /* apply logicop to 32-bit packed colors */ + /* apply logicop to 32-bit packed colors (fragcx and fbcx) */ } @@ -431,45 +586,46 @@ spu_fallback_fragment_ops(uint x, uint y, spu.cur_ctile_status = TILE_STATUS_DIRTY; } else { + /* write no fragments */ return; } /* - * Write new quad colors to the framebuffer/tile. + * Write new fragment/quad colors to the framebuffer/tile. * Only write pixels where the corresponding mask word is set. */ #if LINEAR_QUAD_LAYOUT /* * Quad layout: * +--+--+--+--+ - * |p0|p1|p2|p3| + * |p0|p1|p2|p3|... * +--+--+--+--+ */ if (spu_extract(mask, 0)) - colorTile->ui[y][x*2] = c0; + colorTile->ui[y][x*2] = fragc0; if (spu_extract(mask, 1)) - colorTile->ui[y][x*2+1] = c1; + colorTile->ui[y][x*2+1] = fragc1; if (spu_extract(mask, 2)) - colorTile->ui[y][x*2+2] = c2; + colorTile->ui[y][x*2+2] = fragc2; if (spu_extract(mask, 3)) - colorTile->ui[y][x*2+3] = c3; + colorTile->ui[y][x*2+3] = fragc3; #else /* * Quad layout: * +--+--+ - * |p0|p1| + * |p0|p1|... * +--+--+ - * |p2|p3| + * |p2|p3|... 
* +--+--+ */ if (spu_extract(mask, 0)) - colorTile->ui[y+0][x+0] = c0; + colorTile->ui[y+0][x+0] = fragc0; if (spu_extract(mask, 1)) - colorTile->ui[y+0][x+1] = c1; + colorTile->ui[y+0][x+1] = fragc1; if (spu_extract(mask, 2)) - colorTile->ui[y+1][x+0] = c2; + colorTile->ui[y+1][x+0] = fragc2; if (spu_extract(mask, 3)) - colorTile->ui[y+1][x+1] = c3; + colorTile->ui[y+1][x+1] = fragc3; #endif } diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 305dc98881..7c225e2f27 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -98,7 +98,7 @@ my_tile(uint tx, uint ty) static INLINE void get_cz_tiles(uint tx, uint ty) { - if (spu.read_depth) { + if (spu.read_depth_stencil) { if (spu.cur_ztile_status != TILE_STATUS_CLEAR) { //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty); get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1); @@ -153,7 +153,7 @@ static INLINE void wait_put_cz_tiles(void) { wait_on_mask(1 << TAG_WRITE_TILE_COLOR); - if (spu.read_depth) { + if (spu.read_depth_stencil) { wait_on_mask(1 << TAG_WRITE_TILE_Z); } } @@ -175,22 +175,14 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) const ubyte *vertices; const ushort *indexes; uint i, j; + uint num_tiles; - - if (Debug) { - printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u " - "inline_vert=%u\n", - spu.init.id, - render->prim_type, - render->num_verts, - render->num_indexes, - render->inline_verts); - - /* - printf(" bound: %g, %g .. 
%g, %g\n", - render->xmin, render->ymin, render->xmax, render->ymax); - */ - } + D_PRINTF(CELL_DEBUG_CMD, + "RENDER prim=%u num_vert=%u num_ind=%u inline_vert=%u\n", + render->prim_type, + render->num_verts, + render->num_indexes, + render->inline_verts); ASSERT(sizeof(*render) % 4 == 0); ASSERT(total_vertex_bytes % 16 == 0); @@ -251,6 +243,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */ + num_tiles = 0; + /** ** loop over tiles, rendering tris **/ @@ -264,6 +258,8 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) if (!my_tile(tx, ty)) continue; + num_tiles++; + spu.cur_ctile_status = spu.ctile_status[ty][tx]; spu.cur_ztile_status = spu.ztile_status[ty][tx]; @@ -293,9 +289,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) spu.ztile_status[ty][tx] = spu.cur_ztile_status; } - if (Debug) - printf("SPU %u: RENDER done\n", - spu.init.id); + D_PRINTF(CELL_DEBUG_CMD, + "RENDER done (%u tiles hit)\n", + num_tiles); } - - diff --git a/src/gallium/drivers/cell/spu/spu_shuffle.h b/src/gallium/drivers/cell/spu/spu_shuffle.h new file mode 100644 index 0000000000..74f2a0b6d2 --- /dev/null +++ b/src/gallium/drivers/cell/spu/spu_shuffle.h @@ -0,0 +1,186 @@ +#ifndef SPU_SHUFFLE_H +#define SPU_SHUFFLE_H + +/* + * Generate shuffle patterns with minimal fuss. 
+ * + * Based on ideas from + * http://www.insomniacgames.com/tech/articles/0408/files/shuffles.pdf + * + * A-P indicates 0-15th position in first vector + * a-p indicates 0-15th position in second vector + * + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * |00|01|02|03|04|05|06|07|08|09|0a|0b|0c|0d|0e|0f| + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * | A| B| C| D| + * +-----+-----+-----+-----+-----+-----+-----+-----+ + * | A| B| C| D| E| F| G| H| + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * | A| B| C| D| E| F| G| H| I| J| K| L| M| N| O| P| + * +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ + * + * x or X indicates 0xff + * 8 indicates 0x80 + * 0 indicates 0x00 + * + * The macros SHUFFLE4() SHUFFLE8() and SHUFFLE16() provide a const vector + * unsigned char literal suitable for use with spu_shuffle(). + * + * The macros SHUFB4() SHUFB8() and SHUFB16() provide a const qword vector + * literal suitable for use with si_shufb(). + * + * + * For example : + * SHUFB4(A,A,A,A) + * expands to : + * ((const qword){0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3}) + * + * SHUFFLE8(A,B,a,b,C,c,8,8) + * expands to : + * ((const vector unsigned char){0x00,0x01,0x02,0x03,0x10,0x11,0x12,0x13, + * 0x04,0x05,0x14,0x15,0xe0,0xe0,0xe0,0xe0}) + * + */ + +#include <spu_intrinsics.h> + +#define SHUFFLE_PATTERN_4_A__ 0x00, 0x01, 0x02, 0x03 +#define SHUFFLE_PATTERN_4_B__ 0x04, 0x05, 0x06, 0x07 +#define SHUFFLE_PATTERN_4_C__ 0x08, 0x09, 0x0a, 0x0b +#define SHUFFLE_PATTERN_4_D__ 0x0c, 0x0d, 0x0e, 0x0f +#define SHUFFLE_PATTERN_4_a__ 0x10, 0x11, 0x12, 0x13 +#define SHUFFLE_PATTERN_4_b__ 0x14, 0x15, 0x16, 0x17 +#define SHUFFLE_PATTERN_4_c__ 0x18, 0x19, 0x1a, 0x1b +#define SHUFFLE_PATTERN_4_d__ 0x1c, 0x1d, 0x1e, 0x1f +#define SHUFFLE_PATTERN_4_X__ 0xc0, 0xc0, 0xc0, 0xc0 +#define SHUFFLE_PATTERN_4_x__ 0xc0, 0xc0, 0xc0, 0xc0 +#define SHUFFLE_PATTERN_4_0__ 0x80, 0x80, 0x80, 0x80 +#define SHUFFLE_PATTERN_4_8__ 0xe0, 0xe0, 0xe0, 0xe0 + +#define SHUFFLE_VECTOR_4__(A, B, C, D) \ + 
SHUFFLE_PATTERN_4_##A##__, \ + SHUFFLE_PATTERN_4_##B##__, \ + SHUFFLE_PATTERN_4_##C##__, \ + SHUFFLE_PATTERN_4_##D##__ + +#define SHUFFLE4(A, B, C, D) \ + ((const vector unsigned char){ \ + SHUFFLE_VECTOR_4__(A, B, C, D) \ + }) + +#define SHUFB4(A, B, C, D) \ + ((const qword){ \ + SHUFFLE_VECTOR_4__(A, B, C, D) \ + }) + + +#define SHUFFLE_PATTERN_8_A__ 0x00, 0x01 +#define SHUFFLE_PATTERN_8_B__ 0x02, 0x03 +#define SHUFFLE_PATTERN_8_C__ 0x04, 0x05 +#define SHUFFLE_PATTERN_8_D__ 0x06, 0x07 +#define SHUFFLE_PATTERN_8_E__ 0x08, 0x09 +#define SHUFFLE_PATTERN_8_F__ 0x0a, 0x0b +#define SHUFFLE_PATTERN_8_G__ 0x0c, 0x0d +#define SHUFFLE_PATTERN_8_H__ 0x0e, 0x0f +#define SHUFFLE_PATTERN_8_a__ 0x10, 0x11 +#define SHUFFLE_PATTERN_8_b__ 0x12, 0x13 +#define SHUFFLE_PATTERN_8_c__ 0x14, 0x15 +#define SHUFFLE_PATTERN_8_d__ 0x16, 0x17 +#define SHUFFLE_PATTERN_8_e__ 0x18, 0x19 +#define SHUFFLE_PATTERN_8_f__ 0x1a, 0x1b +#define SHUFFLE_PATTERN_8_g__ 0x1c, 0x1d +#define SHUFFLE_PATTERN_8_h__ 0x1e, 0x1f +#define SHUFFLE_PATTERN_8_X__ 0xc0, 0xc0 +#define SHUFFLE_PATTERN_8_x__ 0xc0, 0xc0 +#define SHUFFLE_PATTERN_8_0__ 0x80, 0x80 +#define SHUFFLE_PATTERN_8_8__ 0xe0, 0xe0 + + +#define SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ + SHUFFLE_PATTERN_8_##A##__, \ + SHUFFLE_PATTERN_8_##B##__, \ + SHUFFLE_PATTERN_8_##C##__, \ + SHUFFLE_PATTERN_8_##D##__, \ + SHUFFLE_PATTERN_8_##E##__, \ + SHUFFLE_PATTERN_8_##F##__, \ + SHUFFLE_PATTERN_8_##G##__, \ + SHUFFLE_PATTERN_8_##H##__ + +#define SHUFFLE8(A, B, C, D, E, F, G, H) \ + ((const vector unsigned char){ \ + SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ + }) + +#define SHUFB8(A, B, C, D, E, F, G, H) \ + ((const qword){ \ + SHUFFLE_VECTOR_8__(A, B, C, D, E, F, G, H) \ + }) + + +#define SHUFFLE_PATTERN_16_A__ 0x00 +#define SHUFFLE_PATTERN_16_B__ 0x01 +#define SHUFFLE_PATTERN_16_C__ 0x02 +#define SHUFFLE_PATTERN_16_D__ 0x03 +#define SHUFFLE_PATTERN_16_E__ 0x04 +#define SHUFFLE_PATTERN_16_F__ 0x05 +#define SHUFFLE_PATTERN_16_G__ 0x06 +#define 
SHUFFLE_PATTERN_16_H__ 0x07 +#define SHUFFLE_PATTERN_16_I__ 0x08 +#define SHUFFLE_PATTERN_16_J__ 0x09 +#define SHUFFLE_PATTERN_16_K__ 0x0a +#define SHUFFLE_PATTERN_16_L__ 0x0b +#define SHUFFLE_PATTERN_16_M__ 0x0c +#define SHUFFLE_PATTERN_16_N__ 0x0d +#define SHUFFLE_PATTERN_16_O__ 0x0e +#define SHUFFLE_PATTERN_16_P__ 0x0f +#define SHUFFLE_PATTERN_16_a__ 0x10 +#define SHUFFLE_PATTERN_16_b__ 0x11 +#define SHUFFLE_PATTERN_16_c__ 0x12 +#define SHUFFLE_PATTERN_16_d__ 0x13 +#define SHUFFLE_PATTERN_16_e__ 0x14 +#define SHUFFLE_PATTERN_16_f__ 0x15 +#define SHUFFLE_PATTERN_16_g__ 0x16 +#define SHUFFLE_PATTERN_16_h__ 0x17 +#define SHUFFLE_PATTERN_16_i__ 0x18 +#define SHUFFLE_PATTERN_16_j__ 0x19 +#define SHUFFLE_PATTERN_16_k__ 0x1a +#define SHUFFLE_PATTERN_16_l__ 0x1b +#define SHUFFLE_PATTERN_16_m__ 0x1c +#define SHUFFLE_PATTERN_16_n__ 0x1d +#define SHUFFLE_PATTERN_16_o__ 0x1e +#define SHUFFLE_PATTERN_16_p__ 0x1f +#define SHUFFLE_PATTERN_16_X__ 0xc0 +#define SHUFFLE_PATTERN_16_x__ 0xc0 +#define SHUFFLE_PATTERN_16_0__ 0x80 +#define SHUFFLE_PATTERN_16_8__ 0xe0 + +#define SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + SHUFFLE_PATTERN_16_##A##__, \ + SHUFFLE_PATTERN_16_##B##__, \ + SHUFFLE_PATTERN_16_##C##__, \ + SHUFFLE_PATTERN_16_##D##__, \ + SHUFFLE_PATTERN_16_##E##__, \ + SHUFFLE_PATTERN_16_##F##__, \ + SHUFFLE_PATTERN_16_##G##__, \ + SHUFFLE_PATTERN_16_##H##__, \ + SHUFFLE_PATTERN_16_##I##__, \ + SHUFFLE_PATTERN_16_##J##__, \ + SHUFFLE_PATTERN_16_##K##__, \ + SHUFFLE_PATTERN_16_##L##__, \ + SHUFFLE_PATTERN_16_##M##__, \ + SHUFFLE_PATTERN_16_##N##__, \ + SHUFFLE_PATTERN_16_##O##__, \ + SHUFFLE_PATTERN_16_##P##__ + +#define SHUFFLE16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + ((const vector unsigned char){ \ + SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + }) + +#define SHUFB16(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + ((const qword){ \ + SHUFFLE_VECTOR_16__(A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P) \ + 
}) + +#endif diff --git a/src/gallium/drivers/cell/spu/spu_texture.c b/src/gallium/drivers/cell/spu/spu_texture.c index 117b8a36f8..69784c8978 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.c +++ b/src/gallium/drivers/cell/spu/spu_texture.c @@ -26,6 +26,8 @@ **************************************************************************/ +#include <math.h> + #include "pipe/p_compiler.h" #include "spu_main.h" #include "spu_texture.h" @@ -40,37 +42,19 @@ void invalidate_tex_cache(void) { - uint unit = 0; - uint bytes = 4 * spu.texture[unit].width - * spu.texture[unit].height; - - spu_dcache_mark_dirty((unsigned) spu.texture[unit].start, bytes); -} + uint lvl; + for (lvl = 0; lvl < CELL_MAX_TEXTURE_LEVELS; lvl++) { + uint unit = 0; + uint bytes = 4 * spu.texture[unit].level[lvl].width + * spu.texture[unit].level[lvl].height; + if (spu.texture[unit].target == PIPE_TEXTURE_CUBE) + bytes *= 6; + else if (spu.texture[unit].target == PIPE_TEXTURE_3D) + bytes *= spu.texture[unit].level[lvl].depth; -/** - * XXX look into getting texels for all four pixels in a quad at once. - */ -static uint -get_texel(uint unit, vec_uint4 coordinate) -{ - /* - * XXX we could do the "/ TILE_SIZE" and "% TILE_SIZE" operations as - * SIMD since X and Y are already in a SIMD register. - */ - const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; - ushort x = spu_extract(coordinate, 0); - ushort y = spu_extract(coordinate, 1); - unsigned tile_offset = sizeof(tile_t) - * ((y / TILE_SIZE * spu.texture[unit].tiles_per_row) + (x / TILE_SIZE)); - ushort texel_offset = (ushort) 4 - * (ushort) (((ushort) (y % TILE_SIZE) * (ushort) TILE_SIZE) + (x % TILE_SIZE)); - vec_uint4 tmp; - - spu_dcache_fetch_unaligned((qword *) & tmp, - texture_ea + tile_offset + texel_offset, - 4); - return spu_extract(tmp, 0); + spu_dcache_mark_dirty((unsigned) spu.texture[unit].level[lvl].start, bytes); + } } @@ -88,15 +72,17 @@ get_texel(uint unit, vec_uint4 coordinate) * a time. 
*/ static void -get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) +get_four_texels(const struct spu_texture_level *tlevel, uint face, + vec_int4 x, vec_int4 y, + vec_uint4 *texels) { - const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; - vec_uint4 tile_x = spu_rlmask(x, -5); - vec_uint4 tile_y = spu_rlmask(y, -5); - const qword offset_x = si_andi((qword) x, 0x1f); - const qword offset_y = si_andi((qword) y, 0x1f); + unsigned texture_ea = (uintptr_t) tlevel->start; + const vec_int4 tile_x = spu_rlmask(x, -5); /* tile_x = x / 32 */ + const vec_int4 tile_y = spu_rlmask(y, -5); /* tile_y = y / 32 */ + const qword offset_x = si_andi((qword) x, 0x1f); /* offset_x = x & 0x1f */ + const qword offset_y = si_andi((qword) y, 0x1f); /* offset_y = y & 0x1f */ - const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row); + const qword tiles_per_row = (qword) spu_splats(tlevel->tiles_per_row); const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); @@ -107,6 +93,8 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset); + texture_ea = texture_ea + face * tlevel->bytes_per_image; + spu_dcache_fetch_unaligned((qword *) & texels[0], texture_ea + spu_extract(offset, 0), 4); spu_dcache_fetch_unaligned((qword *) & texels[1], @@ -118,83 +106,536 @@ get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) } +/** clamp vec to [0, max] */ +static INLINE vector signed int +spu_clamp(vector signed int vec, vector signed int max) +{ + static const vector signed int zero = {0,0,0,0}; + vector unsigned int c; + c = spu_cmpgt(vec, zero); /* c = vec > zero ? ~0 : 0 */ + vec = spu_sel(zero, vec, c); + c = spu_cmpgt(vec, max); /* c = vec > max ? ~0 : 0 */ + vec = spu_sel(vec, max, c); + return vec; +} + + + /** - * Get texture sample at texcoord. 
+ * Do nearest texture sampling for four pixels. + * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). */ -vector float -sample_texture_nearest(uint unit, vector float texcoord) +void +sample_texture_2d_nearest(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]) { - vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); - vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ - itc = spu_and(itc, spu.texture[unit].tex_size_mask); /* mask (GL_REPEAT) */ - uint texel = get_texel(unit, itc); - return spu_unpack_A8R8G8B8(texel); + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; + vector float ss = spu_mul(s, tlevel->scale_s); + vector float tt = spu_mul(t, tlevel->scale_t); + vector signed int is = spu_convts(ss, 0); + vector signed int it = spu_convts(tt, 0); + vec_uint4 texels[4]; + + /* PIPE_TEX_WRAP_REPEAT */ + is = spu_and(is, tlevel->mask_s); + it = spu_and(it, tlevel->mask_t); + + /* PIPE_TEX_WRAP_CLAMP */ + is = spu_clamp(is, tlevel->max_s); + it = spu_clamp(it, tlevel->max_t); + + get_four_texels(tlevel, face, is, it, texels); + + /* convert four packed ARGBA pixels to float RRRR,GGGG,BBBB,AAAA */ + spu_unpack_A8R8G8B8_transpose4(texels, colors); } -vector float -sample_texture_bilinear(uint unit, vector float texcoord) +/** + * Do bilinear texture sampling for four pixels. + * \param colors returned colors in SOA format (rrrr, gggg, bbbb, aaaa). 
+ */ +void +sample_texture_2d_bilinear(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]) { - static const vec_uint4 offset_x = {0, 0, 1, 1}; - static const vec_uint4 offset_y = {0, 1, 0, 1}; + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; + static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; - vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); - tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ + vector float ss = spu_madd(s, tlevel->scale_s, half); + vector float tt = spu_madd(t, tlevel->scale_t, half); - /* integer texcoords S,T: */ - vec_uint4 itc = spu_convtu(tc, 0); /* convert to int */ + vector signed int is0 = spu_convts(ss, 0); + vector signed int it0 = spu_convts(tt, 0); - vec_uint4 texels[4]; - - /* setup texcoords for quad: - * +-----+-----+ - * |x0,y0|x1,y1| - * +-----+-----+ - * |x2,y2|x3,y3| - * +-----+-----+ - */ - vec_uint4 x = spu_splats(spu_extract(itc, 0)); - vec_uint4 y = spu_splats(spu_extract(itc, 1)); - x = spu_add(x, offset_x); - y = spu_add(y, offset_y); + /* is + 1, it + 1 */ + vector signed int is1 = spu_add(is0, 1); + vector signed int it1 = spu_add(it0, 1); - /* GL_REPEAT wrap mode: */ - x = spu_and(x, spu.texture[unit].tex_size_x_mask); - y = spu_and(y, spu.texture[unit].tex_size_y_mask); + /* PIPE_TEX_WRAP_REPEAT */ + is0 = spu_and(is0, tlevel->mask_s); + it0 = spu_and(it0, tlevel->mask_t); + is1 = spu_and(is1, tlevel->mask_s); + it1 = spu_and(it1, tlevel->mask_t); - get_four_texels(unit, x, y, texels); + /* PIPE_TEX_WRAP_CLAMP */ + is0 = spu_clamp(is0, tlevel->max_s); + it0 = spu_clamp(it0, tlevel->max_t); + is1 = spu_clamp(is1, tlevel->max_s); + it1 = spu_clamp(it1, tlevel->max_t); - /* integer A8R8G8B8 to float texel conversion */ - vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0)); - vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0)); - vector float texel10 = 
spu_unpack_A8R8G8B8(spu_extract(texels[2], 0)); - vector float texel11 = spu_unpack_A8R8G8B8(spu_extract(texels[3], 0)); + /* get packed int texels */ + vector unsigned int texels[16]; + get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */ + get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */ + get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */ + get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ + /* convert packed int texels to float colors */ + vector float ftexels[16]; + spu_unpack_A8R8G8B8_transpose4(texels + 0, ftexels + 0); + spu_unpack_A8R8G8B8_transpose4(texels + 4, ftexels + 4); + spu_unpack_A8R8G8B8_transpose4(texels + 8, ftexels + 8); + spu_unpack_A8R8G8B8_transpose4(texels + 12, ftexels + 12); /* Compute weighting factors in [0,1] * Multiply texcoord by 1024, AND with 1023, convert back to float. */ - vector float tc1024 = spu_mul(tc, spu_splats(1024.0f)); - vector signed int itc1024 = spu_convts(tc1024, 0); - itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1)); - vector float weight = spu_convtf(itc1024, 10); - - /* smeared frac and 1-frac */ - vector float sfrac = spu_splats(spu_extract(weight, 0)); - vector float tfrac = spu_splats(spu_extract(weight, 1)); - vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac); - vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac); - - /* multiply the samples (colors) by the S/T weights */ - texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1); - texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1); - texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac ); - texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac ); - - /* compute sum of weighted samples */ - vector float texel_sum = spu_add(texel00, texel01); - texel_sum = spu_add(texel_sum, texel10); - texel_sum = spu_add(texel_sum, texel11); - - return texel_sum; + vector float ss1024 = spu_mul(ss, spu_splats(1024.0f)); + vector signed int iss1024 = spu_convts(ss1024, 0); + iss1024 = 
spu_and(iss1024, 1023); + vector float sWeights0 = spu_convtf(iss1024, 10); + + vector float tt1024 = spu_mul(tt, spu_splats(1024.0f)); + vector signed int itt1024 = spu_convts(tt1024, 0); + itt1024 = spu_and(itt1024, 1023); + vector float tWeights0 = spu_convtf(itt1024, 10); + + /* 1 - sWeight and 1 - tWeight */ + vector float sWeights1 = spu_sub(spu_splats(1.0f), sWeights0); + vector float tWeights1 = spu_sub(spu_splats(1.0f), tWeights0); + + /* reds, for four pixels */ + ftexels[ 0] = spu_mul(ftexels[ 0], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 4] = spu_mul(ftexels[ 4], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[ 8] = spu_mul(ftexels[ 8], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[12] = spu_mul(ftexels[12], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[0] = spu_add(spu_add(ftexels[0], ftexels[4]), + spu_add(ftexels[8], ftexels[12])); + + /* greens, for four pixels */ + ftexels[ 1] = spu_mul(ftexels[ 1], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 5] = spu_mul(ftexels[ 5], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[ 9] = spu_mul(ftexels[ 9], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[13] = spu_mul(ftexels[13], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[1] = spu_add(spu_add(ftexels[1], ftexels[5]), + spu_add(ftexels[9], ftexels[13])); + + /* blues, for four pixels */ + ftexels[ 2] = spu_mul(ftexels[ 2], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 6] = spu_mul(ftexels[ 6], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[10] = spu_mul(ftexels[10], spu_mul(sWeights1, tWeights0)); /*ll*/ + ftexels[14] = spu_mul(ftexels[14], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[2] = spu_add(spu_add(ftexels[2], ftexels[6]), + spu_add(ftexels[10], ftexels[14])); + + /* alphas, for four pixels */ + ftexels[ 3] = spu_mul(ftexels[ 3], spu_mul(sWeights1, tWeights1)); /*ul*/ + ftexels[ 7] = spu_mul(ftexels[ 7], spu_mul(sWeights0, tWeights1)); /*ur*/ + ftexels[11] = spu_mul(ftexels[11], spu_mul(sWeights1, tWeights0)); /*ll*/ + 
ftexels[15] = spu_mul(ftexels[15], spu_mul(sWeights0, tWeights0)); /*lr*/ + colors[3] = spu_add(spu_add(ftexels[3], ftexels[7]), + spu_add(ftexels[11], ftexels[15])); +} + + + +/** + * Adapted from /opt/cell/sdk/usr/spu/include/transpose_matrix4x4.h + */ +static INLINE void +transpose(vector unsigned int *mOut0, + vector unsigned int *mOut1, + vector unsigned int *mOut2, + vector unsigned int *mOut3, + vector unsigned int *mIn) +{ + vector unsigned int abcd, efgh, ijkl, mnop; /* input vectors */ + vector unsigned int aeim, bfjn, cgko, dhlp; /* output vectors */ + vector unsigned int aibj, ckdl, emfn, gohp; /* intermediate vectors */ + + vector unsigned char shufflehi = ((vector unsigned char) { + 0x00, 0x01, 0x02, 0x03, + 0x10, 0x11, 0x12, 0x13, + 0x04, 0x05, 0x06, 0x07, + 0x14, 0x15, 0x16, 0x17}); + vector unsigned char shufflelo = ((vector unsigned char) { + 0x08, 0x09, 0x0A, 0x0B, + 0x18, 0x19, 0x1A, 0x1B, + 0x0C, 0x0D, 0x0E, 0x0F, + 0x1C, 0x1D, 0x1E, 0x1F}); + abcd = *(mIn+0); + efgh = *(mIn+1); + ijkl = *(mIn+2); + mnop = *(mIn+3); + + aibj = spu_shuffle(abcd, ijkl, shufflehi); + ckdl = spu_shuffle(abcd, ijkl, shufflelo); + emfn = spu_shuffle(efgh, mnop, shufflehi); + gohp = spu_shuffle(efgh, mnop, shufflelo); + + aeim = spu_shuffle(aibj, emfn, shufflehi); + bfjn = spu_shuffle(aibj, emfn, shufflelo); + cgko = spu_shuffle(ckdl, gohp, shufflehi); + dhlp = spu_shuffle(ckdl, gohp, shufflelo); + + *mOut0 = aeim; + *mOut1 = bfjn; + *mOut2 = cgko; + *mOut3 = dhlp; +} + + +/** + * Bilinear filtering, using int instead of float arithmetic for computing + * sample weights. 
+ */ +void +sample_texture_2d_bilinear_int(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]) +{ + const struct spu_texture_level *tlevel = &spu.texture[unit].level[level]; + static const vector float half = {-0.5f, -0.5f, -0.5f, -0.5f}; + + /* Scale texcoords by size of texture, and add half pixel bias */ + vector float ss = spu_madd(s, tlevel->scale_s, half); + vector float tt = spu_madd(t, tlevel->scale_t, half); + + /* convert float coords to fixed-pt coords with 7 fraction bits */ + vector signed int is = spu_convts(ss, 7); /* XXX really need floor() here */ + vector signed int it = spu_convts(tt, 7); /* XXX really need floor() here */ + + /* compute integer texel weights in [0, 127] */ + vector signed int sWeights0 = spu_and(is, 127); + vector signed int tWeights0 = spu_and(it, 127); + vector signed int sWeights1 = spu_sub(127, sWeights0); + vector signed int tWeights1 = spu_sub(127, tWeights0); + + /* texel coords: is0 = is / 128, it0 = it / 128 */ + vector signed int is0 = spu_rlmask(is, -7); + vector signed int it0 = spu_rlmask(it, -7); + + /* texel coords: is1 = is0 + 1, it1 = it0 + 1 */ + vector signed int is1 = spu_add(is0, 1); + vector signed int it1 = spu_add(it0, 1); + + /* PIPE_TEX_WRAP_REPEAT */ + is0 = spu_and(is0, tlevel->mask_s); + it0 = spu_and(it0, tlevel->mask_t); + is1 = spu_and(is1, tlevel->mask_s); + it1 = spu_and(it1, tlevel->mask_t); + + /* PIPE_TEX_WRAP_CLAMP */ + is0 = spu_clamp(is0, tlevel->max_s); + it0 = spu_clamp(it0, tlevel->max_t); + is1 = spu_clamp(is1, tlevel->max_s); + it1 = spu_clamp(it1, tlevel->max_t); + + /* get packed int texels */ + vector unsigned int texels[16]; + get_four_texels(tlevel, face, is0, it0, texels + 0); /* upper-left */ + get_four_texels(tlevel, face, is1, it0, texels + 4); /* upper-right */ + get_four_texels(tlevel, face, is0, it1, texels + 8); /* lower-left */ + get_four_texels(tlevel, face, is1, it1, texels + 12); /* lower-right */ + + /* twiddle packed 32-bit 
BGRA pixels into RGBA as four unsigned ints */ + { + static const unsigned char ZERO = 0x80; + int i; + for (i = 0; i < 16; i++) { + texels[i] = spu_shuffle(texels[i], texels[i], + ((vector unsigned char) { + ZERO, ZERO, ZERO, 1, + ZERO, ZERO, ZERO, 2, + ZERO, ZERO, ZERO, 3, + ZERO, ZERO, ZERO, 0})); + } + } + + /* convert RGBA,RGBA,RGBA,RGBA to RRRR,GGGG,BBBB,AAAA */ + vector unsigned int texel0, texel1, texel2, texel3, texel4, texel5, texel6, texel7, + texel8, texel9, texel10, texel11, texel12, texel13, texel14, texel15; + transpose(&texel0, &texel1, &texel2, &texel3, texels + 0); + transpose(&texel4, &texel5, &texel6, &texel7, texels + 4); + transpose(&texel8, &texel9, &texel10, &texel11, texels + 8); + transpose(&texel12, &texel13, &texel14, &texel15, texels + 12); + + /* computed weighted colors */ + vector unsigned int c0, c1, c2, c3, cSum; + + /* red */ + c0 = (vector unsigned int) si_mpy((qword) texel0, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpy((qword) texel4, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpy((qword) texel8, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpy((qword) texel12, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ + cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); + colors[0] = spu_convtf(cSum, 22); + + /* green */ + c0 = (vector unsigned int) si_mpy((qword) texel1, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpy((qword) texel5, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpy((qword) texel9, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpy((qword) texel13, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ + cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); + colors[1] = spu_convtf(cSum, 22); + + /* blue */ + c0 = (vector unsigned int) si_mpy((qword) texel2, si_mpy((qword) 
sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpy((qword) texel6, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpy((qword) texel10, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpy((qword) texel14, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ + cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); + colors[2] = spu_convtf(cSum, 22); + + /* alpha */ + c0 = (vector unsigned int) si_mpy((qword) texel3, si_mpy((qword) sWeights1, (qword) tWeights1)); /*ul*/ + c1 = (vector unsigned int) si_mpy((qword) texel7, si_mpy((qword) sWeights0, (qword) tWeights1)); /*ur*/ + c2 = (vector unsigned int) si_mpy((qword) texel11, si_mpy((qword) sWeights1, (qword) tWeights0)); /*ll*/ + c3 = (vector unsigned int) si_mpy((qword) texel15, si_mpy((qword) sWeights0, (qword) tWeights0)); /*lr*/ + cSum = spu_add(spu_add(c0, c1), spu_add(c2, c3)); + colors[3] = spu_convtf(cSum, 22); +} + + + +/** + * Compute level of detail factor from texcoords. + */ +static INLINE float +compute_lambda_2d(uint unit, vector float s, vector float t) +{ + uint baseLevel = 0; + float width = spu.texture[unit].level[baseLevel].width; + float height = spu.texture[unit].level[baseLevel].height; /* t derivatives are scaled by height, not width */ + float dsdx = width * (spu_extract(s, 1) - spu_extract(s, 0)); + float dsdy = width * (spu_extract(s, 2) - spu_extract(s, 0)); + float dtdx = height * (spu_extract(t, 1) - spu_extract(t, 0)); + float dtdy = height * (spu_extract(t, 2) - spu_extract(t, 0)); +#if 0 + /* ideal value */ + float x = dsdx * dsdx + dtdx * dtdx; + float y = dsdy * dsdy + dtdy * dtdy; + float rho = x > y ? 
x : y; + rho = sqrtf(rho); +#else + /* approximation */ + dsdx = fabsf(dsdx); + dsdy = fabsf(dsdy); + dtdx = fabsf(dtdx); + dtdy = fabsf(dtdy); + float rho = (dsdx + dsdy + dtdx + dtdy) * 0.5; +#endif + float lambda = logf(rho) * 1.442695f; /* compute logbase2(rho) */ + return lambda; +} + + +/** + * Blend two sets of colors according to weight. + */ +static void +blend_colors(vector float c0[4], const vector float c1[4], float weight) +{ + vector float t = spu_splats(weight); + vector float dc0 = spu_sub(c1[0], c0[0]); + vector float dc1 = spu_sub(c1[1], c0[1]); + vector float dc2 = spu_sub(c1[2], c0[2]); + vector float dc3 = spu_sub(c1[3], c0[3]); + c0[0] = spu_madd(dc0, t, c0[0]); + c0[1] = spu_madd(dc1, t, c0[1]); + c0[2] = spu_madd(dc2, t, c0[2]); + c0[3] = spu_madd(dc3, t, c0[3]); +} + + +/** + * Texture sampling with level of detail selection and possibly mipmap + * interpolation. + */ +void +sample_texture_2d_lod(vector float s, vector float t, + uint unit, uint level_ignored, uint face, + vector float colors[4]) +{ + /* + * Note that we're computing a lambda/lod here that's used for all + * four pixels in the quad. 
+ */ + float lambda = compute_lambda_2d(unit, s, t); + + (void) face; + (void) level_ignored; + + /* apply lod bias */ + lambda += spu.sampler[unit].lod_bias; + + /* clamp */ + if (lambda < spu.sampler[unit].min_lod) + lambda = spu.sampler[unit].min_lod; + else if (lambda > spu.sampler[unit].max_lod) + lambda = spu.sampler[unit].max_lod; + + if (lambda <= 0.0f) { + /* magnify */ + spu.mag_sample_texture_2d[unit](s, t, unit, 0, face, colors); + } + else { + /* minify */ + if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { + /* sample two mipmap levels and interpolate */ + int level = (int) lambda; + if (level > (int) spu.texture[unit].max_level) + level = spu.texture[unit].max_level; + spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); + if (spu.sampler[unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) { + /* sample second mipmap level */ + float weight = lambda - (float) level; + level++; + if (level <= (int) spu.texture[unit].max_level) { + vector float colors2[4]; + spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors2); + blend_colors(colors, colors2, weight); + } + } + } + else { + /* sample one mipmap level */ + int level = (int) (lambda + 0.5f); + if (level > (int) spu.texture[unit].max_level) + level = spu.texture[unit].max_level; + spu.min_sample_texture_2d[unit](s, t, unit, level, face, colors); + } + } +} + + +/** XXX need a SIMD version of this */ +static unsigned +choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) +{ + /* + major axis + direction target sc tc ma + ---------- ------------------------------- --- --- --- + +rx TEXTURE_CUBE_MAP_POSITIVE_X_EXT -rz -ry rx + -rx TEXTURE_CUBE_MAP_NEGATIVE_X_EXT +rz -ry rx + +ry TEXTURE_CUBE_MAP_POSITIVE_Y_EXT +rx +rz ry + -ry TEXTURE_CUBE_MAP_NEGATIVE_Y_EXT +rx -rz ry + +rz TEXTURE_CUBE_MAP_POSITIVE_Z_EXT +rx -ry rz + -rz TEXTURE_CUBE_MAP_NEGATIVE_Z_EXT -rx -ry rz + */ + const float arx = fabsf(rx); + const float ary = fabsf(ry); + const float arz = 
fabsf(rz); + unsigned face; + float sc, tc, ma; + + if (arx > ary && arx > arz) { + if (rx >= 0.0F) { + face = PIPE_TEX_FACE_POS_X; + sc = -rz; + tc = -ry; + ma = arx; + } + else { + face = PIPE_TEX_FACE_NEG_X; + sc = rz; + tc = -ry; + ma = arx; + } + } + else if (ary > arx && ary > arz) { + if (ry >= 0.0F) { + face = PIPE_TEX_FACE_POS_Y; + sc = rx; + tc = rz; + ma = ary; + } + else { + face = PIPE_TEX_FACE_NEG_Y; + sc = rx; + tc = -rz; + ma = ary; + } + } + else { + if (rz > 0.0F) { + face = PIPE_TEX_FACE_POS_Z; + sc = rx; + tc = -ry; + ma = arz; + } + else { + face = PIPE_TEX_FACE_NEG_Z; + sc = -rx; + tc = -ry; + ma = arz; + } + } + + *newS = (sc / ma + 1.0F) * 0.5F; + *newT = (tc / ma + 1.0F) * 0.5F; + + return face; +} + + + +void +sample_texture_cube(vector float s, vector float t, vector float r, + uint unit, vector float colors[4]) +{ + uint p, faces[4], level = 0; + float newS[4], newT[4]; + + /* Compute cube faces referenced by the four sets of texcoords. + * XXX we should SIMD-ize this. + */ + for (p = 0; p < 4; p++) { + float rx = spu_extract(s, p); + float ry = spu_extract(t, p); + float rz = spu_extract(r, p); + faces[p] = choose_cube_face(rx, ry, rz, &newS[p], &newT[p]); + } + + if (faces[0] == faces[1] && + faces[0] == faces[2] && + faces[0] == faces[3]) { + /* GOOD! All four texcoords refer to the same cube face */ + s = (vector float) {newS[0], newS[1], newS[2], newS[3]}; + t = (vector float) {newT[0], newT[1], newT[2], newT[3]}; + spu.sample_texture_2d[unit](s, t, unit, level, faces[0], colors); + } + else { + /* BAD! 
The four texcoords refer to different faces */ + for (p = 0; p < 4; p++) { + vector float c[4]; + + spu.sample_texture_2d[unit](spu_splats(newS[p]), spu_splats(newT[p]), + unit, level, faces[p], c); + + float red = spu_extract(c[0], p); + float green = spu_extract(c[1], p); + float blue = spu_extract(c[2], p); + float alpha = spu_extract(c[3], p); + + colors[0] = spu_insert(red, colors[0], p); + colors[1] = spu_insert(green, colors[1], p); + colors[2] = spu_insert(blue, colors[2], p); + colors[3] = spu_insert(alpha, colors[3], p); + } + } } diff --git a/src/gallium/drivers/cell/spu/spu_texture.h b/src/gallium/drivers/cell/spu/spu_texture.h index f7c9738be8..7b75b007b5 100644 --- a/src/gallium/drivers/cell/spu/spu_texture.h +++ b/src/gallium/drivers/cell/spu/spu_texture.h @@ -36,12 +36,32 @@ extern void invalidate_tex_cache(void); -extern vector float -sample_texture_nearest(uint unit, vector float texcoord); +extern void +sample_texture_2d_nearest(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); + + +extern void +sample_texture_2d_bilinear(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); + +extern void +sample_texture_2d_bilinear_int(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); + +extern void +sample_texture_2d_lod(vector float s, vector float t, + uint unit, uint level, uint face, + vector float colors[4]); -extern vector float -sample_texture_bilinear(uint unit, vector float texcoord); + +extern void +sample_texture_cube(vector float s, vector float t, vector float r, + uint unit, vector float colors[4]); #endif /* SPU_TEXTURE_H */ diff --git a/src/gallium/drivers/cell/spu/spu_tile.c b/src/gallium/drivers/cell/spu/spu_tile.c index 216a33126b..6905015a48 100644 --- a/src/gallium/drivers/cell/spu/spu_tile.c +++ b/src/gallium/drivers/cell/spu/spu_tile.c @@ -87,3 +87,40 @@ put_tile(uint tx, uint ty, const tile_t *tile, int tag, 
int zBuf) 0 /* rid */); } + +/** + * For tiles whose status is TILE_STATUS_CLEAR, write solid-filled + * tiles back to the main framebuffer (surfaceIndex 0 = color, else Z). + */ +void +really_clear_tiles(uint surfaceIndex) +{ + const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles; + uint i; + + if (surfaceIndex == 0) { + clear_c_tile(&spu.ctile); + + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (spu.ctile_status[ty][tx] == TILE_STATUS_CLEAR) { + put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0); + } + } + } + else { + clear_z_tile(&spu.ztile); + + for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) { + uint tx = i % spu.fb.width_tiles; + uint ty = i / spu.fb.width_tiles; + if (spu.ztile_status[ty][tx] == TILE_STATUS_CLEAR) + put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1); + } + } + +#if 0 + wait_on_mask(1 << TAG_SURFACE_CLEAR); +#endif +} diff --git a/src/gallium/drivers/cell/spu/spu_tile.h b/src/gallium/drivers/cell/spu/spu_tile.h index 1b5491112d..7bfb52be8f 100644 --- a/src/gallium/drivers/cell/spu/spu_tile.h +++ b/src/gallium/drivers/cell/spu/spu_tile.h @@ -36,12 +36,14 @@ -void +extern void get_tile(uint tx, uint ty, tile_t *tile, int tag, int zBuf); -void +extern void put_tile(uint tx, uint ty, const tile_t *tile, int tag, int zBuf); +extern void +really_clear_tiles(uint surfaceIndex); static INLINE void diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 8b93878192..d727268475 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -29,12 +29,12 @@ * Triangle rendering within a tile.
*/ -#include <transpose_matrix4x4.h> #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "util/u_math.h" #include "spu_colorpack.h" #include "spu_main.h" +#include "spu_shuffle.h" #include "spu_texture.h" #include "spu_tile.h" #include "spu_tri.h" @@ -43,11 +43,6 @@ /** Masks are uint[4] vectors with each element being 0 or 0xffffffff */ typedef vector unsigned int mask_t; -typedef union -{ - vector float v; - float f[4]; -} float4; /** @@ -61,7 +56,7 @@ struct vertex_header { /* XXX fix this */ #undef CEILF -#define CEILF(X) ((float) (int) ((X) + 0.99999)) +#define CEILF(X) ((float) (int) ((X) + 0.99999f)) #define QUAD_TOP_LEFT 0 @@ -75,14 +70,25 @@ struct vertex_header { #define MASK_ALL 0xf +#define CHAN0 0 +#define CHAN1 1 +#define CHAN2 2 +#define CHAN3 3 + + #define DEBUG_VERTS 0 /** * Triangle edge info */ struct edge { - float dx; /**< X(v1) - X(v0), used only during setup */ - float dy; /**< Y(v1) - Y(v0), used only during setup */ + union { + struct { + float dx; /**< X(v1) - X(v0), used only during setup */ + float dy; /**< Y(v1) - Y(v0), used only during setup */ + }; + vec_float4 ds; /**< vector accessor for dx and dy */ + }; float dxdy; /**< dx/dy */ float sx, sy; /**< first sample point coord */ int lines; /**< number of lines on this edge */ @@ -91,9 +97,9 @@ struct edge { struct interp_coef { - float4 a0; - float4 dadx; - float4 dady; + vector float a0; + vector float dadx; + vector float dady; }; @@ -107,34 +113,32 @@ struct setup_stage { * turn. Currently fixed at 4 floats, but should change in time. * Codegen will help cope with this. 
*/ - const struct vertex_header *vmax; - const struct vertex_header *vmid; - const struct vertex_header *vmin; - const struct vertex_header *vprovoke; + union { + struct { + const struct vertex_header *vmin; + const struct vertex_header *vmid; + const struct vertex_header *vmax; + const struct vertex_header *vprovoke; + }; + qword vertex_headers; + }; struct edge ebot; struct edge etop; struct edge emaj; - float oneoverarea; + float oneOverArea; /* XXX maybe make into vector? */ - uint tx, ty; + uint facing; + + uint tx, ty; /**< position of current tile (x, y) */ int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; -#if 0 - struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; -#else struct interp_coef coef[PIPE_MAX_SHADER_INPUTS]; -#endif - -#if 0 - struct quad_header quad; -#endif struct { - int left[2]; /**< [0] = row0, [1] = row1 */ - int right[2]; + vec_int4 quad; /**< [0] = row0, [1] = row1; {left[0],left[1],right[0],right[1]} */ int y; unsigned y_flags; unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ @@ -142,118 +146,103 @@ struct setup_stage { }; - static struct setup_stage setup; +static INLINE vector float +splatx(vector float v) +{ + return spu_splats(spu_extract(v, CHAN0)); +} - -#if 0 -/** - * Basically a cast wrapper. - */ -static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) +static INLINE vector float +splaty(vector float v) { - return (struct setup_stage *)stage; + return spu_splats(spu_extract(v, CHAN1)); } -#endif -#if 0 -/** - * Clip setup.quad against the scissor/surface bounds. 
- */ -static INLINE void -quad_clip(struct setup_stage *setup) +static INLINE vector float +splatz(vector float v) { - const struct pipe_scissor_state *cliprect = &setup.softpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - - if (setup.quad.x0 >= maxx || - setup.quad.y0 >= maxy || - setup.quad.x0 + 1 < minx || - setup.quad.y0 + 1 < miny) { - /* totally clipped */ - setup.quad.mask = 0x0; - return; - } - if (setup.quad.x0 < minx) - setup.quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (setup.quad.y0 < miny) - setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (setup.quad.x0 == maxx - 1) - setup.quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (setup.quad.y0 == maxy - 1) - setup.quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); + return spu_splats(spu_extract(v, CHAN2)); } -#endif -#if 0 -/** - * Emit a quad (pass to next stage) with clipping. - */ -static INLINE void -clip_emit_quad(struct setup_stage *setup) +static INLINE vector float +splatw(vector float v) { - quad_clip(setup); - if (setup.quad.mask) { - struct softpipe_context *sp = setup.softpipe; - sp->quad.first->run(sp->quad.first, &setup.quad); - } + return spu_splats(spu_extract(v, CHAN3)); } -#endif + /** - * Evaluate attribute coefficients (plane equations) to compute - * attribute values for the four fragments in a quad. - * Eg: four colors will be computed (in AoS format). + * Setup fragment shader inputs by evaluating triangle's vertex + * attribute coefficient info. + * \param x quad x pos + * \param y quad y pos + * \param fragZ returns quad Z values + * \param fragInputs returns fragment program inputs + * Note: this code could be incorporated into the fragment program + * itself to avoid the loop and switch. 
*/ -static INLINE void -eval_coeff(uint slot, float x, float y, vector float result[4]) +static void +eval_inputs(float x, float y, vector float *fragZ, vector float fragInputs[]) { - switch (spu.vertex_info.interp_mode[slot]) { - case INTERP_CONSTANT: - result[QUAD_TOP_LEFT] = - result[QUAD_TOP_RIGHT] = - result[QUAD_BOTTOM_LEFT] = - result[QUAD_BOTTOM_RIGHT] = setup.coef[slot].a0.v; - break; + static const vector float deltaX = (const vector float) {0, 1, 0, 1}; + static const vector float deltaY = (const vector float) {0, 0, 1, 1}; + + const uint posSlot = 0; + const vector float pos = setup.coef[posSlot].a0; + const vector float dposdx = setup.coef[posSlot].dadx; + const vector float dposdy = setup.coef[posSlot].dady; + const vector float fragX = spu_splats(x) + deltaX; + const vector float fragY = spu_splats(y) + deltaY; + vector float fragW, wInv; + uint i; - case INTERP_LINEAR: - /* fall-through, for now */ - default: - { - register vector float dadx = setup.coef[slot].dadx.v; - register vector float dady = setup.coef[slot].dady.v; - register vector float topLeft - = spu_add(setup.coef[slot].a0.v, - spu_add(spu_mul(spu_splats(x), dadx), - spu_mul(spu_splats(y), dady))); - - result[QUAD_TOP_LEFT] = topLeft; - result[QUAD_TOP_RIGHT] = spu_add(topLeft, dadx); - result[QUAD_BOTTOM_LEFT] = spu_add(topLeft, dady); - result[QUAD_BOTTOM_RIGHT] = spu_add(spu_add(topLeft, dadx), dady); + *fragZ = splatz(pos) + fragX * splatz(dposdx) + fragY * splatz(dposdy); + fragW = splatw(pos) + fragX * splatw(dposdx) + fragY * splatw(dposdy); + wInv = spu_re(fragW); /* 1 / w */ + + /* loop over fragment program inputs: attribs 1..num_attribs-1 (slot 0 is position) */ + for (i = 0; i + 1 < spu.vertex_info.num_attribs; i++) { + uint attr = i + 1; + enum interp_mode interp = spu.vertex_info.attrib[attr].interp_mode; + + /* constant term */ + vector float a0 = setup.coef[attr].a0; + vector float r0 = splatx(a0); + vector float r1 = splaty(a0); + vector float r2 = splatz(a0); + vector float r3 = splatw(a0); + + if (interp == 
INTERP_LINEAR || interp == INTERP_PERSPECTIVE) { + /* linear term */ + vector float dadx = setup.coef[attr].dadx; + vector float dady = setup.coef[attr].dady; + /* Use SPU intrinsics here to get slightly better code. + * originally: r0 += fragX * splatx(dadx) + fragY * splatx(dady); + */ + r0 = spu_madd(fragX, splatx(dadx), spu_madd(fragY, splatx(dady), r0)); + r1 = spu_madd(fragX, splaty(dadx), spu_madd(fragY, splaty(dady), r1)); + r2 = spu_madd(fragX, splatz(dadx), spu_madd(fragY, splatz(dady), r2)); + r3 = spu_madd(fragX, splatw(dadx), spu_madd(fragY, splatw(dady), r3)); + if (interp == INTERP_PERSPECTIVE) { + /* perspective term */ + r0 *= wInv; + r1 *= wInv; + r2 *= wInv; + r3 *= wInv; + } } + fragInputs[CHAN0] = r0; + fragInputs[CHAN1] = r1; + fragInputs[CHAN2] = r2; + fragInputs[CHAN3] = r3; + fragInputs += 4; } } -static INLINE vector float -eval_z(float x, float y) -{ - const uint slot = 0; - const float dzdx = setup.coef[slot].dadx.f[2]; - const float dzdy = setup.coef[slot].dady.f[2]; - const float topLeft = setup.coef[slot].a0.f[2] + x * dzdx + y * dzdy; - const vector float topLeftv = spu_splats(topLeft); - const vector float derivs = (vector float) { 0.0, dzdx, dzdy, dzdx + dzdy }; - return spu_add(topLeftv, derivs); -} - - /** * Emit a quad (pass to next stage). No clipping is done. * Note: about 1/5 to 1/7 of the time, mask is zero and this function @@ -261,120 +250,51 @@ eval_z(float x, float y) * overall. */ static INLINE void -emit_quad( int x, int y, mask_t mask ) +emit_quad( int x, int y, mask_t mask) { /* If any bits in mask are set... 
*/ if (spu_extract(spu_orx(mask), 0)) { const int ix = x - setup.cliprect_minx; const int iy = y - setup.cliprect_miny; - vector float colors[4]; spu.cur_ctile_status = TILE_STATUS_DIRTY; spu.cur_ztile_status = TILE_STATUS_DIRTY; - if (spu.texture[0].start) { - /* texture mapping */ - const uint unit = 0; - vector float texcoords[4]; - eval_coeff(2, (float) x, (float) y, texcoords); - - if (spu_extract(mask, 0)) - colors[0] = spu.sample_texture[unit](unit, texcoords[0]); - if (spu_extract(mask, 1)) - colors[1] = spu.sample_texture[unit](unit, texcoords[1]); - if (spu_extract(mask, 2)) - colors[2] = spu.sample_texture[unit](unit, texcoords[2]); - if (spu_extract(mask, 3)) - colors[3] = spu.sample_texture[unit](unit, texcoords[3]); - - - if (spu.texture[1].start) { - /* multi-texture mapping */ - const uint unit = 1; - vector float colors1[4]; - - eval_coeff(2, (float) x, (float) y, texcoords); - - if (spu_extract(mask, 0)) - colors1[0] = spu.sample_texture[unit](unit, texcoords[0]); - if (spu_extract(mask, 1)) - colors1[1] = spu.sample_texture[unit](unit, texcoords[1]); - if (spu_extract(mask, 2)) - colors1[2] = spu.sample_texture[unit](unit, texcoords[2]); - if (spu_extract(mask, 3)) - colors1[3] = spu.sample_texture[unit](unit, texcoords[3]); - - /* hack: modulate first texture by second */ - colors[0] = spu_mul(colors[0], colors1[0]); - colors[1] = spu_mul(colors[1], colors1[1]); - colors[2] = spu_mul(colors[2], colors1[2]); - colors[3] = spu_mul(colors[3], colors1[3]); - } - - } - else { - /* simple shading */ -#if 0 - eval_coeff(1, (float) x, (float) y, colors); - -#else - /* XXX new fragment program code */ - - if (spu.fragment_program) { - vector float inputs[4*4], outputs[2*4]; - - /* setup inputs */ - eval_coeff(1, (float) x, (float) y, inputs); - - /* Execute the current fragment program */ - spu.fragment_program(inputs, outputs, spu.constants); - - /* Copy outputs */ - colors[0] = outputs[0*4+0]; - colors[1] = outputs[0*4+1]; - colors[2] = outputs[0*4+2]; 
- colors[3] = outputs[0*4+3]; - - if (0 && spu.init.id==0 && y == 48) { - printf("colors[0] = %f %f %f %f\n", - spu_extract(colors[0], 0), - spu_extract(colors[0], 1), - spu_extract(colors[0], 2), - spu_extract(colors[0], 3)); - printf("colors[1] = %f %f %f %f\n", - spu_extract(colors[1], 0), - spu_extract(colors[1], 1), - spu_extract(colors[1], 2), - spu_extract(colors[1], 3)); - } - - } -#endif - } - - { - /* Convert fragment data from AoS to SoA format. - * I.e. (RGBA,RGBA,RGBA,RGBA) -> (RRRR,GGGG,BBBB,AAAA) - * This is temporary! + /* + * Run fragment shader, execute per-fragment ops, update fb/tile. */ - vector float soa_frag[4]; - _transpose_matrix4x4(soa_frag, colors); + vector float inputs[4*4], outputs[2*4]; + vector unsigned int kill_mask; + vector float fragZ; + + eval_inputs((float) x, (float) y, &fragZ, inputs); - float4 fragZ; + ASSERT(spu.fragment_program); + ASSERT(spu.fragment_ops); - fragZ.v = eval_z((float) x, (float) y); + /* Execute the current fragment program */ + kill_mask = spu.fragment_program(inputs, outputs, spu.constants); - /* Do all per-fragment/quad operations here, including: - * alpha test, z test, stencil test, blend and framebuffer writing. + mask = spu_andc(mask, kill_mask); + + /* Execute per-fragment/quad operations, including: + * alpha test, z test, stencil test, blend and framebuffer writing. + * Note that there are two different fragment operations functions + * that can be called, one for front-facing fragments, and one + * for back-facing fragments. (Often the two are the same; + * but in some cases, like two-sided stenciling, they can be + * very different.) So choose the correct function depending + * on the calculated facing. 
*/ - spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile, - fragZ.v, - soa_frag[0], soa_frag[1], - soa_frag[2], soa_frag[3], + spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile, + fragZ, + outputs[0*4+0], + outputs[0*4+1], + outputs[0*4+2], + outputs[0*4+3], mask); } - } } @@ -383,64 +303,49 @@ emit_quad( int x, int y, mask_t mask ) * Given an X or Y coordinate, return the block/quad coordinate that it * belongs to. */ -static INLINE int block( int x ) +static INLINE int +block(int x) { return x & ~1; } /** - * Compute mask which indicates which pixels in the 2x2 quad are actually inside - * the triangle's bounds. - * The mask is a uint4 vector and each element will be 0 or 0xffffffff. - */ -static INLINE mask_t calculate_mask( int x ) -{ - /* This is a little tricky. - * Use & instead of && to avoid branches. - * Use negation to convert true/false to ~0/0 values. - */ - mask_t mask; - mask = spu_insert(-((x >= setup.span.left[0]) & (x < setup.span.right[0])), mask, 0); - mask = spu_insert(-((x+1 >= setup.span.left[0]) & (x+1 < setup.span.right[0])), mask, 1); - mask = spu_insert(-((x >= setup.span.left[1]) & (x < setup.span.right[1])), mask, 2); - mask = spu_insert(-((x+1 >= setup.span.left[1]) & (x+1 < setup.span.right[1])), mask, 3); - return mask; -} - - -/** * Render a horizontal span of quads */ -static void flush_spans( void ) +static void +flush_spans(void) { int minleft, maxright; - int x; + + const int l0 = spu_extract(setup.span.quad, 0); + const int l1 = spu_extract(setup.span.quad, 1); + const int r0 = spu_extract(setup.span.quad, 2); + const int r1 = spu_extract(setup.span.quad, 3); switch (setup.span.y_flags) { case 0x3: /* both odd and even lines written (both quad rows) */ - minleft = MIN2(setup.span.left[0], setup.span.left[1]); - maxright = MAX2(setup.span.right[0], setup.span.right[1]); + minleft = MIN2(l0, l1); + maxright = MAX2(r0, r1); break; case 0x1: /* only even line written (quad top row) */ - minleft = setup.span.left[0]; - 
maxright = setup.span.right[0]; + minleft = l0; + maxright = r0; break; case 0x2: /* only odd line written (quad bottom row) */ - minleft = setup.span.left[1]; - maxright = setup.span.right[1]; + minleft = l1; + maxright = r1; break; default: return; } - /* OK, we're very likely to need the tile data now. * clear or finish waiting if needed. */ @@ -457,7 +362,7 @@ static void flush_spans( void ) } ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED); - if (spu.read_depth) { + if (spu.read_depth_stencil) { if (spu.cur_ztile_status == TILE_STATUS_GETTING) { /* wait for mfc_get() to complete */ //printf("SPU: %u: waiting for ztile\n", spu.init.id); @@ -472,93 +377,119 @@ static void flush_spans( void ) ASSERT(spu.cur_ztile_status != TILE_STATUS_DEFINED); } - /* XXX this loop could be moved into the above switch cases and - * calculate_mask() could be simplified a bit... - */ - for (x = block(minleft); x <= block(maxright); x += 2) { -#if 1 - emit_quad( x, setup.span.y, calculate_mask( x ) ); -#endif + /* XXX this loop could be moved into the above switch cases... */ + + /* Setup for mask calculation */ + const vec_int4 quad_LlRr = setup.span.quad; + const vec_int4 quad_RrLl = spu_rlqwbyte(quad_LlRr, 8); + const vec_int4 quad_LLll = spu_shuffle(quad_LlRr, quad_LlRr, SHUFFLE4(A,A,B,B)); + const vec_int4 quad_RRrr = spu_shuffle(quad_RrLl, quad_RrLl, SHUFFLE4(A,A,B,B)); + + const vec_int4 twos = spu_splats(2); + + const int x = block(minleft); + vec_int4 xs = {x, x+1, x, x+1}; + + for (; spu_extract(xs, 0) <= block(maxright); xs += twos) { + /** + * Computes mask to indicate which pixels in the 2x2 quad are actually + * inside the triangle's bounds. 
+ */ + + /* Calculate ({x,x+1,x,x+1} >= {l[0],l[0],l[1],l[1]}) */ + const mask_t gt_LLll_xs = spu_cmpgt(quad_LLll, xs); + const mask_t gte_xs_LLll = spu_nand(gt_LLll_xs, gt_LLll_xs); + + /* Calculate ({r[0],r[0],r[1],r[1]} > {x,x+1,x,x+1}) */ + const mask_t gt_RRrr_xs = spu_cmpgt(quad_RRrr, xs); + + /* Combine results to create mask */ + const mask_t mask = spu_and(gte_xs_LLll, gt_RRrr_xs); + + emit_quad(spu_extract(xs, 0), setup.span.y, mask); } setup.span.y = 0; setup.span.y_flags = 0; - setup.span.right[0] = 0; - setup.span.right[1] = 0; + /* Zero right elements */ + setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); } + #if DEBUG_VERTS -static void print_vertex(const struct vertex_header *v) +static void +print_vertex(const struct vertex_header *v) { - int i; - fprintf(stderr, "Vertex: (%p)\n", v); - for (i = 0; i < setup.quad.nr_attrs; i++) { - fprintf(stderr, " %d: %f %f %f %f\n", i, - v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); + uint i; + fprintf(stderr, " Vertex: (%p)\n", v); + for (i = 0; i < spu.vertex_info.num_attribs; i++) { + fprintf(stderr, " %d: %f %f %f %f\n", i, + spu_extract(v->data[i], 0), + spu_extract(v->data[i], 1), + spu_extract(v->data[i], 2), + spu_extract(v->data[i], 3)); } } #endif -static boolean setup_sort_vertices(const struct vertex_header *v0, - const struct vertex_header *v1, - const struct vertex_header *v2) +/** + * Sort vertices from top to bottom. + * Compute area and determine front vs. back facing. 
+ * Do coarse clip test against tile bounds + * \return FALSE if tri is totally outside tile, TRUE otherwise + */ +static boolean +setup_sort_vertices(const struct vertex_header *v0, + const struct vertex_header *v1, + const struct vertex_header *v2) { + float area, sign; #if DEBUG_VERTS - fprintf(stderr, "Triangle:\n"); - print_vertex(v0); - print_vertex(v1); - print_vertex(v2); + if (spu.init.id==0) { + fprintf(stderr, "SPU %u: Triangle:\n", spu.init.id); + print_vertex(v0); + print_vertex(v1); + print_vertex(v2); + } #endif - setup.vprovoke = v2; - /* determine bottom to top order of vertices */ { - float y0 = spu_extract(v0->data[0], 1); - float y1 = spu_extract(v1->data[0], 1); - float y2 = spu_extract(v2->data[0], 1); - if (y0 <= y1) { - if (y1 <= y2) { - /* y0<=y1<=y2 */ - setup.vmin = v0; - setup.vmid = v1; - setup.vmax = v2; - } - else if (y2 <= y0) { - /* y2<=y0<=y1 */ - setup.vmin = v2; - setup.vmid = v0; - setup.vmax = v1; - } - else { - /* y0<=y2<=y1 */ - setup.vmin = v0; - setup.vmid = v2; - setup.vmax = v1; - } - } - else { - if (y0 <= y2) { - /* y1<=y0<=y2 */ - setup.vmin = v1; - setup.vmid = v0; - setup.vmax = v2; - } - else if (y2 <= y1) { - /* y2<=y1<=y0 */ - setup.vmin = v2; - setup.vmid = v1; - setup.vmax = v0; - } - else { - /* y1<=y2<=y0 */ - setup.vmin = v1; - setup.vmid = v2; - setup.vmax = v0; - } - } + /* A table of shuffle patterns for putting vertex_header pointers into + correct order. Quite magical. */ + const vec_uchar16 sort_order_patterns[] = { + SHUFFLE4(A,B,C,C), + SHUFFLE4(C,A,B,C), + SHUFFLE4(A,C,B,C), + SHUFFLE4(B,C,A,C), + SHUFFLE4(B,A,C,C), + SHUFFLE4(C,B,A,C) }; + + /* The vertex_header pointers, packed for easy shuffling later */ + const vec_uint4 vs = {(unsigned)v0, (unsigned)v1, (unsigned)v2}; + + /* Collate y values into two vectors for comparison. + Using only one shuffle constant! 
;) */ + const vec_float4 y_02_ = spu_shuffle(v0->data[0], v2->data[0], SHUFFLE4(0,B,b,C)); + const vec_float4 y_10_ = spu_shuffle(v1->data[0], v0->data[0], SHUFFLE4(0,B,b,C)); + const vec_float4 y_012 = spu_shuffle(y_02_, v1->data[0], SHUFFLE4(0,B,b,C)); + const vec_float4 y_120 = spu_shuffle(y_10_, v2->data[0], SHUFFLE4(0,B,b,C)); + + /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */ + const vec_uint4 compare = spu_cmpgt(y_012, y_120); + /* Compress the result of the comparison into 4 bits */ + const vec_uint4 gather = spu_gather(compare); + /* Subtract one to attain the index into the LUT. Magical. */ + const unsigned int index = spu_extract(gather, 0) - 1; + + /* Load the appropriate pattern and construct the desired vector. */ + setup.vertex_headers = (qword)spu_shuffle(vs, vs, sort_order_patterns[index]); + + /* Using the result of the comparison, set sign. + Very magical. */ + sign = ((si_to_uint(si_cntb((qword)gather)) == 2) ? 1.0f : -1.0f); } /* Check if triangle is completely outside the tile bounds */ @@ -575,41 +506,28 @@ static boolean setup_sort_vertices(const struct vertex_header *v0, spu_extract(setup.vmax->data[0], 0) > setup.cliprect_maxx) return FALSE; - setup.ebot.dx = spu_extract(setup.vmid->data[0], 0) - spu_extract(setup.vmin->data[0], 0); - setup.ebot.dy = spu_extract(setup.vmid->data[0], 1) - spu_extract(setup.vmin->data[0], 1); - setup.emaj.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmin->data[0], 0); - setup.emaj.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmin->data[0], 1); - setup.etop.dx = spu_extract(setup.vmax->data[0], 0) - spu_extract(setup.vmid->data[0], 0); - setup.etop.dy = spu_extract(setup.vmax->data[0], 1) - spu_extract(setup.vmid->data[0], 1); + setup.ebot.ds = spu_sub(setup.vmid->data[0], setup.vmin->data[0]); + setup.emaj.ds = spu_sub(setup.vmax->data[0], setup.vmin->data[0]); + setup.etop.ds = spu_sub(setup.vmax->data[0], setup.vmid->data[0]); /* * Compute triangle's area. 
Use 1/area to compute partial * derivatives of attributes later. - * - * The area will be the same as prim->det, but the sign may be - * different depending on how the vertices get sorted above. - * - * To determine whether the primitive is front or back facing we - * use the prim->det value because its sign is correct. */ - { - const float area = (setup.emaj.dx * setup.ebot.dy - - setup.ebot.dx * setup.emaj.dy); - - setup.oneoverarea = 1.0f / area; - /* - _mesa_printf("%s one-over-area %f area %f det %f\n", - __FUNCTION__, setup.oneoverarea, area, prim->det ); - */ - } + area = setup.emaj.dx * setup.ebot.dy - setup.ebot.dx * setup.emaj.dy; + + setup.oneOverArea = 1.0f / area; -#if 0 - /* We need to know if this is a front or back-facing triangle for: - * - the GLSL gl_FrontFacing fragment attribute (bool) - * - two-sided stencil test + /* The product of area * sign indicates front/back orientation (0/1). + * Just in case someone gets the bright idea of switching the front + * and back constants without noticing that we're assuming their + * values in this operation, also assert that the values are + * what we think they are. */ - setup.quad.facing = (prim->det > 0.0) ^ (setup.softpipe->rasterizer->front_winding == PIPE_WINDING_CW); -#endif + ASSERT(CELL_FACING_FRONT == 0); + ASSERT(CELL_FACING_BACK == 1); + setup.facing = (area * sign > 0.0f) + ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW); return TRUE; } @@ -622,63 +540,11 @@ static boolean setup_sort_vertices(const struct vertex_header *v0, * \param slot which attribute slot */ static INLINE void -const_coeff(uint slot) +const_coeff4(uint slot) { - setup.coef[slot].dadx.v = (vector float) {0.0, 0.0, 0.0, 0.0}; - setup.coef[slot].dady.v = (vector float) {0.0, 0.0, 0.0, 0.0}; - setup.coef[slot].a0.v = setup.vprovoke->data[slot]; -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. 
- */ -static INLINE void -tri_linear_coeff(uint slot, uint firstComp, uint lastComp) -{ - uint i; - const float *vmin_d = (float *) &setup.vmin->data[slot]; - const float *vmid_d = (float *) &setup.vmid->data[slot]; - const float *vmax_d = (float *) &setup.vmax->data[slot]; - const float x = spu_extract(setup.vmin->data[0], 0) - 0.5f; - const float y = spu_extract(setup.vmin->data[0], 1) - 0.5f; - - for (i = firstComp; i < lastComp; i++) { - float botda = vmid_d[i] - vmin_d[i]; - float majda = vmax_d[i] - vmin_d[i]; - float a = setup.ebot.dy * majda - botda * setup.emaj.dy; - float b = setup.emaj.dx * botda - majda * setup.ebot.dx; - - ASSERT(slot < PIPE_MAX_SHADER_INPUTS); - - setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; - setup.coef[slot].dady.f[i] = b * setup.oneoverarea; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. 
- */ - setup.coef[slot].a0.f[i] = (vmin_d[i] - - (setup.coef[slot].dadx.f[i] * x + - setup.coef[slot].dady.f[i] * y)); - } - - /* - _mesa_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup.coef[slot].a0[i], - setup.coef[slot].dadx.f[i], - setup.coef[slot].dady.f[i]); - */ + setup.coef[slot].dadx = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].dady = (vector float) {0.0, 0.0, 0.0, 0.0}; + setup.coef[slot].a0 = setup.vprovoke->data[slot]; } @@ -702,18 +568,16 @@ tri_linear_coeff4(uint slot) vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), spu_mul(majda, spu_splats(setup.ebot.dx))); - setup.coef[slot].dadx.v = spu_mul(a, spu_splats(setup.oneoverarea)); - setup.coef[slot].dady.v = spu_mul(b, spu_splats(setup.oneoverarea)); + setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); + setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); - vector float tempx = spu_mul(setup.coef[slot].dadx.v, xxxx); - vector float tempy = spu_mul(setup.coef[slot].dady.v, yyyy); + vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); - setup.coef[slot].a0.v = spu_sub(vmin_d, spu_add(tempx, tempy)); + setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); } - -#if 0 /** * Compute a0, dadx and dady for a perspective-corrected interpolant, * for a triangle. @@ -722,82 +586,76 @@ tri_linear_coeff4(uint slot) * Later, when we compute the value at a particular fragment position we'll * divide the interpolated value by the interpolated W at that fragment. 
*/ -static void tri_persp_coeff( unsigned slot, - unsigned i ) +static void +tri_persp_coeff4(uint slot) { - /* premultiply by 1/w: - */ - float mina = setup.vmin->data[slot][i] * setup.vmin->data[0][3]; - float mida = setup.vmid->data[slot][i] * setup.vmid->data[0][3]; - float maxa = setup.vmax->data[slot][i] * setup.vmax->data[0][3]; - - float botda = mida - mina; - float majda = maxa - mina; - float a = setup.ebot.dy * majda - botda * setup.emaj.dy; - float b = setup.emaj.dx * botda - majda * setup.ebot.dx; - - /* - printf("tri persp %d,%d: %f %f %f\n", slot, i, - setup.vmin->data[slot][i], - setup.vmid->data[slot][i], - setup.vmax->data[slot][i] - ); - */ + const vector float xxxx = spu_splats(spu_extract(setup.vmin->data[0], 0) - 0.5f); + const vector float yyyy = spu_splats(spu_extract(setup.vmin->data[0], 1) - 0.5f); + + const vector float vmin_w = spu_splats(spu_extract(setup.vmin->data[0], 3)); + const vector float vmid_w = spu_splats(spu_extract(setup.vmid->data[0], 3)); + const vector float vmax_w = spu_splats(spu_extract(setup.vmax->data[0], 3)); + + vector float vmin_d = setup.vmin->data[slot]; + vector float vmid_d = setup.vmid->data[slot]; + vector float vmax_d = setup.vmax->data[slot]; + + vmin_d = spu_mul(vmin_d, vmin_w); + vmid_d = spu_mul(vmid_d, vmid_w); + vmax_d = spu_mul(vmax_d, vmax_w); + + vector float botda = vmid_d - vmin_d; + vector float majda = vmax_d - vmin_d; - assert(slot < PIPE_MAX_SHADER_INPUTS); - assert(i <= 3); + vector float a = spu_sub(spu_mul(spu_splats(setup.ebot.dy), majda), + spu_mul(botda, spu_splats(setup.emaj.dy))); + vector float b = spu_sub(spu_mul(spu_splats(setup.emaj.dx), botda), + spu_mul(majda, spu_splats(setup.ebot.dx))); - setup.coef[slot].dadx.f[i] = a * setup.oneoverarea; - setup.coef[slot].dady.f[i] = b * setup.oneoverarea; - setup.coef[slot].a0.f[i] = (mina - - (setup.coef[slot].dadx.f[i] * (setup.vmin->data[0][0] - 0.5f) + - setup.coef[slot].dady.f[i] * (setup.vmin->data[0][1] - 0.5f))); + 
setup.coef[slot].dadx = spu_mul(a, spu_splats(setup.oneOverArea)); + setup.coef[slot].dady = spu_mul(b, spu_splats(setup.oneOverArea)); + + vector float tempx = spu_mul(setup.coef[slot].dadx, xxxx); + vector float tempy = spu_mul(setup.coef[slot].dady, yyyy); + + setup.coef[slot].a0 = spu_sub(vmin_d, spu_add(tempx, tempy)); } -#endif + /** * Compute the setup.coef[] array dadx, dady, a0 values. * Must be called after setup.vmin,vmid,vmax,vprovoke are initialized. */ -static void setup_tri_coefficients(void) +static void +setup_tri_coefficients(void) { -#if 1 uint i; for (i = 0; i < spu.vertex_info.num_attribs; i++) { - switch (spu.vertex_info.interp_mode[i]) { + switch (spu.vertex_info.attrib[i].interp_mode) { case INTERP_NONE: break; - case INTERP_POS: - /*tri_linear_coeff(i, 2, 3);*/ - /* XXX interp W if PERSPECTIVE... */ - tri_linear_coeff4(i); - break; case INTERP_CONSTANT: - const_coeff(i); + const_coeff4(i); break; + case INTERP_POS: + /* fall-through */ case INTERP_LINEAR: tri_linear_coeff4(i); break; case INTERP_PERSPECTIVE: - tri_linear_coeff4(i); /* temporary */ + tri_persp_coeff4(i); break; default: ASSERT(0); } } -#else - ASSERT(spu.vertex_info.interp_mode[0] == INTERP_POS); - ASSERT(spu.vertex_info.interp_mode[1] == INTERP_LINEAR || - spu.vertex_info.interp_mode[1] == INTERP_CONSTANT); - tri_linear_coeff(0, 2, 3); /* slot 0, z */ - tri_linear_coeff(1, 0, 4); /* slot 1, color */ -#endif } -static void setup_tri_edges(void) +static void +setup_tri_edges(void) { float vmin_x = spu_extract(setup.vmin->data[0], 0) + 0.5f; float vmid_x = spu_extract(setup.vmid->data[0], 0) + 0.5f; @@ -827,9 +685,8 @@ static void setup_tri_edges(void) * Render the upper or lower half of a triangle. * Scissoring/cliprect is applied here too. 
*/ -static void subtriangle( struct edge *eleft, - struct edge *eright, - unsigned lines ) +static void +subtriangle(struct edge *eleft, struct edge *eright, unsigned lines) { const int minx = setup.cliprect_minx; const int maxx = setup.cliprect_maxx; @@ -881,9 +738,11 @@ static void subtriangle( struct edge *eleft, setup.span.y = block(_y); } - setup.span.left[_y&1] = left; - setup.span.right[_y&1] = right; - setup.span.y_flags |= 1<<(_y&1); + int offset = _y&1; + vec_int4 quad_LlRr = {left, left, right, right}; + /* Store left and right in 0 or 1 row of quad based on offset */ + setup.span.quad = spu_sel(quad_LlRr, setup.span.quad, spu_maskw(5<<offset)); + setup.span.y_flags |= 1<<offset; } } @@ -902,7 +761,8 @@ static void subtriangle( struct edge *eleft, * The tile data should have already been fetched. */ boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) +tri_draw(const float *v0, const float *v1, const float *v2, + uint tx, uint ty) { setup.tx = tx; setup.ty = ty; @@ -924,21 +784,16 @@ tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) setup.span.y = 0; setup.span.y_flags = 0; - setup.span.right[0] = 0; - setup.span.right[1] = 0; - /* setup.span.z_mode = tri_z_mode( setup.ctx ); */ + /* Zero right elements */ + setup.span.quad = spu_shuffle(setup.span.quad, setup.span.quad, SHUFFLE4(A,B,0,0)); - /* init_constant_attribs( setup ); */ - - if (setup.oneoverarea < 0.0) { - /* emaj on left: - */ + if (setup.oneOverArea < 0.0) { + /* emaj on left */ subtriangle( &setup.emaj, &setup.ebot, setup.ebot.lines ); subtriangle( &setup.emaj, &setup.etop, setup.etop.lines ); } else { - /* emaj on right: - */ + /* emaj on right */ subtriangle( &setup.ebot, &setup.emaj, setup.ebot.lines ); subtriangle( &setup.etop, &setup.emaj, setup.etop.lines ); } diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index b8a0d4a265..af25dd3718 100644 --- 
a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,7 +1,7 @@ #include "cell/common.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "tgsi/tgsi_parse.h" //#include "tgsi_build.h" #include "tgsi/tgsi_util.h" diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile index f08b8df07a..dfb7f5dcf6 100644 --- a/src/gallium/drivers/failover/Makefile +++ b/src/gallium/drivers/failover/Makefile @@ -9,6 +9,3 @@ C_SOURCES = \ fo_context.c include ../../Makefile.template - -symlinks: - diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 10c4ffc209..0742b27b8f 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -27,7 +27,7 @@ #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_memory.h" #include "pipe/p_context.h" diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index c6409fe1e1..9ba86ba866 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -114,5 +114,12 @@ failover_context( struct pipe_context *pipe ) return (struct failover_context *)pipe; } +/* Internal functions + */ +void +failover_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf); + #endif /* FO_CONTEXT_H */ diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile index 41a61a0020..12821c5a76 100644 --- a/src/gallium/drivers/i915simple/Makefile +++ b/src/gallium/drivers/i915simple/Makefile @@ -26,6 +26,3 @@ C_SOURCES = \ i915_surface.c include ../../Makefile.template - -symlinks: - diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h index 45bf4f4028..a433cf054d 100644 --- 
a/src/gallium/drivers/i915simple/i915_batch.h +++ b/src/gallium/drivers/i915simple/i915_batch.h @@ -108,7 +108,7 @@ i915_batchbuffer_flush( struct i915_batchbuffer *batch, #define OUT_RELOC( buf, flags, delta ) \ i915_batchbuffer_reloc( i915->batch, buf, flags, delta ) -#define FLUSH_BATCH(fence) do { \ +#define FLUSH_BATCH(fence) do { \ i915->winsys->batch_flush( i915->winsys, fence ); \ i915->hardware_dirty = ~0; \ } while (0) diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c index 45fae4c999..448a4708ce 100644 --- a/src/gallium/drivers/i915simple/i915_blit.c +++ b/src/gallium/drivers/i915simple/i915_blit.c @@ -38,7 +38,7 @@ void i915_fill_blit(struct i915_context *i915, unsigned cpp, - short dst_pitch, + unsigned short dst_pitch, struct pipe_buffer *dst_buffer, unsigned dst_offset, short x, short y, @@ -47,15 +47,23 @@ i915_fill_blit(struct i915_context *i915, { unsigned BR13, CMD; + + I915_DBG(i915, + "%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", + __FUNCTION__, + dst_buffer, dst_pitch, dst_offset, x, y, w, h); + switch (cpp) { case 1: case 2: case 3: - BR13 = dst_pitch | (0xF0 << 16) | (1 << 24); + BR13 = (((int) dst_pitch) & 0xffff) | + (0xF0 << 16) | (1 << 24); CMD = XY_COLOR_BLT_CMD; break; case 4: - BR13 = dst_pitch | (0xF0 << 16) | (1 << 24) | (1 << 25); + BR13 = (((int) dst_pitch) & 0xffff) | + (0xF0 << 16) | (1 << 24) | (1 << 25); CMD = (XY_COLOR_BLT_CMD | XY_COLOR_BLT_WRITE_ALPHA | XY_COLOR_BLT_WRITE_RGB); break; @@ -63,10 +71,6 @@ i915_fill_blit(struct i915_context *i915, return; } -// DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", -// __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h); - - if (!BEGIN_BATCH(6, 1)) { FLUSH_BATCH(NULL); assert(BEGIN_BATCH(6, 1)); @@ -77,6 +81,7 @@ i915_fill_blit(struct i915_context *i915, OUT_BATCH(((y + h) << 16) | (x + w)); OUT_RELOC( dst_buffer, I915_BUFFER_ACCESS_WRITE, dst_offset); OUT_BATCH(color); + FLUSH_BATCH(NULL); } @@ -84,10 +89,10 @@ void i915_copy_blit( 
struct i915_context *i915, unsigned do_flip, unsigned cpp, - short src_pitch, + unsigned short src_pitch, struct pipe_buffer *src_buffer, unsigned src_offset, - short dst_pitch, + unsigned short dst_pitch, struct pipe_buffer *dst_buffer, unsigned dst_offset, short src_x, short src_y, @@ -105,20 +110,16 @@ i915_copy_blit( struct i915_context *i915, src_buffer, src_pitch, src_offset, src_x, src_y, dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); - src_pitch *= (short) cpp; - dst_pitch *= (short) cpp; - switch (cpp) { case 1: case 2: case 3: - BR13 = (((int) dst_pitch) & 0xffff) | + BR13 = (((int) dst_pitch) & 0xffff) | (0xCC << 16) | (1 << 24); CMD = XY_SRC_COPY_BLT_CMD; break; case 4: - BR13 = - (((int) dst_pitch) & 0xffff) | + BR13 = (((int) dst_pitch) & 0xffff) | (0xCC << 16) | (1 << 24) | (1 << 25); CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | @@ -152,6 +153,7 @@ i915_copy_blit( struct i915_context *i915, OUT_BATCH((src_y << 16) | src_x); OUT_BATCH(((int) src_pitch & 0xffff)); OUT_RELOC(src_buffer, I915_BUFFER_ACCESS_READ, src_offset); + FLUSH_BATCH(NULL); } diff --git a/src/gallium/drivers/i915simple/i915_blit.h b/src/gallium/drivers/i915simple/i915_blit.h index 6e5b44e124..0bb3453861 100644 --- a/src/gallium/drivers/i915simple/i915_blit.h +++ b/src/gallium/drivers/i915simple/i915_blit.h @@ -33,10 +33,10 @@ extern void i915_copy_blit(struct i915_context *i915, unsigned do_flip, unsigned cpp, - short src_pitch, + unsigned short src_pitch, struct pipe_buffer *src_buffer, unsigned src_offset, - short dst_pitch, + unsigned short dst_pitch, struct pipe_buffer *dst_buffer, unsigned dst_offset, short srcx, short srcy, @@ -45,7 +45,7 @@ extern void i915_copy_blit(struct i915_context *i915, extern void i915_fill_blit(struct i915_context *i915, unsigned cpp, - short dst_pitch, + unsigned short dst_pitch, struct pipe_buffer *dst_buffer, unsigned dst_offset, short x, short y, diff --git a/src/gallium/drivers/i915simple/i915_context.c 
b/src/gallium/drivers/i915simple/i915_context.c index 6dd3eda85d..3e3a596884 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -34,7 +34,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_memory.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c index 5e26d1b905..e08582efab 100644 --- a/src/gallium/drivers/i915simple/i915_debug.c +++ b/src/gallium/drivers/i915simple/i915_debug.c @@ -29,8 +29,9 @@ #include "i915_context.h" #include "i915_winsys.h" #include "i915_debug.h" -#include "pipe/p_winsys.h" -#include "pipe/p_debug.h" +#include "i915_batch.h" +#include "pipe/internal/p_winsys_screen.h" +#include "util/u_debug.h" static void @@ -210,6 +211,7 @@ BITS( PRINTF(stream, ": 0x%x\n", ((dw) & himask) >> (lo)); } +#ifdef DEBUG #define MBZ( dw, hi, lo) do { \ unsigned x = (dw) >> (lo); \ unsigned lomask = (1 << (lo)) - 1; \ @@ -217,6 +219,10 @@ BITS( himask = (1UL << (hi)) - 1; \ assert ((x & himask & ~lomask) == 0); \ } while (0) +#else +#define MBZ( dw, hi, lo) do { \ +} while (0) +#endif static void FLAG( @@ -858,19 +864,17 @@ static boolean i915_debug_packet( struct debug_stream *stream ) void -i915_dump_batchbuffer( struct i915_context *i915 ) +i915_dump_batchbuffer( struct i915_batchbuffer *batch ) { struct debug_stream stream; - /* TODO fix me */ - unsigned *start = 0;/*i915->batch_start;*/ - unsigned *end = 0;/*i915->winsys->batch_start( i915->winsys, 0, 0 );*/ + unsigned *start = (unsigned*)batch->map; + unsigned *end = (unsigned*)batch->ptr; unsigned long bytes = (unsigned long) (end - start) * 4; boolean done = FALSE; stream.offset = 0; stream.ptr = (char *)start; stream.print_addresses = 0; - stream.winsys = i915->pipe.winsys; if (!start || !end) { debug_printf( "\n\nBATCH: ???\n"); diff 
--git a/src/gallium/drivers/i915simple/i915_debug.h b/src/gallium/drivers/i915simple/i915_debug.h index afb63edabf..16ca7277c7 100644 --- a/src/gallium/drivers/i915simple/i915_debug.h +++ b/src/gallium/drivers/i915simple/i915_debug.h @@ -41,7 +41,6 @@ struct debug_stream char *ptr; /* pointer to gtt offset zero */ char *end; /* pointer to gtt offset zero */ unsigned print_addresses; - struct pipe_winsys *winsys; }; @@ -73,7 +72,7 @@ void i915_print_ureg(const char *msg, unsigned ureg); #if defined(DEBUG) && defined(FILE_DEBUG_FLAG) -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" static INLINE void I915_DBG( @@ -105,9 +104,9 @@ I915_DBG( #endif -void i915_dump_batchbuffer( struct i915_context *i915 ); - +struct i915_batchbuffer; +void i915_dump_batchbuffer( struct i915_batchbuffer *i915 ); void i915_debug_init( struct i915_context *i915 ); diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c index 48be3e1472..9c5b117b6d 100644 --- a/src/gallium/drivers/i915simple/i915_debug_fp.c +++ b/src/gallium/drivers/i915simple/i915_debug_fp.c @@ -28,7 +28,7 @@ #include "i915_reg.h" #include "i915_debug.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_memory.h" diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 34b4a846c1..961c1bf213 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -144,7 +144,7 @@ src_vector(struct i915_fp_compile *p, const struct tgsi_full_src_register *source) { uint index = source->SrcRegister.Index; - uint src, sem_name, sem_ind; + uint src = 0, sem_name, sem_ind; switch (source->SrcRegister.File) { case TGSI_FILE_TEMPORARY: @@ -321,16 +321,27 @@ static uint translate_tex_src_target(struct i915_fp_compile *p, uint tex) { switch (tex) { + case TGSI_TEXTURE_SHADOW1D: + /* fall-through */ 
case TGSI_TEXTURE_1D: return D0_SAMPLE_TYPE_2D; + + case TGSI_TEXTURE_SHADOW2D: + /* fall-through */ case TGSI_TEXTURE_2D: return D0_SAMPLE_TYPE_2D; + + case TGSI_TEXTURE_SHADOWRECT: + /* fall-through */ case TGSI_TEXTURE_RECT: return D0_SAMPLE_TYPE_2D; + case TGSI_TEXTURE_3D: return D0_SAMPLE_TYPE_VOLUME; + case TGSI_TEXTURE_CUBE: return D0_SAMPLE_TYPE_CUBE; + default: i915_program_error(p, "TexSrc type"); return 0; @@ -964,7 +975,7 @@ i915_translate_instructions(struct i915_fp_compile *p, = &parse.FullToken.FullImmediate; const uint pos = p->num_immediates++; uint j; - for (j = 0; j < imm->Immediate.Size; j++) { + for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { p->immediates[pos][j] = imm->u.ImmediateFloat32[j].Float; } } diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 4fda1ab64f..58c41840e1 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -40,9 +40,9 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -62,7 +62,7 @@ struct i915_vbuf_render { struct i915_context *i915; /** Vertex size in bytes */ - unsigned vertex_size; + size_t vertex_size; /** Software primitive */ unsigned prim; @@ -79,6 +79,7 @@ struct i915_vbuf_render { size_t vbo_offset; void *vbo_ptr; size_t vbo_alloc_size; + size_t vbo_max_used; }; @@ -108,7 +109,7 @@ i915_vbuf_render_get_vertex_info( struct vbuf_render *render ) } -static void * +static boolean i915_vbuf_render_allocate_vertices( struct vbuf_render *render, ushort vertex_size, ushort nr_vertices ) @@ -124,7 +125,8 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { } else { 
i915->vbo_flushed = 0; - pipe_buffer_reference(screen, &i915_render->vbo, NULL); + if (i915_render->vbo) + pipe_buffer_reference(screen, &i915_render->vbo, NULL); } if (!i915_render->vbo) { @@ -134,19 +136,49 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, 64, I915_BUFFER_USAGE_LIT_VERTEX, i915_render->vbo_size); - i915_render->vbo_ptr = pipe_buffer_map(screen, - i915_render->vbo, - PIPE_BUFFER_USAGE_CPU_WRITE); - pipe_buffer_unmap(screen, i915_render->vbo); + } + i915_render->vertex_size = vertex_size; i915->vbo = i915_render->vbo; i915->vbo_offset = i915_render->vbo_offset; i915->dirty |= I915_NEW_VBO; + if (!i915_render->vbo) + return FALSE; + return TRUE; +} + + +static void * +i915_vbuf_render_map_vertices( struct vbuf_render *render ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + struct pipe_screen *screen = i915->pipe.screen; + + if (i915->vbo_flushed) + debug_printf("%s bad vbo flush occured stalling on hw\n"); + + i915_render->vbo_ptr = pipe_buffer_map(screen, + i915_render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + return (unsigned char *)i915_render->vbo_ptr + i915->vbo_offset; } +static void +i915_vbuf_render_unmap_vertices( struct vbuf_render *render, + ushort min_index, + ushort max_index ) +{ + struct i915_vbuf_render *i915_render = i915_vbuf_render(render); + struct i915_context *i915 = i915_render->i915; + struct pipe_screen *screen = i915->pipe.screen; + + i915_render->vbo_max_used = MAX2(i915_render->vbo_max_used, i915_render->vertex_size * (max_index + 1)); + pipe_buffer_unmap(screen, i915_render->vbo); +} static boolean i915_vbuf_render_set_primitive( struct vbuf_render *render, @@ -197,9 +229,7 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render, i915_render->fallback = 0; return TRUE; default: - assert((int)"Error unkown primtive type" & 0); - /* Actually, can handle a lot more just fine... Fixme. 
- */ + /* FIXME: Actually, can handle a lot more just fine... */ return FALSE; } } @@ -456,18 +486,15 @@ out: static void -i915_vbuf_render_release_vertices( struct vbuf_render *render, - void *vertices, - unsigned vertex_size, - unsigned vertices_used ) +i915_vbuf_render_release_vertices( struct vbuf_render *render ) { struct i915_vbuf_render *i915_render = i915_vbuf_render(render); struct i915_context *i915 = i915_render->i915; - size_t size = (size_t)vertex_size * (size_t)vertices_used; assert(i915->vbo); - i915_render->vbo_offset += size; + i915_render->vbo_offset += i915_render->vbo_max_used; + i915_render->vbo_max_used = 0; i915->vbo = NULL; i915->dirty |= I915_NEW_VBO; } @@ -501,6 +528,8 @@ i915_vbuf_render_create( struct i915_context *i915 ) i915_render->base.get_vertex_info = i915_vbuf_render_get_vertex_info; i915_render->base.allocate_vertices = i915_vbuf_render_allocate_vertices; + i915_render->base.map_vertices = i915_vbuf_render_map_vertices; + i915_render->base.unmap_vertices = i915_vbuf_render_unmap_vertices; i915_render->base.set_primitive = i915_vbuf_render_set_primitive; i915_render->base.draw = i915_vbuf_render_draw; i915_render->base.draw_arrays = i915_vbuf_render_draw_arrays; diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 1c976082df..b7bd3b3b74 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -27,7 +27,8 @@ #include "util/u_memory.h" -#include "pipe/p_winsys.h" +#include "util/u_simple_screen.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_string.h" @@ -203,16 +204,79 @@ i915_destroy_screen( struct pipe_screen *screen ) } +static struct pipe_transfer* +i915_get_tex_transfer(struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct 
i915_texture *tex = (struct i915_texture *)texture; + struct i915_transfer *trans; + unsigned offset; /* in bytes */ + + if (texture->target == PIPE_TEXTURE_CUBE) { + offset = tex->image_offset[level][face]; + } + else if (texture->target == PIPE_TEXTURE_3D) { + offset = tex->image_offset[level][zslice]; + } + else { + offset = tex->image_offset[level][0]; + assert(face == 0); + assert(zslice == 0); + } + + trans = CALLOC_STRUCT(i915_transfer); + if (trans) { + trans->base.refcount = 1; + pipe_texture_reference(&trans->base.texture, texture); + trans->base.format = trans->base.format; + trans->base.width = w; + trans->base.height = h; + trans->base.block = texture->block; + trans->base.nblocksx = texture->nblocksx[level]; + trans->base.nblocksy = texture->nblocksy[level]; + trans->base.stride = tex->stride; + trans->offset = offset; + trans->base.usage = usage; + } + return &trans->base; +} + +static void +i915_tex_transfer_release(struct pipe_screen *screen, + struct pipe_transfer **transfer) +{ + struct pipe_transfer *trans = *transfer; + + if (--trans->refcount == 0) { + pipe_texture_reference(&trans->texture, NULL); + FREE(trans); + } + + *transfer = NULL; +} + static void * -i915_surface_map( struct pipe_screen *screen, - struct pipe_surface *surface, - unsigned flags ) +i915_transfer_map( struct pipe_screen *screen, + struct pipe_transfer *transfer ) { - char *map = pipe_buffer_map( screen, surface->buffer, flags ); + struct i915_texture *tex = (struct i915_texture *)transfer->texture; + char *map; + unsigned flags = 0; + + if (transfer->usage != PIPE_TRANSFER_WRITE) + flags |= PIPE_BUFFER_USAGE_CPU_READ; + + if (transfer->usage != PIPE_TRANSFER_READ) + flags |= PIPE_BUFFER_USAGE_CPU_WRITE; + + map = pipe_buffer_map( screen, tex->buffer, flags ); if (map == NULL) return NULL; - if (surface->texture && + if (transfer->texture && (flags & PIPE_BUFFER_USAGE_CPU_WRITE)) { /* Do something to notify contexts of a texture change. 
@@ -220,14 +284,17 @@ i915_surface_map( struct pipe_screen *screen, /* i915_screen(screen)->timestamp++; */ } - return map + surface->offset; + return map + i915_transfer(transfer)->offset + + transfer->y / transfer->block.height * transfer->stride + + transfer->x / transfer->block.width * transfer->block.size; } static void -i915_surface_unmap(struct pipe_screen *screen, - struct pipe_surface *surface) +i915_transfer_unmap(struct pipe_screen *screen, + struct pipe_transfer *transfer) { - pipe_buffer_unmap( screen, surface->buffer ); + struct i915_texture *tex = (struct i915_texture *)transfer->texture; + pipe_buffer_unmap( screen, tex->buffer ); } @@ -275,10 +342,13 @@ i915_create_screen(struct pipe_winsys *winsys, uint pci_id) i915screen->screen.get_param = i915_get_param; i915screen->screen.get_paramf = i915_get_paramf; i915screen->screen.is_format_supported = i915_is_format_supported; - i915screen->screen.surface_map = i915_surface_map; - i915screen->screen.surface_unmap = i915_surface_unmap; + i915screen->screen.get_tex_transfer = i915_get_tex_transfer; + i915screen->screen.tex_transfer_release = i915_tex_transfer_release; + i915screen->screen.transfer_map = i915_transfer_map; + i915screen->screen.transfer_unmap = i915_transfer_unmap; i915_init_screen_texture_functions(&i915screen->screen); + u_simple_screen_init(&i915screen->screen); return &i915screen->screen; } diff --git a/src/gallium/drivers/i915simple/i915_screen.h b/src/gallium/drivers/i915simple/i915_screen.h index 73b0ff05ce..a371663453 100644 --- a/src/gallium/drivers/i915simple/i915_screen.h +++ b/src/gallium/drivers/i915simple/i915_screen.h @@ -50,13 +50,30 @@ struct i915_screen }; -/** cast wrapper */ +/** + * Subclass of pipe_transfer + */ +struct i915_transfer +{ + struct pipe_transfer base; + + unsigned offset; +}; + + +/** cast wrappers */ static INLINE struct i915_screen * i915_screen(struct pipe_screen *pscreen) { return (struct i915_screen *) pscreen; } +static INLINE struct i915_transfer * 
+i915_transfer( struct pipe_transfer *transfer ) +{ + return (struct i915_transfer *)transfer; +} + extern struct pipe_screen * i915_create_screen(struct pipe_winsys *winsys, uint pci_id); diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index d2487d8277..273e74002a 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -30,7 +30,7 @@ #include "draw/draw_context.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -318,8 +318,8 @@ i915_create_depth_stencil_state(struct pipe_context *pipe, struct i915_depth_stencil_state *cso = CALLOC_STRUCT( i915_depth_stencil_state ); { - int testmask = depth_stencil->stencil[0].value_mask & 0xff; - int writemask = depth_stencil->stencil[0].write_mask & 0xff; + int testmask = depth_stencil->stencil[0].valuemask & 0xff; + int writemask = depth_stencil->stencil[0].writemask & 0xff; cso->stencil_modes4 |= (_3DSTATE_MODES_4_CMD | ENABLE_STENCIL_TEST_MASK | @@ -350,8 +350,8 @@ i915_create_depth_stencil_state(struct pipe_context *pipe, int dfop = i915_translate_stencil_op(depth_stencil->stencil[1].zfail_op); int dpop = i915_translate_stencil_op(depth_stencil->stencil[1].zpass_op); int ref = depth_stencil->stencil[1].ref_value & 0xff; - int tmask = depth_stencil->stencil[1].value_mask & 0xff; - int wmask = depth_stencil->stencil[1].write_mask & 0xff; + int tmask = depth_stencil->stencil[1].valuemask & 0xff; + int wmask = depth_stencil->stencil[1].writemask & 0xff; cso->bfo[0] = (_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_FUNCS | @@ -394,7 +394,7 @@ i915_create_depth_stencil_state(struct pipe_context *pipe, if (depth_stencil->alpha.enabled) { int test = i915_translate_compare_func(depth_stencil->alpha.func); - ubyte refByte = float_to_ubyte(depth_stencil->alpha.ref); + ubyte refByte = 
float_to_ubyte(depth_stencil->alpha.ref_value); cso->depth_LIS6 |= (S6_ALPHA_TEST_ENABLE | (test << S6_ALPHA_TEST_FUNC_SHIFT) | @@ -535,13 +535,13 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, */ if (buf) { void *mapped; - if (buf->size && + if (buf->buffer && buf->buffer->size && (mapped = ws->buffer_map(ws, buf->buffer, - PIPE_BUFFER_USAGE_CPU_READ))) { - memcpy(i915->current.constants[shader], mapped, buf->size); + PIPE_BUFFER_USAGE_CPU_READ))) { + memcpy(i915->current.constants[shader], mapped, buf->buffer->size); ws->buffer_unmap(ws, buf->buffer); i915->current.num_user_constants[shader] - = buf->size / (4 * sizeof(float)); + = buf->buffer->size / (4 * sizeof(float)); } else { i915->current.num_user_constants[shader] = 0; diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 9bd6f92323..26e03f5127 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -211,20 +211,23 @@ i915_emit_hardware_state(struct i915_context *i915 ) struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; if (cbuf_surface) { - unsigned cpitch = cbuf_surface->stride; unsigned ctile = BUF_3D_USE_FENCE; - if (cbuf_surface->texture && - ((struct i915_texture*)(cbuf_surface->texture))->tiled) { + struct i915_texture *tex = (struct i915_texture *) + cbuf_surface->texture; + struct pipe_buffer *buffer = tex->buffer; + assert(tex); + + if (tex && tex->tiled) { ctile = BUF_3D_TILED_SURFACE; } OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - OUT_BATCH(BUF_3D_ID_COLOR_BACK | - BUF_3D_PITCH(cpitch) | /* pitch in bytes */ + OUT_BATCH(BUF_3D_ID_COLOR_BACK | + BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ ctile); - OUT_RELOC(cbuf_surface->buffer, + OUT_RELOC(tex->buffer, I915_BUFFER_ACCESS_WRITE, cbuf_surface->offset); } @@ -232,20 +235,23 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* What happens if no zbuf?? 
*/ if (depth_surface) { - unsigned zpitch = depth_surface->stride; unsigned ztile = BUF_3D_USE_FENCE; - if (depth_surface->texture && - ((struct i915_texture*)(depth_surface->texture))->tiled) { + struct i915_texture *tex = (struct i915_texture *) + depth_surface->texture; + struct pipe_buffer *buffer = tex->buffer; + assert(tex); + + if (tex && tex->tiled) { ztile = BUF_3D_TILED_SURFACE; } OUT_BATCH(_3DSTATE_BUF_INFO_CMD); OUT_BATCH(BUF_3D_ID_DEPTH | - BUF_3D_PITCH(zpitch) | /* pitch in bytes */ + BUF_3D_PITCH(tex->stride) | /* pitch in bytes */ ztile); - OUT_RELOC(depth_surface->buffer, + OUT_RELOC(tex->buffer, I915_BUFFER_ACCESS_WRITE, depth_surface->offset); } diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 62f1926644..7eec649906 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -31,7 +31,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_tile.h" #include "util/u_rect.h" @@ -47,42 +47,22 @@ i915_surface_copy(struct pipe_context *pipe, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { - assert( dst != src ); - assert( dst->block.size == src->block.size ); - assert( dst->block.width == src->block.height ); - assert( dst->block.height == src->block.height ); + struct i915_texture *dst_tex = (struct i915_texture *)dst->texture; + struct i915_texture *src_tex = (struct i915_texture *)src->texture; - if (0) { - void *dst_map = pipe->screen->surface_map( pipe->screen, - dst, - PIPE_BUFFER_USAGE_CPU_WRITE ); - - const void *src_map = pipe->screen->surface_map( pipe->screen, - src, - PIPE_BUFFER_USAGE_CPU_READ ); - - pipe_copy_rect(dst_map, - &dst->block, - dst->stride, - dstx, dsty, - width, height, - src_map, - do_flip ? -(int) src->stride : src->stride, - srcx, do_flip ? 
height - 1 - srcy : srcy); + assert( dst != src ); + assert( dst_tex->base.block.size == src_tex->base.block.size ); + assert( dst_tex->base.block.width == src_tex->base.block.height ); + assert( dst_tex->base.block.height == src_tex->base.block.height ); + assert( dst_tex->base.block.width == 1 ); + assert( dst_tex->base.block.height == 1 ); - pipe->screen->surface_unmap(pipe->screen, src); - pipe->screen->surface_unmap(pipe->screen, dst); - } - else { - assert(dst->block.width == 1); - assert(dst->block.height == 1); - i915_copy_blit( i915_context(pipe), - do_flip, - dst->block.size, - (short) src->stride, src->buffer, src->offset, - (short) dst->stride, dst->buffer, dst->offset, - (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); - } + i915_copy_blit( i915_context(pipe), + do_flip, + dst_tex->base.block.size, + (unsigned short) src_tex->stride, src_tex->buffer, src->offset, + (unsigned short) dst_tex->stride, dst_tex->buffer, dst->offset, + (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); } @@ -92,26 +72,18 @@ i915_surface_fill(struct pipe_context *pipe, unsigned dstx, unsigned dsty, unsigned width, unsigned height, unsigned value) { - if (0) { - void *dst_map = pipe->screen->surface_map( pipe->screen, - dst, - PIPE_BUFFER_USAGE_CPU_WRITE ); + struct i915_texture *tex = (struct i915_texture *)dst->texture; - pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); + assert(tex->base.block.width == 1); + assert(tex->base.block.height == 1); - pipe->screen->surface_unmap(pipe->screen, dst); - } - else { - assert(dst->block.width == 1); - assert(dst->block.height == 1); - i915_fill_blit( i915_context(pipe), - dst->block.size, - (short) dst->stride, - dst->buffer, dst->offset, - (short) dstx, (short) dsty, - (short) width, (short) height, - value ); - } + i915_fill_blit( i915_context(pipe), + tex->base.block.size, + (unsigned short) tex->stride, + tex->buffer, 
dst->offset, + (short) dstx, (short) dsty, + (short) width, (short) height, + value ); } diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index bd87217063..957726523f 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -34,7 +34,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -178,7 +178,9 @@ i915_displaytarget_layout(struct i915_texture *tex) if (tex->base.width[0] >= 128) { tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); +#if 0 /* used for tiled display targets */ tex->tiled = 1; +#endif } else { tex->stride = round_up(tex->base.nblocksx[0] * pt->block.size, 64); tex->total_nblocksy = tex->base.nblocksy[0]; @@ -206,11 +208,10 @@ i945_miptree_layout_2d( struct i915_texture *tex ) unsigned nblocksx = pt->nblocksx[0]; unsigned nblocksy = pt->nblocksy[0]; -#if 0 /* used for tiled display targets */ - if (pt->last_level == 0 && pt->block.size == 4) + /* used for tiled display targets */ + if (0) if (i915_displaytarget_layout(tex)) return; -#endif tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); @@ -605,8 +606,8 @@ i915_texture_create(struct pipe_screen *screen, tex_size = tex->stride * tex->total_nblocksy; tex->buffer = ws->buffer_create(ws, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex_size); + PIPE_BUFFER_USAGE_PIXEL, + tex_size); if (!tex->buffer) goto fail; @@ -663,7 +664,6 @@ i915_get_tex_surface(struct pipe_screen *screen, unsigned flags) { struct i915_texture *tex = (struct i915_texture *)pt; - struct pipe_winsys *ws = screen->winsys; struct pipe_surface *ps; unsigned offset; /* in bytes */ @@ -682,16 +682,10 @@ i915_get_tex_surface(struct pipe_screen *screen, ps = CALLOC_STRUCT(pipe_surface); if 
(ps) { ps->refcount = 1; - ps->winsys = ws; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(screen, &ps->buffer, tex->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->block = pt->block; - ps->nblocksx = pt->nblocksx[level]; - ps->nblocksy = pt->nblocksy[level]; - ps->stride = tex->stride; ps->offset = offset; ps->usage = flags; ps->status = PIPE_SURFACE_STATUS_DEFINED; @@ -756,7 +750,6 @@ i915_tex_surface_release(struct pipe_screen *screen, } pipe_texture_reference(&surf->texture, NULL); - pipe_buffer_reference(screen, &surf->buffer, NULL); FREE(surf); } diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile index e97146e57c..19182afa75 100644 --- a/src/gallium/drivers/i965simple/Makefile +++ b/src/gallium/drivers/i965simple/Makefile @@ -50,5 +50,3 @@ C_SOURCES = \ brw_wm_surface_state.c include ../../Makefile.template - -symlinks: diff --git a/src/gallium/drivers/i965simple/brw_blit.c b/src/gallium/drivers/i965simple/brw_blit.c index 8494f70493..4d11f8d2ab 100644 --- a/src/gallium/drivers/i965simple/brw_blit.c +++ b/src/gallium/drivers/i965simple/brw_blit.c @@ -35,7 +35,7 @@ #include "brw_reg.h" #include "pipe/p_context.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #define FILE_DEBUG_FLAG DEBUG_BLIT diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c index 79d4150383..3668123e2e 100644 --- a/src/gallium/drivers/i965simple/brw_cc.c +++ b/src/gallium/drivers/i965simple/brw_cc.c @@ -166,8 +166,8 @@ static void upload_cc_unit( struct brw_context *brw ) cc.cc0.stencil_pass_depth_pass_op = brw_translate_stencil_op( brw->attribs.DepthStencil->stencil[0].zpass_op); cc.cc1.stencil_ref = brw->attribs.DepthStencil->stencil[0].ref_value; - cc.cc1.stencil_write_mask = brw->attribs.DepthStencil->stencil[0].write_mask; - cc.cc1.stencil_test_mask = brw->attribs.DepthStencil->stencil[0].value_mask; + 
cc.cc1.stencil_write_mask = brw->attribs.DepthStencil->stencil[0].writemask; + cc.cc1.stencil_test_mask = brw->attribs.DepthStencil->stencil[0].valuemask; if (brw->attribs.DepthStencil->stencil[1].enabled) { cc.cc0.bf_stencil_enable = brw->attribs.DepthStencil->stencil[1].enabled; @@ -180,14 +180,14 @@ static void upload_cc_unit( struct brw_context *brw ) cc.cc0.bf_stencil_pass_depth_pass_op = brw_translate_stencil_op( brw->attribs.DepthStencil->stencil[1].zpass_op); cc.cc1.bf_stencil_ref = brw->attribs.DepthStencil->stencil[1].ref_value; - cc.cc2.bf_stencil_write_mask = brw->attribs.DepthStencil->stencil[1].write_mask; - cc.cc2.bf_stencil_test_mask = brw->attribs.DepthStencil->stencil[1].value_mask; + cc.cc2.bf_stencil_write_mask = brw->attribs.DepthStencil->stencil[1].writemask; + cc.cc2.bf_stencil_test_mask = brw->attribs.DepthStencil->stencil[1].valuemask; } /* Not really sure about this: */ - if (brw->attribs.DepthStencil->stencil[0].write_mask || - brw->attribs.DepthStencil->stencil[1].write_mask) + if (brw->attribs.DepthStencil->stencil[0].writemask || + brw->attribs.DepthStencil->stencil[1].writemask) cc.cc0.stencil_write_enable = 1; } @@ -233,7 +233,7 @@ static void upload_cc_unit( struct brw_context *brw ) cc.cc3.alpha_test_func = brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func); - cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref); + cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref_value); cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; } diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c index 96920df008..c74cbf8d73 100644 --- a/src/gallium/drivers/i965simple/brw_context.c +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -37,7 +37,7 @@ #include "brw_tex_layout.h" #include "brw_winsys.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_context.h" #include "util/u_memory.h" 
#include "pipe/p_screen.h" diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c index 824ee7fd6d..904cde8e30 100644 --- a/src/gallium/drivers/i965simple/brw_curbe.c +++ b/src/gallium/drivers/i965simple/brw_curbe.c @@ -38,7 +38,7 @@ #include "brw_util.h" #include "brw_wm.h" #include "pipe/p_state.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -257,13 +257,13 @@ static void upload_constant_buffer(struct brw_context *brw) if (brw->vs.prog_data->num_consts) { /* map the vertex constant buffer and copy to curbe: */ void *data = ws->buffer_map(ws, cbuffer->buffer, 0); - /* FIXME: this is wrong. the cbuffer->size currently + /* FIXME: this is wrong. the cbuffer->buffer->size currently * represents size of consts + immediates. so if we'll * have both we'll copy over the end of the buffer * with the subsequent memcpy */ - memcpy(&buf[offset], data, cbuffer->size); + memcpy(&buf[offset], data, cbuffer->buffer->size); ws->buffer_unmap(ws, cbuffer->buffer); - offset += cbuffer->size; + offset += cbuffer->buffer->size; } /*immediates*/ if (brw->vs.prog_data->num_imm) { diff --git a/src/gallium/drivers/i965simple/brw_draw.c b/src/gallium/drivers/i965simple/brw_draw.c index 7598e3dc8a..648aaa0da5 100644 --- a/src/gallium/drivers/i965simple/brw_draw.c +++ b/src/gallium/drivers/i965simple/brw_draw.c @@ -34,7 +34,7 @@ #include "brw_state.h" #include "pipe/p_context.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" static unsigned hw_prim[PIPE_PRIM_POLYGON+1] = { _3DPRIM_POINTLIST, diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c index 7c20ea52af..2d9ca3f2ea 100644 --- a/src/gallium/drivers/i965simple/brw_draw_upload.c +++ b/src/gallium/drivers/i965simple/brw_draw_upload.c @@ -223,7 +223,7 @@ boolean brw_upload_vertex_buffers( struct brw_context *brw ) break; } - 
vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->pitch; + vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->stride; vbp.vb[i].vb0.bits.pad = 0; vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; vbp.vb[i].vb0.bits.vb_index = i; diff --git a/src/gallium/drivers/i965simple/brw_eu_debug.c b/src/gallium/drivers/i965simple/brw_eu_debug.c index 4a94ddefa6..4adfb0c02f 100644 --- a/src/gallium/drivers/i965simple/brw_eu_debug.c +++ b/src/gallium/drivers/i965simple/brw_eu_debug.c @@ -30,7 +30,7 @@ */ -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "brw_eu.h" diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c index be812c5da9..99ff4403a5 100644 --- a/src/gallium/drivers/i965simple/brw_misc_state.c +++ b/src/gallium/drivers/i965simple/brw_misc_state.c @@ -223,7 +223,7 @@ static void upload_depthbuffer(struct brw_context *brw) OUT_BATCH(0); } else { unsigned int format; - + struct brw_texture *tex = (struct brw_texture *)depth_surface->texture; assert(depth_surface->block.width == 1); assert(depth_surface->block.height == 1); switch (depth_surface->block.size) { @@ -246,7 +246,7 @@ static void upload_depthbuffer(struct brw_context *brw) (BRW_TILEWALK_YMAJOR << 26) | // (depth_surface->region->tiled << 27) | (BRW_SURFACE_2D << 29)); - OUT_RELOC(depth_surface->buffer, + OUT_RELOC(tex->buffer, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | ((depth_surface->stride/depth_surface->block.size - 1) << 6) | diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index ab7cd624b2..b22e105f10 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -27,8 +27,9 @@ #include "util/u_memory.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_string.h" +#include "util/u_simple_screen.h" #include 
"brw_context.h" #include "brw_screen.h" @@ -239,6 +240,7 @@ brw_create_screen(struct pipe_winsys *winsys, uint pci_id) brwscreen->screen.is_format_supported = brw_is_format_supported; brw_init_screen_texture_funcs(&brwscreen->screen); + u_simple_screen_init(&brwscreen->screen); return &brwscreen->screen; } diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index af46cb546f..b47f5373f3 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -30,7 +30,7 @@ */ -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c index 007dc8f9de..e91263cb1f 100644 --- a/src/gallium/drivers/i965simple/brw_state_pool.c +++ b/src/gallium/drivers/i965simple/brw_state_pool.c @@ -42,7 +42,7 @@ * the pool. 
*/ -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" #include "pipe/p_inlines.h" diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index b89756c47b..0a95dce194 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -30,7 +30,7 @@ #include "brw_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_tile.h" #include "util/u_rect.h" @@ -74,13 +74,15 @@ brw_surface_copy(struct pipe_context *pipe, pipe->screen->surface_unmap(pipe->screen, dst); } else { + struct brw_texture *dst_tex = (struct brw_texture *)dst->texture; + struct brw_texture *src_tex = (struct brw_texture *)src->texture; assert(dst->block.width == 1); assert(dst->block.height == 1); brw_copy_blit(brw_context(pipe), do_flip, dst->block.size, - (short) src->stride/src->block.size, src->buffer, src->offset, FALSE, - (short) dst->stride/dst->block.size, dst->buffer, dst->offset, FALSE, + (short) src->stride/src->block.size, src_tex->buffer, src->offset, FALSE, + (short) dst->stride/dst->block.size, dst_tex->buffer, dst->offset, FALSE, (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height, PIPE_LOGICOP_COPY); } @@ -103,12 +105,13 @@ brw_surface_fill(struct pipe_context *pipe, pipe->screen->surface_unmap(pipe->screen, dst); } else { + struct brw_texture *tex = (struct brw_texture *)dst->texture; assert(dst->block.width == 1); assert(dst->block.height == 1); brw_fill_blit(brw_context(pipe), dst->block.size, (short) dst->stride/dst->block.size, - dst->buffer, dst->offset, FALSE, + tex->buffer, dst->offset, FALSE, (short) dstx, (short) dsty, (short) width, (short) height, value); diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 
cc0c665e02..448229ed4e 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -37,7 +37,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" #include "brw_context.h" @@ -296,9 +296,9 @@ brw_texture_create_screen(struct pipe_screen *screen, if (brw_miptree_layout(tex)) tex->buffer = ws->buffer_create(ws, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex->stride * - tex->total_nblocksy); + PIPE_BUFFER_USAGE_PIXEL, + tex->stride * + tex->total_nblocksy); if (!tex->buffer) { FREE(tex); @@ -322,7 +322,6 @@ brw_texture_release_screen(struct pipe_screen *screen, __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); */ if (--(*pt)->refcount <= 0) { - struct pipe_winsys *ws = screen->winsys; struct brw_texture *tex = (struct brw_texture *)*pt; uint i; @@ -330,7 +329,7 @@ brw_texture_release_screen(struct pipe_screen *screen, DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ - winsys_buffer_reference(ws, &tex->buffer, NULL); + pipe_buffer_reference(screen, &tex->buffer, NULL); for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) if (tex->image_offset[i]) @@ -347,7 +346,6 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice) { - struct pipe_winsys *ws = screen->winsys; struct brw_texture *tex = (struct brw_texture *)pt; struct pipe_surface *ps; unsigned offset; /* in bytes */ @@ -365,11 +363,10 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, assert(zslice == 0); } - ps = ws->surface_alloc(ws); + ps = CALLOC_STRUCT(pipe_surface); if (ps) { - assert(ps->format); - assert(ps->refcount); - winsys_buffer_reference(ws, &ps->buffer, tex->buffer); + ps->refcount = 1; + pipe_texture_reference(&ps->texture, pt); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; 
@@ -378,6 +375,7 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, ps->nblocksy = pt->nblocksy[level]; ps->stride = tex->stride; ps->offset = offset; + ps->status = PIPE_SURFACE_STATUS_DEFINED; } return ps; } diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index 34dbc0624d..e03d653482 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -1293,7 +1293,7 @@ void brw_vs_emit(struct brw_vs_compile *c) break; case TGSI_TOKEN_TYPE_IMMEDIATE: { struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate; - /*assert(imm->Immediate.Size == 4);*/ + assert(imm->Immediate.NrTokens == 4 + 1); c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float; c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float; c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float; diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c index 8de565b96c..10161f2d2f 100644 --- a/src/gallium/drivers/i965simple/brw_wm.c +++ b/src/gallium/drivers/i965simple/brw_wm.c @@ -111,8 +111,8 @@ static void brw_wm_populate_key( struct brw_context *brw, if (brw->attribs.DepthStencil->stencil[0].enabled) { lookup |= IZ_STENCIL_TEST_ENABLE_BIT; - if (brw->attribs.DepthStencil->stencil[0].write_mask || - brw->attribs.DepthStencil->stencil[1].write_mask) + if (brw->attribs.DepthStencil->stencil[0].writemask || + brw->attribs.DepthStencil->stencil[1].writemask) lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; } diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c index 1a326f9918..1bab5bfdb3 100644 --- a/src/gallium/drivers/i965simple/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -193,6 +193,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) /* BRW_NEW_FRAMEBUFFER */ 
struct pipe_surface *pipe_surface = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/ + struct brw_texture *tex = (struct brw_texture *)pipe_surface->texture; memset(&surf, 0, sizeof(surf)); @@ -204,7 +205,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) surf.ss0.surface_type = BRW_SURFACE_2D; - surf.ss1.base_addr = brw_buffer_offset( brw, pipe_surface->buffer ); + surf.ss1.base_addr = brw_buffer_offset( brw, tex->buffer ); surf.ss2.width = pipe_surface->width - 1; surf.ss2.height = pipe_surface->height - 1; diff --git a/src/gallium/drivers/nouveau/nouveau_gldefs.h b/src/gallium/drivers/nouveau/nouveau_gldefs.h new file mode 100644 index 0000000000..ff97aaa9af --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_gldefs.h @@ -0,0 +1,196 @@ +#ifndef __NOUVEAU_GLDEFS_H__ +#define __NOUVEAU_GLDEFS_H__ + +static INLINE unsigned +nvgl_blend_func(unsigned factor) +{ + switch (factor) { + case PIPE_BLENDFACTOR_ZERO: + return 0x0000; + case PIPE_BLENDFACTOR_ONE: + return 0x0001; + case PIPE_BLENDFACTOR_SRC_COLOR: + return 0x0300; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return 0x0301; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return 0x0302; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return 0x0303; + case PIPE_BLENDFACTOR_DST_ALPHA: + return 0x0304; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return 0x0305; + case PIPE_BLENDFACTOR_DST_COLOR: + return 0x0306; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return 0x0307; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return 0x0308; + case PIPE_BLENDFACTOR_CONST_COLOR: + return 0x8001; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return 0x8002; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return 0x8003; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return 0x8004; + default: + return 0x0000; + } +} + +static INLINE unsigned +nvgl_blend_eqn(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + return 0x8006; + case PIPE_BLEND_MIN: + return 0x8007; + case PIPE_BLEND_MAX: + return 0x8008; + case PIPE_BLEND_SUBTRACT: + return 0x800a; + case 
PIPE_BLEND_REVERSE_SUBTRACT: + return 0x800b; + default: + return 0x8006; + } +} + +static INLINE unsigned +nvgl_logicop_func(unsigned func) +{ + switch (func) { + case PIPE_LOGICOP_CLEAR: + return 0x1500; + case PIPE_LOGICOP_NOR: + return 0x1508; + case PIPE_LOGICOP_AND_INVERTED: + return 0x1504; + case PIPE_LOGICOP_COPY_INVERTED: + return 0x150c; + case PIPE_LOGICOP_AND_REVERSE: + return 0x1502; + case PIPE_LOGICOP_INVERT: + return 0x150a; + case PIPE_LOGICOP_XOR: + return 0x1506; + case PIPE_LOGICOP_NAND: + return 0x150e; + case PIPE_LOGICOP_AND: + return 0x1501; + case PIPE_LOGICOP_EQUIV: + return 0x1509; + case PIPE_LOGICOP_NOOP: + return 0x1505; + case PIPE_LOGICOP_OR_INVERTED: + return 0x150d; + case PIPE_LOGICOP_COPY: + return 0x1503; + case PIPE_LOGICOP_OR_REVERSE: + return 0x150b; + case PIPE_LOGICOP_OR: + return 0x1507; + case PIPE_LOGICOP_SET: + return 0x150f; + default: + return 0x1505; + } +} + +static INLINE unsigned +nvgl_comparison_op(unsigned op) +{ + switch (op) { + case PIPE_FUNC_NEVER: + return 0x0200; + case PIPE_FUNC_LESS: + return 0x0201; + case PIPE_FUNC_EQUAL: + return 0x0202; + case PIPE_FUNC_LEQUAL: + return 0x0203; + case PIPE_FUNC_GREATER: + return 0x0204; + case PIPE_FUNC_NOTEQUAL: + return 0x0205; + case PIPE_FUNC_GEQUAL: + return 0x0206; + case PIPE_FUNC_ALWAYS: + return 0x0207; + default: + return 0x0207; + } +} + +static INLINE unsigned +nvgl_polygon_mode(unsigned mode) +{ + switch (mode) { + case PIPE_POLYGON_MODE_POINT: + return 0x1b00; + case PIPE_POLYGON_MODE_LINE: + return 0x1b01; + case PIPE_POLYGON_MODE_FILL: + return 0x1b02; + default: + return 0x1b02; + } +} + +static INLINE unsigned +nvgl_stencil_op(unsigned op) +{ + switch (op) { + case PIPE_STENCIL_OP_ZERO: + return 0x0000; + case PIPE_STENCIL_OP_INVERT: + return 0x150a; + case PIPE_STENCIL_OP_KEEP: + return 0x1e00; + case PIPE_STENCIL_OP_REPLACE: + return 0x1e01; + case PIPE_STENCIL_OP_INCR: + return 0x1e02; + case PIPE_STENCIL_OP_DECR: + return 0x1e03; + case 
PIPE_STENCIL_OP_INCR_WRAP: + return 0x8507; + case PIPE_STENCIL_OP_DECR_WRAP: + return 0x8508; + default: + return 0x1e00; + } +} + +static INLINE unsigned +nvgl_primitive(unsigned prim) { + switch (prim) { + case PIPE_PRIM_POINTS: + return 0x0001; + case PIPE_PRIM_LINES: + return 0x0002; + case PIPE_PRIM_LINE_LOOP: + return 0x0003; + case PIPE_PRIM_LINE_STRIP: + return 0x0004; + case PIPE_PRIM_TRIANGLES: + return 0x0005; + case PIPE_PRIM_TRIANGLE_STRIP: + return 0x0006; + case PIPE_PRIM_TRIANGLE_FAN: + return 0x0007; + case PIPE_PRIM_QUADS: + return 0x0008; + case PIPE_PRIM_QUAD_STRIP: + return 0x0009; + case PIPE_PRIM_POLYGON: + return 0x000a; + default: + return 0; + } +} + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_push.h b/src/gallium/drivers/nouveau/nouveau_push.h new file mode 100644 index 0000000000..54ef1c1291 --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_push.h @@ -0,0 +1,82 @@ +#ifndef __NOUVEAU_PUSH_H__ +#define __NOUVEAU_PUSH_H__ + +#include "nouveau/nouveau_winsys.h" + +#ifndef NOUVEAU_PUSH_CONTEXT +#error undefined push context +#endif + +#define OUT_RING(data) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + (*pc->nvws->channel->pushbuf->cur++) = (data); \ +} while(0) + +#define OUT_RINGp(src,size) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + memcpy(pc->nvws->channel->pushbuf->cur, (src), (size) * 4); \ + pc->nvws->channel->pushbuf->cur += (size); \ +} while(0) + +#define OUT_RINGf(data) do { \ + union { float v; uint32_t u; } c; \ + c.v = (data); \ + OUT_RING(c.u); \ +} while(0) + +#define BEGIN_RING(obj,mthd,size) do { \ + NOUVEAU_PUSH_CONTEXT(pc); \ + if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \ + pc->nvws->push_flush(pc->nvws, ((size) + 1), NULL); \ + OUT_RING((pc->obj->subc << 13) | ((size) << 18) | (mthd)); \ + pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \ +} while(0) + +#define BEGIN_RING_NI(obj,mthd,size) do { \ + BEGIN_RING(obj, (mthd) | 0x40000000, (size)); \ +} while(0) + +#define FIRE_RING(fence) do { \ 
+	NOUVEAU_PUSH_CONTEXT(pc); \
+	pc->nvws->push_flush(pc->nvws, 0, fence); \
+} while(0)
+
+#define OUT_RELOC(bo,data,flags,vor,tor) do { \
+	NOUVEAU_PUSH_CONTEXT(pc); \
+	pc->nvws->push_reloc(pc->nvws, pc->nvws->channel->pushbuf->cur++, \
+			     (bo), (data), (flags), (vor), (tor)); \
+} while(0)
+
+/* Raw data + flags depending on FB/TT buffer */
+#define OUT_RELOCd(bo,data,flags,vor,tor) do { \
+	OUT_RELOC((bo), (data), (flags) | NOUVEAU_BO_OR, (vor), (tor)); \
+} while(0)
+
+/* FB/TT object handle */
+#define OUT_RELOCo(bo,flags) do { \
+	OUT_RELOC((bo), 0, (flags) | NOUVEAU_BO_OR, \
+		  pc->nvws->channel->vram->handle, \
+		  pc->nvws->channel->gart->handle); \
+} while(0)
+
+/* Low 32-bits of offset */
+#define OUT_RELOCl(bo,delta,flags) do { \
+	OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_LOW, 0, 0); \
+} while(0)
+
+/* High 32-bits of offset */
+#define OUT_RELOCh(bo,delta,flags) do { \
+	OUT_RELOC((bo), (delta), (flags) | NOUVEAU_BO_HIGH, 0, 0); \
+} while(0)
+
+/* A reloc which'll recombine into a NV_DMA_METHOD packet header; flushes first (via the winsys, like BEGIN_RING) when fewer than size + 1 words remain */
+#define OUT_RELOCm(bo, flags, obj, mthd, size) do { \
+	NOUVEAU_PUSH_CONTEXT(pc); \
+	if (pc->nvws->channel->pushbuf->remaining < ((size) + 1)) \
+		pc->nvws->push_flush(pc->nvws, ((size) + 1), NULL); \
+	OUT_RELOCd((bo), (pc->obj->subc << 13) | ((size) << 18) | (mthd), \
+		   (flags), 0, 0); \
+	pc->nvws->channel->pushbuf->remaining -= ((size) + 1); \
+} while(0)
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_stateobj.h b/src/gallium/drivers/nouveau/nouveau_stateobj.h
new file mode 100644
index 0000000000..029b01e17d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_stateobj.h
@@ -0,0 +1,159 @@
+#ifndef __NOUVEAU_STATEOBJ_H__
+#define __NOUVEAU_STATEOBJ_H__
+
+#include "util/u_debug.h"
+
+struct nouveau_stateobj_reloc {
+	struct pipe_buffer *bo;
+
+	unsigned offset;
+	unsigned packet;
+
+	unsigned data;
+	unsigned flags;
+	unsigned vor;
+	unsigned tor;
+};
+
+struct nouveau_stateobj {
+	int refcount;
+
+	unsigned
*push;
+	struct nouveau_stateobj_reloc *reloc;
+
+	unsigned *cur;
+	unsigned cur_packet;
+	unsigned cur_reloc;
+};
+
+static INLINE struct nouveau_stateobj *
+so_new(unsigned push, unsigned reloc)
+{
+	struct nouveau_stateobj *so;
+
+	so = MALLOC(sizeof(struct nouveau_stateobj));
+	so->refcount = 0;
+	so->push = MALLOC(sizeof(unsigned) * push);
+	so->reloc = MALLOC(sizeof(struct nouveau_stateobj_reloc) * reloc);
+
+	so->cur = so->push;
+	so->cur_reloc = so->cur_packet = 0;
+
+	return so;
+}
+
+static INLINE void
+so_ref(struct nouveau_stateobj *ref, struct nouveau_stateobj **pso)
+{
+	struct nouveau_stateobj *so = *pso;
+
+	if (ref) {
+		ref->refcount++;
+	}
+
+	if (so && --so->refcount <= 0) { /* last reference dropped: release with FREE to pair with MALLOC in so_new */
+		FREE(so->push);
+		FREE(so->reloc);
+		FREE(so);
+	}
+
+	*pso = ref;
+}
+
+static INLINE void
+so_data(struct nouveau_stateobj *so, unsigned data)
+{
+	(*so->cur++) = (data);
+	so->cur_packet += 4;
+}
+
+static INLINE void
+so_datap(struct nouveau_stateobj *so, unsigned *data, unsigned size)
+{
+	so->cur_packet += (4 * size);
+	while (size--)
+		(*so->cur++) = (*data++);
+}
+
+static INLINE void
+so_method(struct nouveau_stateobj *so, struct nouveau_grobj *gr,
+	  unsigned mthd, unsigned size)
+{
+	so->cur_packet = (gr->subc << 13) | (1 << 18) | (mthd - 4);
+	so_data(so, (gr->subc << 13) | (size << 18) | mthd);
+}
+
+static INLINE void
+so_reloc(struct nouveau_stateobj *so, struct pipe_buffer *bo,
+	 unsigned data, unsigned flags, unsigned vor, unsigned tor)
+{
+	struct nouveau_stateobj_reloc *r = &so->reloc[so->cur_reloc++];
+
+	r->bo = bo;
+	r->offset = so->cur - so->push;
+	r->packet = so->cur_packet;
+	r->data = data;
+	r->flags = flags;
+	r->vor = vor;
+	r->tor = tor;
+	so_data(so, data);
+}
+
+static INLINE void
+so_dump(struct nouveau_stateobj *so)
+{
+	unsigned i, nr = so->cur - so->push;
+
+	for (i = 0; i < nr; i++)
+		debug_printf("+0x%04x: 0x%08x\n", i, so->push[i]);
+}
+
+static INLINE void
+so_emit(struct nouveau_winsys *nvws, struct nouveau_stateobj *so)
+{
+	struct
nouveau_pushbuf *pb = nvws->channel->pushbuf; + unsigned nr, i; + + nr = so->cur - so->push; + if (pb->remaining < nr) + nvws->push_flush(nvws, nr, NULL); + pb->remaining -= nr; + + memcpy(pb->cur, so->push, nr * 4); + for (i = 0; i < so->cur_reloc; i++) { + struct nouveau_stateobj_reloc *r = &so->reloc[i]; + + nvws->push_reloc(nvws, pb->cur + r->offset, r->bo, + r->data, r->flags, r->vor, r->tor); + } + pb->cur += nr; +} + +static INLINE void +so_emit_reloc_markers(struct nouveau_winsys *nvws, struct nouveau_stateobj *so) +{ + struct nouveau_pushbuf *pb = nvws->channel->pushbuf; + unsigned i; + + if (!so) + return; + + i = so->cur_reloc << 1; + if (nvws->channel->pushbuf->remaining < i) + nvws->push_flush(nvws, i, NULL); + nvws->channel->pushbuf->remaining -= i; + + for (i = 0; i < so->cur_reloc; i++) { + struct nouveau_stateobj_reloc *r = &so->reloc[i]; + + nvws->push_reloc(nvws, pb->cur++, r->bo, r->packet, + (r->flags & (NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART | + NOUVEAU_BO_RDWR)) | + NOUVEAU_BO_DUMMY, 0, 0); + nvws->push_reloc(nvws, pb->cur++, r->bo, r->data, + r->flags | NOUVEAU_BO_DUMMY, r->vor, r->tor); + } +} + +#endif diff --git a/src/gallium/drivers/nouveau/nouveau_util.h b/src/gallium/drivers/nouveau/nouveau_util.h new file mode 100644 index 0000000000..a10114beab --- /dev/null +++ b/src/gallium/drivers/nouveau/nouveau_util.h @@ -0,0 +1,91 @@ +#ifndef __NOUVEAU_UTIL_H__ +#define __NOUVEAU_UTIL_H__ + +/* Determine how many vertices can be pushed into the command stream. + * Where the remaining space isn't large enough to represent all vertices, + * split the buffer at primitive boundaries. + * + * Returns a count of vertices that can be rendered, and an index to + * restart drawing at after a flush.
+ */
+static INLINE unsigned
+nouveau_vbuf_split(unsigned remaining, unsigned overhead, unsigned vpp,
+		   unsigned mode, unsigned start, unsigned count,
+		   unsigned *restart)
+{
+	int max, adj = 0;
+
+	max = remaining - overhead;
+	if (max < 0)
+		return 0;
+
+	max *= vpp;
+	if (max >= count)
+		return count;
+
+	switch (mode) {
+	case PIPE_PRIM_POINTS:
+		break;
+	case PIPE_PRIM_LINES:
+		max = max & ~1; /* round down to whole lines (2 vertices each) */
+		break;
+	case PIPE_PRIM_TRIANGLES:
+		max = max - (max % 3);
+		break;
+	case PIPE_PRIM_QUADS:
+		max = max & ~3; /* round down to whole quads (4 vertices each) */
+		break;
+	case PIPE_PRIM_LINE_LOOP:
+	case PIPE_PRIM_LINE_STRIP:
+		if (max < 2)
+			max = 0;
+		adj = 1;
+		break;
+	case PIPE_PRIM_POLYGON:
+	case PIPE_PRIM_TRIANGLE_STRIP:
+	case PIPE_PRIM_TRIANGLE_FAN:
+		if (max < 3)
+			max = 0;
+		adj = 2;
+		break;
+	case PIPE_PRIM_QUAD_STRIP:
+		if (max < 4)
+			max = 0;
+		adj = 3;
+		break;
+	default:
+		assert(0);
+	}
+
+	*restart = start + max - adj;
+	return max;
+}
+
+/* Integer base-2 logarithm, rounded towards zero. */
+static INLINE unsigned log2i(unsigned i)
+{
+	unsigned r = 0;
+
+	if (i & 0xffff0000) {
+		i >>= 16;
+		r += 16;
+	}
+	if (i & 0x0000ff00) {
+		i >>= 8;
+		r += 8;
+	}
+	if (i & 0x000000f0) {
+		i >>= 4;
+		r += 4;
+	}
+	if (i & 0x0000000c) {
+		i >>= 2;
+		r += 2;
+	}
+	if (i & 0x00000002) {
+		r += 1;
+	}
+	return r;
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
new file mode 100644
index 0000000000..4fcadbae3f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -0,0 +1,101 @@
+#ifndef NOUVEAU_WINSYS_H
+#define NOUVEAU_WINSYS_H
+
+#include <stdint.h>
+#include "pipe/internal/p_winsys_screen.h"
+#include "pipe/p_defines.h"
+
+#include "nouveau/nouveau_bo.h"
+#include "nouveau/nouveau_channel.h"
+#include "nouveau/nouveau_class.h"
+#include "nouveau/nouveau_device.h"
+#include "nouveau/nouveau_grobj.h"
+#include "nouveau/nouveau_notifier.h"
+#include "nouveau/nouveau_resource.h"
+#include "nouveau/nouveau_pushbuf.h"
+
+#define
NOUVEAU_CAP_HW_VTXBUF (0xbeef0000) +#define NOUVEAU_CAP_HW_IDXBUF (0xbeef0001) + +#define NOUVEAU_TEXTURE_USAGE_LINEAR (1 << 16) + +#define NOUVEAU_BUFFER_USAGE_TEXTURE (1 << 16) +#define NOUVEAU_BUFFER_USAGE_ZETA (1 << 17) +#define NOUVEAU_BUFFER_USAGE_TRANSFER (1 << 18) + +struct nouveau_winsys { + struct nouveau_context *nv; + + struct nouveau_channel *channel; + + int (*res_init)(struct nouveau_resource **heap, unsigned start, + unsigned size); + int (*res_alloc)(struct nouveau_resource *heap, int size, void *priv, + struct nouveau_resource **); + void (*res_free)(struct nouveau_resource **); + + int (*push_reloc)(struct nouveau_winsys *, void *ptr, + struct pipe_buffer *, uint32_t data, + uint32_t flags, uint32_t vor, uint32_t tor); + int (*push_flush)(struct nouveau_winsys *, unsigned size, + struct pipe_fence_handle **fence); + + int (*grobj_alloc)(struct nouveau_winsys *, int grclass, + struct nouveau_grobj **); + void (*grobj_free)(struct nouveau_grobj **); + + int (*notifier_alloc)(struct nouveau_winsys *, int count, + struct nouveau_notifier **); + void (*notifier_free)(struct nouveau_notifier **); + void (*notifier_reset)(struct nouveau_notifier *, int id); + uint32_t (*notifier_status)(struct nouveau_notifier *, int id); + uint32_t (*notifier_retval)(struct nouveau_notifier *, int id); + int (*notifier_wait)(struct nouveau_notifier *, int id, + int status, double timeout); + + int (*surface_copy)(struct nouveau_winsys *, struct pipe_surface *, + unsigned, unsigned, struct pipe_surface *, + unsigned, unsigned, unsigned, unsigned); + int (*surface_fill)(struct nouveau_winsys *, struct pipe_surface *, + unsigned, unsigned, unsigned, unsigned, unsigned); + + struct nouveau_bo *(*get_bo)(struct pipe_buffer *); +}; + +extern struct pipe_screen * +nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv04_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv10_screen_create(struct 
pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv10_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv20_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv20_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv30_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv40_create(struct pipe_screen *, unsigned pctx_id); + +extern struct pipe_screen * +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *); + +extern struct pipe_context * +nv50_create(struct pipe_screen *, unsigned pctx_id); + +#endif diff --git a/src/gallium/drivers/nv04/Makefile b/src/gallium/drivers/nv04/Makefile new file mode 100644 index 0000000000..cf9deea851 --- /dev/null +++ b/src/gallium/drivers/nv04/Makefile @@ -0,0 +1,20 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = nv04 + +C_SOURCES = \ + nv04_surface_2d.c \ + nv04_clear.c \ + nv04_context.c \ + nv04_fragprog.c \ + nv04_fragtex.c \ + nv04_miptree.c \ + nv04_prim_vbuf.c \ + nv04_screen.c \ + nv04_state.c \ + nv04_state_emit.c \ + nv04_surface.c \ + nv04_vbo.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/nv04/nv04_clear.c b/src/gallium/drivers/nv04/nv04_clear.c new file mode 100644 index 0000000000..01cacd36fe --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_clear.c @@ -0,0 +1,12 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv04_context.h" + +void +nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c new file mode 100644 index 0000000000..d6710cd892 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -0,0 +1,107 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv04_context.h" +#include "nv04_screen.h" + +static void +nv04_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + draw_flush(nv04->draw); + + FIRE_RING(fence); +} + +static void +nv04_destroy(struct pipe_context *pipe) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + if (nv04->draw) + draw_destroy(nv04->draw); + + FREE(nv04); +} + +static void +nv04_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + +static boolean +nv04_init_hwctx(struct nv04_context *nv04) +{ + // requires a valid handle +// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOTIFY, 1); +// OUT_RING(0); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_NOP, 1); + OUT_RING(0); + + BEGIN_RING(fahrenheit, 
NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(0x40182800); +// OUT_RING(1<<20/*no cull*/); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); +// OUT_RING(0x24|(1<<6)|(1<<8)); + OUT_RING(0x120001a4); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FORMAT, 1); + OUT_RING(0x332213a1); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FILTER, 1); + OUT_RING(0x11001010); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_COLORKEY, 1); + OUT_RING(0x0); +// BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 1); +// OUT_RING(SCREEN_OFFSET); + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_FOGCOLOR, 1); + OUT_RING(0xff000000); + + + + FIRE_RING (NULL); + return TRUE; +} + +struct pipe_context * +nv04_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct nv04_screen *screen = nv04_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nv04_context *nv04; + struct nouveau_winsys *nvws = screen->nvws; + + nv04 = CALLOC(1, sizeof(struct nv04_context)); + if (!nv04) + return NULL; + nv04->screen = screen; + nv04->pctx_id = pctx_id; + + nv04->nvws = nvws; + + nv04->pipe.winsys = ws; + nv04->pipe.screen = pscreen; + nv04->pipe.destroy = nv04_destroy; + nv04->pipe.set_edgeflags = nv04_set_edgeflags; + nv04->pipe.draw_arrays = nv04_draw_arrays; + nv04->pipe.draw_elements = nv04_draw_elements; + nv04->pipe.clear = nv04_clear; + nv04->pipe.flush = nv04_flush; + + nv04_init_surface_functions(nv04); + nv04_init_state_functions(nv04); + + nv04->draw = draw_create(); + assert(nv04->draw); + draw_wide_point_threshold(nv04->draw, 0.0); + draw_wide_line_threshold(nv04->draw, 0.0); + draw_enable_line_stipple(nv04->draw, FALSE); + draw_enable_point_sprites(nv04->draw, FALSE); + draw_set_rasterize_stage(nv04->draw, nv04_draw_vbuf_stage(nv04)); + + nv04_init_hwctx(nv04); + + return &nv04->pipe; +} + diff --git a/src/gallium/drivers/nv04/nv04_context.h b/src/gallium/drivers/nv04/nv04_context.h new file mode 100644 index 
0000000000..2842b2c90d --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_context.h @@ -0,0 +1,151 @@ +#ifndef __NV04_CONTEXT_H__ +#define __NV04_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv04_screen *ctx = nv04->screen +#include "nouveau/nouveau_push.h" + +#include "nv04_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +#include "nv04_screen.h" + +#define NV04_NEW_VERTPROG (1 << 1) +#define NV04_NEW_FRAGPROG (1 << 2) +#define NV04_NEW_BLEND (1 << 3) +#define NV04_NEW_RAST (1 << 4) +#define NV04_NEW_CONTROL (1 << 5) +#define NV04_NEW_VIEWPORT (1 << 6) +#define NV04_NEW_SAMPLER (1 << 7) +#define NV04_NEW_FRAMEBUFFER (1 << 8) +#define NV04_NEW_VTXARRAYS (1 << 9) + +struct nv04_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nv04_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + int chipset; + struct nouveau_notifier *sync; + + uint32_t dirty; + + struct nv04_blend_state *blend; + struct nv04_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + struct nv04_fragtex_state fragtex; + struct nv04_rasterizer_state *rast; + struct nv04_depth_stencil_alpha_state *dsa; + + struct nv04_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned dirty_samplers; + unsigned fp_samplers; + unsigned vp_samplers; + + uint32_t rt_enable; + struct pipe_framebuffer_state *framebuffer; + struct pipe_surface *rt; + struct pipe_surface *zeta; + + struct { + struct pipe_buffer *buffer; + uint32_t format; + } tex[16]; + + unsigned vb_enable; + struct { + struct pipe_buffer *buffer; + unsigned delta; + } vb[16]; + + float 
*constbuf[PIPE_SHADER_TYPES][32][4]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; + + struct vertex_info vertex_info; + struct { + + struct nouveau_resource *exec_heap; + struct nouveau_resource *data_heap; + + struct nv04_vertex_program *active; + + struct nv04_vertex_program *current; + struct pipe_buffer *constant_buf; + } vertprog; + + struct { + struct nv04_fragment_program *active; + + struct nv04_fragment_program *current; + struct pipe_buffer *constant_buf; + } fragprog; + + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; + + struct pipe_viewport_state viewport; +}; + +static INLINE struct nv04_context * +nv04_context(struct pipe_context *pipe) +{ + return (struct nv04_context *)pipe; +} + +extern void nv04_init_state_functions(struct nv04_context *nv04); +extern void nv04_init_surface_functions(struct nv04_context *nv04); +extern void nv04_screen_init_miptree_functions(struct pipe_screen *screen); + +/* nv04_clear.c */ +extern void nv04_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +/* nv04_draw.c */ +extern struct draw_stage *nv04_draw_render_stage(struct nv04_context *nv04); + +/* nv04_fragprog.c */ +extern void nv04_fragprog_bind(struct nv04_context *, + struct nv04_fragment_program *); +extern void nv04_fragprog_destroy(struct nv04_context *, + struct nv04_fragment_program *); + +/* nv04_fragtex.c */ +extern void nv04_fragtex_bind(struct nv04_context *); + +/* nv04_prim_vbuf.c */ +struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 ); + +/* nv04_state.c and friends */ +extern void nv04_emit_hw_state(struct nv04_context *nv04); +extern void nv04_state_tex_update(struct nv04_context *nv04); + +/* nv04_vbo.c */ +extern boolean nv04_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv04_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned 
prim, unsigned start, unsigned count); + + +#endif diff --git a/src/gallium/drivers/nv04/nv04_fragprog.c b/src/gallium/drivers/nv04/nv04_fragprog.c new file mode 100644 index 0000000000..8a2af41fe0 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_fragprog.c @@ -0,0 +1,21 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv04_context.h" + +void +nv04_fragprog_bind(struct nv04_context *nv04, struct nv04_fragment_program *fp) +{ +} + +void +nv04_fragprog_destroy(struct nv04_context *nv04, + struct nv04_fragment_program *fp) +{ +} + diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c new file mode 100644 index 0000000000..21f990fd53 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_fragtex.c @@ -0,0 +1,73 @@ +#include "nv04_context.h" +#include "nouveau/nouveau_util.h" + +#define _(m,tf) \ +{ \ + PIPE_FORMAT_##m, \ + NV04_DX5_TEXTURED_TRIANGLE_FORMAT_COLOR_##tf, \ +} + +struct nv04_texture_format { + uint pipe; + int format; +}; + +static struct nv04_texture_format +nv04_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8), + _(X8R8G8B8_UNORM, X8R8G8B8), + _(A1R5G5B5_UNORM, A1R5G5B5), + _(A4R4G4B4_UNORM, A4R4G4B4), + _(L8_UNORM, Y8 ), + _(A8_UNORM, Y8 ), +}; + +static uint32_t +nv04_fragtex_format(uint pipe_format) +{ + struct nv04_texture_format *tf = nv04_texture_formats; + int i; + + for (i=0; i< sizeof(nv04_texture_formats)/sizeof(nv04_texture_formats[0]); i++) { + if (tf->pipe == pipe_format) + return tf->format; + tf++; + } + + NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); + return 0; +} + + +static void +nv04_fragtex_build(struct nv04_context *nv04, int unit) +{ + struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; + struct pipe_texture *pt = &nv04mt->base; + + switch (pt->target) { + case PIPE_TEXTURE_2D: + break; + default: + NOUVEAU_ERR("Unknown 
target %d\n", pt->target); + return; + } + + nv04->fragtex.format = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_ZOH_CORNER + | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER + | nv04_fragtex_format(pt->format) + | ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT ) + | ( log2i(pt->width[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT ) + | ( log2i(pt->height[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT ) + | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE + | NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE + ; +} + + +void +nv04_fragtex_bind(struct nv04_context *nv04) +{ + nv04_fragtex_build(nv04, 0); +} + diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c new file mode 100644 index 0000000000..993c5ef5dd --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -0,0 +1,177 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv04_context.h" +#include "nv04_screen.h" + +static void +nv04_miptree_layout(struct nv04_miptree *nv04mt) +{ + struct pipe_texture *pt = &nv04mt->base; + uint width = pt->width[0], height = pt->height[0]; + uint offset = 0; + int nr_faces, l; + + nr_faces = 1; + + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + nv04mt->level[l].pitch = pt->width[0]; + nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63; + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + } + + for (l = 0; l <= pt->last_level; l++) { + + nv04mt->level[l].image_offset = offset; + offset += nv04mt->level[l].pitch * pt->height[l]; + } + + nv04mt->total_size = offset; +} + +static struct pipe_texture * +nv04_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +{ + struct pipe_winsys *ws = pscreen->winsys; + 
struct nv04_miptree *mt; + + mt = MALLOC(sizeof(struct nv04_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + mt->shadow_tex = NULL; + mt->shadow_surface = NULL; + + //mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + + nv04_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL | + NOUVEAU_BUFFER_USAGE_TEXTURE, + mt->total_size); + if (!mt->buffer) { + printf("failed %d byte alloc\n",mt->total_size); + FREE(mt); + return NULL; + } + + return &mt->base; +} + +static struct pipe_texture * +nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, + const unsigned *stride, struct pipe_buffer *pb) +{ + struct nv04_miptree *mt; + + /* Only supports 2D, non-mipmapped textures for the moment */ + if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || + pt->depth[0] != 1) + return NULL; + + mt = CALLOC_STRUCT(nv04_miptree); + if (!mt) + return NULL; + + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + mt->level[0].pitch = stride[0]; + mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + + pipe_buffer_reference(pscreen, &mt->buffer, pb); + return &mt->base; +} + +static void +nv04_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) +{ + struct pipe_texture *pt = *ppt; + struct nv04_miptree *mt = (struct nv04_miptree *)pt; + int l; + + *ppt = NULL; + if (--pt->refcount) + return; + + pipe_buffer_reference(pscreen, &mt->buffer, NULL); + for (l = 0; l <= pt->last_level; l++) { + if (mt->level[l].image_offset) + FREE(mt->level[l].image_offset); + } + + if (mt->shadow_tex) { + assert(mt->shadow_surface); + pscreen->tex_surface_release(pscreen, &mt->shadow_surface); + nv04_miptree_release(pscreen, &mt->shadow_tex); + } + + FREE(mt); +} + +static struct pipe_surface * +nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned 
flags) +{ + struct nv04_miptree *nv04mt = (struct nv04_miptree *)pt; + struct pipe_surface *ps; + + ps = CALLOC_STRUCT(pipe_surface); + if (!ps) + return NULL; + pipe_texture_reference(&ps->texture, pt); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = nv04mt->level[level].pitch; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; + ps->refcount = 1; + ps->face = face; + ps->level = level; + ps->zslice = zslice; + + ps->offset = nv04mt->level[level].image_offset; + + return ps; +} + +static void +nv04_miptree_surface_del(struct pipe_screen *pscreen, + struct pipe_surface **psurface) +{ + struct pipe_surface *ps = *psurface; + + *psurface = NULL; + if (--ps->refcount > 0) + return; + + pipe_texture_reference(&ps->texture, NULL); + FREE(ps); +} + +void +nv04_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv04_miptree_create; + pscreen->texture_blanket = nv04_miptree_blanket; + pscreen->texture_release = nv04_miptree_release; + pscreen->get_tex_surface = nv04_miptree_surface_new; + pscreen->tex_surface_release = nv04_miptree_surface_del; +} + diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c new file mode 100644 index 0000000000..f6458232ae --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -0,0 +1,321 @@ + +#include "util/u_debug.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_compiler.h" + +#include "draw/draw_vbuf.h" + +#include "nv04_context.h" +#include "nv04_state.h" + +#define VERTEX_SIZE 40 +#define VERTEX_BUFFER_SIZE (4096*VERTEX_SIZE) // 4096 vertices of 40 bytes each + +/** + * Primitive renderer for nv04. 
+ */ +struct nv04_vbuf_render { + struct vbuf_render base; + + struct nv04_context *nv04; + + /** Vertex buffer */ + unsigned char* buffer; + + /** Vertex size in bytes */ + unsigned vertex_size; + + /** Current primitive */ + unsigned prim; +}; + + +/** + * Basically a cast wrapper. + */ +static INLINE struct nv04_vbuf_render * +nv04_vbuf_render( struct vbuf_render *render ) +{ + assert(render); + return (struct nv04_vbuf_render *)render; +} + + +static const struct vertex_info * +nv04_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + struct nv04_context *nv04 = nv04_render->nv04; + return &nv04->vertex_info; +} + + +static boolean +nv04_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + + nv04_render->buffer = (unsigned char*) MALLOC(VERTEX_BUFFER_SIZE); + assert(!nv04_render->buffer); + + return nv04_render->buffer ? 
TRUE : FALSE; +} + +static void * +nv04_vbuf_render_map_vertices( struct vbuf_render *render ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + return nv04_render->buffer; +} + +static void +nv04_vbuf_render_unmap_vertices( struct vbuf_render *render, + ushort min_index, + ushort max_index ) +{ +} + +static boolean +nv04_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + + if (prim <= PIPE_PRIM_LINE_STRIP) + return FALSE; + + nv04_render->prim = prim; + return TRUE; +} + +static INLINE void nv04_2triangles(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5) +{ + BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xA),49); + OUT_RINGp(buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(buffer + VERTEX_SIZE * v3,8); + OUT_RINGp(buffer + VERTEX_SIZE * v4,8); + OUT_RINGp(buffer + VERTEX_SIZE * v5,8); + OUT_RING(0xFEDCBA); +} + +static INLINE void nv04_1triangle(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2) +{ + BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xD),25); + OUT_RINGp(buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(buffer + VERTEX_SIZE * v2,8); + OUT_RING(0xFED); +} + +static INLINE void nv04_1quad(struct nv04_context* nv04, unsigned char* buffer, ushort v0, ushort v1, ushort v2, ushort v3) +{ + BEGIN_RING(fahrenheit,NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0xC),33); + OUT_RINGp(buffer + VERTEX_SIZE * v0,8); + OUT_RINGp(buffer + VERTEX_SIZE * v1,8); + OUT_RINGp(buffer + VERTEX_SIZE * v2,8); + OUT_RINGp(buffer + VERTEX_SIZE * v3,8); + OUT_RING(0xFECEDC); +} + +static void nv04_vbuf_render_triangles_elts(struct nv04_vbuf_render * render, const ushort * indices, uint nr_indices) +{ + unsigned char* buffer = 
render->buffer; + struct nv04_context* nv04 = render->nv04; + int i; + + for( i=0; i< nr_indices-5; i+=6) + nv04_2triangles(nv04, + buffer, + indices[i+0], + indices[i+1], + indices[i+2], + indices[i+3], + indices[i+4], + indices[i+5] + ); + if (i != nr_indices) + { + nv04_1triangle(nv04, + buffer, + indices[i+0], + indices[i+1], + indices[i+2] + ); + i+=3; + } + if (i != nr_indices) + NOUVEAU_ERR("Houston, we have lost some vertices\n"); +} + +static void nv04_vbuf_render_tri_strip_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) +{ + const uint32_t striptbl[]={0x321210,0x543432,0x765654,0x987876,0xBA9A98,0xDCBCBA,0xFEDEDC}; + unsigned char* buffer = render->buffer; + struct nv04_context* nv04 = render->nv04; + int i,j; + + for(i = 0; i<nr_indices; i+=14) + { + int numvert = MIN2(16, nr_indices - i); + int numtri = numvert - 2; + if (numvert<3) + break; + + BEGIN_RING( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), numvert*8 ); + for(j = 0; j<numvert; j++) + OUT_RINGp( buffer + VERTEX_SIZE * indices [i+j], 8 ); + + BEGIN_RING_NI( fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2 ); + for(j = 0; j<numtri/2; j++ ) + OUT_RING(striptbl[j]); + if (numtri%2) + OUT_RING(striptbl[numtri/2]&0xFFF); + } +} + +static void nv04_vbuf_render_tri_fan_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) +{ + const uint32_t fantbl[]={0x320210,0x540430,0x760650,0x980870,0xBA0A90,0xDC0CB0,0xFE0ED0}; + unsigned char* buffer = render->buffer; + struct nv04_context* nv04 = render->nv04; + int i,j; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x0), 8); + OUT_RINGp(buffer + VERTEX_SIZE * indices[0], 8); + + for(i = 1; i<nr_indices; i+=14) + { + int numvert=MIN2(15, nr_indices - i); + int numtri=numvert-2; + if (numvert < 3) + break; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_SX(0x1), numvert*8); + + for(j=0;j<numvert;j++) + OUT_RINGp( buffer + VERTEX_SIZE 
* indices[ i+j ], 8 ); + + BEGIN_RING_NI(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_TLVERTEX_DRAWPRIMITIVE(0), (numtri+1)/2); + for(j = 0; j<numtri/2; j++) + OUT_RING(fantbl[j]); + if (numtri%2) + OUT_RING(fantbl[numtri/2]&0xFFF); + } +} + +static void nv04_vbuf_render_quads_elts(struct nv04_vbuf_render* render, const ushort* indices, uint nr_indices) +{ + unsigned char* buffer = render->buffer; + struct nv04_context* nv04 = render->nv04; + int i; + + for(i = 0; i < nr_indices; i += 4) + nv04_1quad(nv04, + buffer, + indices[i+0], + indices[i+1], + indices[i+2], + indices[i+3] + ); +} + + +static void +nv04_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + + // emit the indices + switch( nv04_render->prim ) + { + case PIPE_PRIM_TRIANGLES: + nv04_vbuf_render_triangles_elts(nv04_render, indices, nr_indices); + break; + case PIPE_PRIM_QUAD_STRIP: + case PIPE_PRIM_TRIANGLE_STRIP: + nv04_vbuf_render_tri_strip_elts(nv04_render, indices, nr_indices); + break; + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + nv04_vbuf_render_tri_fan_elts(nv04_render, indices, nr_indices); + break; + case PIPE_PRIM_QUADS: + nv04_vbuf_render_quads_elts(nv04_render, indices, nr_indices); + break; + default: + NOUVEAU_ERR("You have to implement primitive %d, young padawan\n", nv04_render->prim); + break; + } +} + + +static void +nv04_vbuf_render_release_vertices( struct vbuf_render *render ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + + free(nv04_render->buffer); + nv04_render->buffer = NULL; +} + + +static void +nv04_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct nv04_vbuf_render *nv04_render = nv04_vbuf_render(render); + FREE(nv04_render); +} + + +/** + * Create a new primitive render. 
+ */ +static struct vbuf_render * +nv04_vbuf_render_create( struct nv04_context *nv04 ) +{ + struct nv04_vbuf_render *nv04_render = CALLOC_STRUCT(nv04_vbuf_render); + + nv04_render->nv04 = nv04; + + nv04_render->base.max_vertex_buffer_bytes = VERTEX_BUFFER_SIZE; + nv04_render->base.max_indices = 65536; + nv04_render->base.get_vertex_info = nv04_vbuf_render_get_vertex_info; + nv04_render->base.allocate_vertices = nv04_vbuf_render_allocate_vertices; + nv04_render->base.map_vertices = nv04_vbuf_render_map_vertices; + nv04_render->base.unmap_vertices = nv04_vbuf_render_unmap_vertices; + nv04_render->base.set_primitive = nv04_vbuf_render_set_primitive; + nv04_render->base.draw = nv04_vbuf_render_draw; + nv04_render->base.release_vertices = nv04_vbuf_render_release_vertices; + nv04_render->base.destroy = nv04_vbuf_render_destroy; + + return &nv04_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *nv04_draw_vbuf_stage( struct nv04_context *nv04 ) +{ + struct vbuf_render *render; + struct draw_stage *stage; + + render = nv04_vbuf_render_create(nv04); + if(!render) + return NULL; + + stage = draw_vbuf_stage( nv04->draw, render ); + if(!stage) { + render->destroy(render); + return NULL; + } + + return stage; +} diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c new file mode 100644 index 0000000000..9ef38bc244 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -0,0 +1,237 @@ +#include "pipe/p_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_simple_screen.h" + +#include "nv04_context.h" +#include "nv04_screen.h" + +static const char * +nv04_screen_get_name(struct pipe_screen *screen) +{ + struct nv04_screen *nv04screen = nv04_screen(screen); + struct nouveau_device *dev = nv04screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv04_screen_get_vendor(struct 
pipe_screen *screen) +{ + return "nouveau"; +} + +static int +nv04_screen_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 1; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 0; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 0; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 0; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 0; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 0; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case NOUVEAU_CAP_HW_VTXBUF: + case NOUVEAU_CAP_HW_IDXBUF: + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv04_screen_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 0.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 0.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 0.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 0.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv04_screen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + 
case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static void * +nv04_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + void *map; + struct nv04_miptree *nv04mt = (struct nv04_miptree *)surface->texture; + + map = pipe_buffer_map(screen, nv04mt->buffer, flags); + if (!map) + return NULL; + + return map + surface->offset; +} + +static void +nv04_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct nv04_miptree *nv04mt = (struct nv04_miptree *)surface->texture; + + pipe_buffer_unmap(screen, nv04mt->buffer); +} + +static void +nv04_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv04_screen *screen = nv04_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->fahrenheit); + nv04_surface_2d_takedown(&screen->eng2d); + + FREE(pscreen); +} + +static struct pipe_buffer * +nv04_surface_buffer(struct pipe_surface *surf) +{ + struct nv04_miptree *mt = (struct nv04_miptree *)surf->texture; + + return mt->buffer; +} + +struct pipe_screen * +nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv04_screen *screen = CALLOC_STRUCT(nv04_screen); + unsigned fahrenheit_class = 0, sub3d_class = 0; + unsigned chipset = nvws->channel->device->chipset; + int ret; + + if (!screen) + return NULL; + screen->nvws = nvws; + + if (chipset>=0x20) { + fahrenheit_class = 0; + sub3d_class = 0; + } else if (chipset>=0x10) { + fahrenheit_class = NV10_DX5_TEXTURED_TRIANGLE; + sub3d_class = NV10_CONTEXT_SURFACES_3D; + } else { + fahrenheit_class=NV04_DX5_TEXTURED_TRIANGLE; + sub3d_class = NV04_CONTEXT_SURFACES_3D; + } + + if (!fahrenheit_class) { + NOUVEAU_ERR("Unknown nv04 chipset: nv%02x\n", chipset); + return NULL; + } + + /* 2D engine setup */ + screen->eng2d = 
nv04_surface_2d_init(nvws); + screen->eng2d->buf = nv04_surface_buffer; + + /* 3D object */ + ret = nvws->grobj_alloc(nvws, fahrenheit_class, &screen->fahrenheit); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return NULL; + } + + /* 3D surface object */ + ret = nvws->grobj_alloc(nvws, sub3d_class, &screen->context_surfaces_3d); + if (ret) { + NOUVEAU_ERR("Error creating 3D surface object: %d\n", ret); + return NULL; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv04_screen_destroy(&screen->pipe); + return NULL; + } + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv04_screen_destroy; + + screen->pipe.get_name = nv04_screen_get_name; + screen->pipe.get_vendor = nv04_screen_get_vendor; + screen->pipe.get_param = nv04_screen_get_param; + screen->pipe.get_paramf = nv04_screen_get_paramf; + + screen->pipe.is_format_supported = nv04_screen_is_format_supported; + + screen->pipe.surface_map = nv04_surface_map; + screen->pipe.surface_unmap = nv04_surface_unmap; + + nv04_screen_init_miptree_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv04/nv04_screen.h b/src/gallium/drivers/nv04/nv04_screen.h new file mode 100644 index 0000000000..540aec907b --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_screen.h @@ -0,0 +1,27 @@ +#ifndef __NV04_SCREEN_H__ +#define __NV04_SCREEN_H__ + +#include "pipe/p_screen.h" +#include "nv04_surface_2d.h" + +struct nv04_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + unsigned chipset; + + /* HW graphics objects */ + struct nv04_surface_2d *eng2d; + struct nouveau_grobj *fahrenheit; + struct nouveau_grobj *context_surfaces_3d; + struct nouveau_notifier *sync; + +}; + +static INLINE struct nv04_screen * +nv04_screen(struct pipe_screen *screen) +{ + return (struct nv04_screen *)screen; +} + +#endif 
diff --git a/src/gallium/drivers/nv04/nv04_state.c b/src/gallium/drivers/nv04/nv04_state.c new file mode 100644 index 0000000000..87c635f962 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_state.c @@ -0,0 +1,458 @@ +#include "draw/draw_context.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv04_context.h" +#include "nv04_state.h" + +static void * +nv04_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv04_blend_state *cb; + + cb = MALLOC(sizeof(struct nv04_blend_state)); + + cb->b_enable = cso->blend_enable ? 1 : 0; + cb->b_src = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | + (nvgl_blend_func(cso->rgb_src_factor))); + cb->b_dst = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | + (nvgl_blend_func(cso->rgb_dst_factor))); + + + return (void *)cb; +} + +static void +nv04_blend_state_bind(struct pipe_context *pipe, void *blend) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + nv04->blend = (struct nv04_blend_state*)blend; + + nv04->dirty |= NV04_NEW_BLEND; +} + +static void +nv04_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + default: + 
NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP; + } + return ret >> NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT; +} + +static void * +nv04_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + + struct nv04_sampler_state *ss; + uint32_t filter = 0; + + ss = MALLOC(sizeof(struct nv04_sampler_state)); + + ss->format = ((wrap_mode(cso->wrap_s) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_SHIFT) | + (wrap_mode(cso->wrap_t) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_SHIFT)); + + if (cso->max_anisotropy > 1.0) { + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MINIFY_ENABLE | NV04_DX5_TEXTURED_TRIANGLE_FILTER_ANISOTROPIC_MAGNIFY_ENABLE; + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV04_DX5_TEXTURED_TRIANGLE_FILTER_MINIFY_NEAREST; + break; + } + break; + } + + ss->filter = filter; + + return (void *)ss; +} + +static void 
+nv04_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv04_context *nv04 = nv04_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv04->sampler[unit] = sampler[unit]; + nv04->dirty_samplers |= (1 << unit); + } +} + +static void +nv04_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void +nv04_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nv04_context *nv04 = nv04_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv04->tex_miptree[unit] = (struct nv04_miptree *)miptree[unit]; + nv04->dirty_samplers |= (1 << unit); + } +} + +static void * +nv04_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv04_rasterizer_state *rs; + + /*XXX: ignored: + * scissor + * points/lines (no hw support, emulated with tris in gallium) + */ + rs = MALLOC(sizeof(struct nv04_rasterizer_state)); + + rs->blend = cso->flatshade ? NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_FLAT : NV04_DX5_TEXTURED_TRIANGLE_BLEND_SHADE_MODE_GOURAUD; + + return (void *)rs; +} + +static void +nv04_rasterizer_state_bind(struct pipe_context *pipe, void *rast) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + nv04->rast = (struct nv04_rasterizer_state*)rast; + + draw_set_rasterizer_state(nv04->draw, (nv04->rast ? 
nv04->rast->templ : NULL)); + + nv04->dirty |= NV04_NEW_RAST | NV04_NEW_BLEND; +} + +static void +nv04_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static INLINE uint32_t nv04_compare_func(uint32_t f) +{ + switch ( f ) { + case PIPE_FUNC_NEVER: return 1; + case PIPE_FUNC_LESS: return 2; + case PIPE_FUNC_EQUAL: return 3; + case PIPE_FUNC_LEQUAL: return 4; + case PIPE_FUNC_GREATER: return 5; + case PIPE_FUNC_NOTEQUAL: return 6; + case PIPE_FUNC_GEQUAL: return 7; + case PIPE_FUNC_ALWAYS: return 8; + } + NOUVEAU_MSG("Unable to find the function\n"); + return 0; +} + +static void * +nv04_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv04_depth_stencil_alpha_state *hw; + + hw = MALLOC(sizeof(struct nv04_depth_stencil_alpha_state)); + + hw->control = float_to_ubyte(cso->alpha.ref_value); + hw->control |= ( nv04_compare_func(cso->alpha.func) << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_FUNC_SHIFT ); + hw->control |= cso->alpha.enabled ? NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ALPHA_TEST_ENABLE : 0; + hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_ORIGIN; + hw->control |= cso->depth.enabled ? (1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_ENABLE_SHIFT) : 0; + hw->control |= ( nv04_compare_func(cso->depth.func)<< NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FUNC_SHIFT ); + hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_CULL_MODE_SHIFT; // no culling, handled by the draw module + hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_DITHER_ENABLE; + hw->control |= NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_PERSPECTIVE_ENABLE; + hw->control |= cso->depth.writemask ? 
(1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_WRITE_ENABLE_SHIFT) : 0; + hw->control |= 1 << NV04_DX5_TEXTURED_TRIANGLE_CONTROL_Z_FORMAT_SHIFT; // integer zbuffer format + + return (void *)hw; +} + +static void +nv04_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + nv04->dsa = hwcso; + nv04->dirty |= NV04_NEW_CONTROL; +} + +static void +nv04_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + free(hwcso); +} + +static void * +nv04_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + return draw_create_vertex_shader(nv04->draw, templ); +} + +static void +nv04_vp_state_bind(struct pipe_context *pipe, void *shader) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + draw_bind_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader); + + nv04->dirty |= NV04_NEW_VERTPROG; +} + +static void +nv04_vp_state_delete(struct pipe_context *pipe, void *shader) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + draw_delete_vertex_shader(nv04->draw, (struct draw_vertex_shader *) shader); +} + +static void * +nv04_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv04_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv04_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + return (void *)fp; +} + +static void +nv04_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_fragment_program *fp = hwcso; + + nv04->fragprog.current = fp; + nv04->dirty |= NV04_NEW_FRAGPROG; +} + +static void +nv04_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_fragment_program *fp = hwcso; + + nv04_fragprog_destroy(nv04, fp); + free((void*)fp->pipe.tokens); + free(fp); +} + +static void 
+nv04_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ +} + +static void +nv04_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ +} + +static void +nv04_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + if (buf) { + void *mapped; + if (buf->buffer && buf->buffer->size && + (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + { + memcpy(nv04->constbuf[shader], mapped, buf->buffer->size); + nv04->constbuf_nr[shader] = + buf->buffer->size / (4 * sizeof(float)); + ws->buffer_unmap(ws, buf->buffer); + } + } +} + +static void +nv04_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + nv04->framebuffer = (struct pipe_framebuffer_state*)fb; + + nv04->dirty |= NV04_NEW_FRAMEBUFFER; +} +static void +nv04_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + NOUVEAU_ERR("line stipple hahaha\n"); +} + +static void +nv04_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ +/* struct nv04_context *nv04 = nv04_context(pipe); + + // XXX + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_SCISSOR_HORIZ, 2); + OUT_RING (((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ +} + +static void +nv04_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *viewport) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + nv04->viewport = *viewport; + + draw_set_viewport_state(nv04->draw, &nv04->viewport); +} + +static void +nv04_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *buffers) +{ + struct 
nv04_context *nv04 = nv04_context(pipe); + + memcpy(nv04->vtxbuf, buffers, count * sizeof(buffers[0])); + nv04->dirty |= NV04_NEW_VTXARRAYS; + + draw_set_vertex_buffers(nv04->draw, count, buffers); +} + +static void +nv04_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *elements) +{ + struct nv04_context *nv04 = nv04_context(pipe); + + memcpy(nv04->vtxelt, elements, sizeof(*elements) * count); + nv04->dirty |= NV04_NEW_VTXARRAYS; + + draw_set_vertex_elements(nv04->draw, count, elements); +} + +void +nv04_init_state_functions(struct nv04_context *nv04) +{ + nv04->pipe.create_blend_state = nv04_blend_state_create; + nv04->pipe.bind_blend_state = nv04_blend_state_bind; + nv04->pipe.delete_blend_state = nv04_blend_state_delete; + + nv04->pipe.create_sampler_state = nv04_sampler_state_create; + nv04->pipe.bind_sampler_states = nv04_sampler_state_bind; + nv04->pipe.delete_sampler_state = nv04_sampler_state_delete; + nv04->pipe.set_sampler_textures = nv04_set_sampler_texture; + + nv04->pipe.create_rasterizer_state = nv04_rasterizer_state_create; + nv04->pipe.bind_rasterizer_state = nv04_rasterizer_state_bind; + nv04->pipe.delete_rasterizer_state = nv04_rasterizer_state_delete; + + nv04->pipe.create_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_create; + nv04->pipe.bind_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_bind; + nv04->pipe.delete_depth_stencil_alpha_state = nv04_depth_stencil_alpha_state_delete; + + nv04->pipe.create_vs_state = nv04_vp_state_create; + nv04->pipe.bind_vs_state = nv04_vp_state_bind; + nv04->pipe.delete_vs_state = nv04_vp_state_delete; + + nv04->pipe.create_fs_state = nv04_fp_state_create; + nv04->pipe.bind_fs_state = nv04_fp_state_bind; + nv04->pipe.delete_fs_state = nv04_fp_state_delete; + + nv04->pipe.set_blend_color = nv04_set_blend_color; + nv04->pipe.set_clip_state = nv04_set_clip_state; + nv04->pipe.set_constant_buffer = nv04_set_constant_buffer; + 
nv04->pipe.set_framebuffer_state = nv04_set_framebuffer_state; + nv04->pipe.set_polygon_stipple = nv04_set_polygon_stipple; + nv04->pipe.set_scissor_state = nv04_set_scissor_state; + nv04->pipe.set_viewport_state = nv04_set_viewport_state; + + nv04->pipe.set_vertex_buffers = nv04_set_vertex_buffers; + nv04->pipe.set_vertex_elements = nv04_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv04/nv04_state.h b/src/gallium/drivers/nv04/nv04_state.h new file mode 100644 index 0000000000..15d4685ec1 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_state.h @@ -0,0 +1,74 @@ +#ifndef __NV04_STATE_H__ +#define __NV04_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv04_blend_state { + uint32_t b_enable; + uint32_t b_src; + uint32_t b_dst; +}; + +struct nv04_fragtex_state { + uint32_t format; +}; + +struct nv04_sampler_state { + uint32_t filter; + uint32_t format; +}; + +struct nv04_depth_stencil_alpha_state { + uint32_t control; +}; + +struct nv04_rasterizer_state { + uint32_t blend; + + const struct pipe_rasterizer_state *templ; +}; + +struct nv04_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct pipe_texture *shadow_tex; + struct pipe_surface *shadow_surface; + + struct { + uint pitch; + uint image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +struct nv04_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv04_fragment_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + boolean translated; + boolean on_hw; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv04_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + uint32_t fp_reg_control; +}; + + + +#endif diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c new file mode 100644 index 0000000000..bd8ef1adbf --- /dev/null +++ 
b/src/gallium/drivers/nv04/nv04_state_emit.c @@ -0,0 +1,223 @@ +#include "nv04_context.h" +#include "nv04_state.h" + +static void nv04_vertex_layout(struct pipe_context* pipe) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_fragment_program *fp = nv04->fragprog.current; + uint32_t src = 0; + int i; + struct vertex_info vinfo; + + memset(&vinfo, 0, sizeof(vinfo)); + + for (i = 0; i < fp->info.num_inputs; i++) { + int isn = fp->info.input_semantic_name[i]; + int isi = fp->info.input_semantic_index[i]; + switch (isn) { + case TGSI_SEMANTIC_POSITION: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + case TGSI_SEMANTIC_COLOR: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + default: + case TGSI_SEMANTIC_GENERIC: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + break; + case TGSI_SEMANTIC_FOG: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + break; + } + } + + printf("%d vertex input\n",fp->info.num_inputs); + draw_compute_vertex_size(&vinfo); +} + +static uint32_t nv04_blend_func(uint32_t f) +{ + switch ( f ) { + case PIPE_BLENDFACTOR_ZERO: return 0x1; + case PIPE_BLENDFACTOR_ONE: return 0x2; + case PIPE_BLENDFACTOR_SRC_COLOR: return 0x3; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: return 0x4; + case PIPE_BLENDFACTOR_SRC_ALPHA: return 0x5; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: return 0x6; + case PIPE_BLENDFACTOR_DST_ALPHA: return 0x7; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: return 0x8; + case PIPE_BLENDFACTOR_DST_COLOR: return 0x9; + case PIPE_BLENDFACTOR_INV_DST_COLOR: return 0xA; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return 0xB; + } + NOUVEAU_MSG("Unable to find the blend function 0x%x\n",f); + return 0; +} + +static void nv04_emit_control(struct nv04_context* nv04) +{ + uint32_t control = nv04->dsa->control; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(control); +} + +static void nv04_emit_blend(struct nv04_context* nv04) 
+{ + uint32_t blend; + + blend=0x4; // texture MODULATE_ALPHA + blend|=0x20; // alpha is MSB + blend|=(2<<6); // flat shading + blend|=(1<<8); // persp correct + blend|=(0<<16); // no fog + blend|=(nv04->blend->b_enable<<20); + blend|=(nv04_blend_func(nv04->blend->b_src)<<24); + blend|=(nv04_blend_func(nv04->blend->b_dst)<<28); + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_BLEND, 1); + OUT_RING(blend); +} + +static void nv04_emit_sampler(struct nv04_context *nv04, int unit) +{ + struct nv04_miptree *nv04mt = nv04->tex_miptree[unit]; + struct pipe_texture *pt = &nv04mt->base; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 3); + OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[unit]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING(nv04->sampler[unit]->filter); +} + +static void nv04_state_emit_framebuffer(struct nv04_context* nv04) +{ + struct pipe_framebuffer_state* fb = nv04->framebuffer; + struct pipe_surface *rt, *zeta; + uint32_t rt_format, w, h; + int colour_format = 0, zeta_format = 0; + struct nv04_miptree *nv04mt = 0; + + w = fb->cbufs[0]->width; + h = fb->cbufs[0]->height; + colour_format = fb->cbufs[0]->format; + rt = fb->cbufs[0]; + + if (fb->zsbuf) { + if (colour_format) { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } else { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + } + + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format = 0x108; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format = 0x103; + break; + default: + assert(0); + } + + BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); + OUT_RING(rt_format); + + nv04mt = (struct nv04_miptree *)rt->texture; + /* FIXME pitches have to be aligned ! 
*/ + BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(rt->stride|(zeta->stride<<16)); + OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + if (fb->zsbuf) { + nv04mt = (struct nv04_miptree *)zeta->texture; + BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + } +} + +void +nv04_emit_hw_state(struct nv04_context *nv04) +{ + int i; + + if (nv04->dirty & NV04_NEW_VERTPROG) { + //nv04_vertprog_bind(nv04, nv04->vertprog.current); + nv04->dirty &= ~NV04_NEW_VERTPROG; + } + + if (nv04->dirty & NV04_NEW_FRAGPROG) { + nv04_fragprog_bind(nv04, nv04->fragprog.current); + nv04->dirty &= ~NV04_NEW_FRAGPROG; + nv04->dirty_samplers |= (1<<10); + nv04->dirty_samplers = 0; + } + + if (nv04->dirty & NV04_NEW_CONTROL) { + nv04->dirty &= ~NV04_NEW_CONTROL; + + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_CONTROL, 1); + OUT_RING(nv04->dsa->control); + } + + if (nv04->dirty & NV04_NEW_BLEND) { + nv04->dirty &= ~NV04_NEW_BLEND; + + nv04_emit_blend(nv04); + } + + if (nv04->dirty & NV04_NEW_VTXARRAYS) { + nv04->dirty &= ~NV04_NEW_VTXARRAYS; + nv04_vertex_layout(nv04); + } + + if (nv04->dirty & NV04_NEW_SAMPLER) { + nv04->dirty &= ~NV04_NEW_SAMPLER; + + nv04_emit_sampler(nv04, 0); + } + + if (nv04->dirty & NV04_NEW_VIEWPORT) { + nv04->dirty &= ~NV04_NEW_VIEWPORT; +// nv04_state_emit_viewport(nv04); + } + + if (nv04->dirty & NV04_NEW_FRAMEBUFFER) { + nv04->dirty &= ~NV04_NEW_FRAMEBUFFER; + nv04_state_emit_framebuffer(nv04); + } + + /* Emit relocs for every referenced buffer. + * This is to ensure the bufmgr has an accurate idea of how + * the buffer is used. This isn't very efficient, but we don't + * seem to take a significant performance hit. Will be improved + * at some point. 
Vertex arrays are emitted by nv04_vbo.c + */ + + /* Render target */ + BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); + OUT_RING(nv04->rt->stride|(nv04->zeta->stride<<16)); + OUT_RELOCl(nv04->rt, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + if (nv04->zeta) { + BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); + OUT_RELOCl(nv04->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + } + + /* Texture images */ + for (i = 0; i < 1; i++) { + if (!(nv04->fp_samplers & (1 << i))) + continue; + struct nv04_miptree *nv04mt = nv04->tex_miptree[i]; + BEGIN_RING(fahrenheit, NV04_DX5_TEXTURED_TRIANGLE_OFFSET, 2); + OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(nv04mt->buffer, (nv04->fragtex.format | nv04->sampler[i]->format), NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + } +} + diff --git a/src/gallium/drivers/nv04/nv04_surface.c b/src/gallium/drivers/nv04/nv04_surface.c new file mode 100644 index 0000000000..14abf16679 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_surface.c @@ -0,0 +1,72 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "nv04_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv04_surface_copy(struct pipe_context *pipe, boolean do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_surface_2d *eng2d = nv04->screen->eng2d; + + if (do_flip) { + desty += height; + while (height--) { + eng2d->copy(eng2d, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + return; + } + + eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); +} + +static void +nv04_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv04_context *nv04 = nv04_context(pipe); + struct nv04_surface_2d *eng2d = nv04->screen->eng2d; + + eng2d->fill(eng2d, dest, destx, desty, width, height, value); +} + +void +nv04_init_surface_functions(struct nv04_context *nv04) +{ + nv04->pipe.surface_copy = nv04_surface_copy; + nv04->pipe.surface_fill = nv04_surface_fill; +} diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.c b/src/gallium/drivers/nv04/nv04_surface_2d.c new file mode 100644 index 0000000000..230cfd17dd --- /dev/null +++ 
b/src/gallium/drivers/nv04/nv04_surface_2d.c @@ -0,0 +1,448 @@ +#include "pipe/p_context.h" +#include "pipe/p_format.h" +#include "util/u_memory.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_util.h" +#include "nv04_surface_2d.h" + +static INLINE int +nv04_surface_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_Y8; + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_R5G6B5; + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_A8R8G8B8_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_A8R8G8B8; + case PIPE_FORMAT_Z24S8_UNORM: + return NV04_CONTEXT_SURFACES_2D_FORMAT_Y32; + default: + return -1; + } +} + +static INLINE int +nv04_rect_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8_UNORM: + return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; + case PIPE_FORMAT_R5G6B5_UNORM: + return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A16R5G6B5; + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT_A8R8G8B8; + default: + return -1; + } +} + +static INLINE int +nv04_scaled_image_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A1R5G5B5_UNORM: + return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A1R5G5B5; + case PIPE_FORMAT_A8R8G8B8_UNORM: + return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_A8R8G8B8; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_X8R8G8B8; + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_R16_SNORM: + return NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_FORMAT_R5G6B5; + default: + return -1; + } +} + +static INLINE unsigned +nv04_swizzle_bits(unsigned x, unsigned y) +{ + unsigned u = (x & 0x001) << 0 | + (x & 0x002) << 1 | + (x & 0x004) << 2 | + (x & 0x008) << 3 | + (x & 0x010) << 4 | + (x & 0x020) << 5 | + (x & 0x040) << 6 | + (x & 0x080) << 7 | + (x & 0x100) << 8 | + (x & 
0x200) << 9 | + (x & 0x400) << 10 | + (x & 0x800) << 11; + + unsigned v = (y & 0x001) << 1 | + (y & 0x002) << 2 | + (y & 0x004) << 3 | + (y & 0x008) << 4 | + (y & 0x010) << 5 | + (y & 0x020) << 6 | + (y & 0x040) << 7 | + (y & 0x080) << 8 | + (y & 0x100) << 9 | + (y & 0x200) << 10 | + (y & 0x400) << 11 | + (y & 0x800) << 12; + return v | u; +} + +static int +nv04_surface_copy_swizzle(struct nv04_surface_2d *ctx, + struct pipe_surface *dst, int dx, int dy, + struct pipe_surface *src, int sx, int sy, + int w, int h) +{ + struct nouveau_channel *chan = ctx->nvws->channel; + struct nouveau_grobj *swzsurf = ctx->swzsurf; + struct nouveau_grobj *sifm = ctx->sifm; + struct nouveau_bo *src_bo = ctx->nvws->get_bo(ctx->buf(src)); + struct nouveau_bo *dst_bo = ctx->nvws->get_bo(ctx->buf(dst)); + const unsigned max_w = 1024; + const unsigned max_h = 1024; + const unsigned sub_w = w > max_w ? max_w : w; + const unsigned sub_h = h > max_h ? max_h : h; + unsigned cx; + unsigned cy; + + /* POT or GTFO */ + assert(!(w & (w - 1)) && !(h & (h - 1))); + + BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_DMA_IMAGE, 1); + OUT_RELOCo(chan, dst_bo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_FORMAT, 1); + OUT_RING (chan, nv04_surface_format(dst->format) | + log2i(w) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_U_SHIFT | + log2i(h) << NV04_SWIZZLED_SURFACE_FORMAT_BASE_SIZE_V_SHIFT); + + BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_DMA_IMAGE, 1); + OUT_RELOCo(chan, src_bo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SURFACE, 1); + OUT_RING (chan, swzsurf->handle); + + for (cy = 0; cy < h; cy += sub_h) { + for (cx = 0; cx < w; cx += sub_w) { + BEGIN_RING(chan, swzsurf, NV04_SWIZZLED_SURFACE_OFFSET, 1); + OUT_RELOCl(chan, dst_bo, dst->offset + nv04_swizzle_bits(cx, cy) * + dst->block.size, NOUVEAU_BO_GART | + NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, 
sifm, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION, 9); + OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_COLOR_CONVERSION_TRUNCATE); + OUT_RING (chan, nv04_scaled_image_format(src->format)); + OUT_RING (chan, NV04_SCALED_IMAGE_FROM_MEMORY_OPERATION_SRCCOPY); + OUT_RING (chan, 0); + OUT_RING (chan, sub_h << 16 | sub_w); + OUT_RING (chan, 0); + OUT_RING (chan, sub_h << 16 | sub_w); + OUT_RING (chan, 1 << 20); + OUT_RING (chan, 1 << 20); + + BEGIN_RING(chan, sifm, NV04_SCALED_IMAGE_FROM_MEMORY_SIZE, 4); + OUT_RING (chan, sub_h << 16 | sub_w); + OUT_RING (chan, src->stride | + NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_ORIGIN_CENTER | + NV04_SCALED_IMAGE_FROM_MEMORY_FORMAT_FILTER_POINT_SAMPLE); + OUT_RELOCl(chan, src_bo, src->offset + cy * src->stride + + cx * src->block.size, NOUVEAU_BO_GART | + NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RING (chan, 0); + } + } + + return 0; +} + +static int +nv04_surface_copy_m2mf(struct nv04_surface_2d *ctx, + struct pipe_surface *dst, int dx, int dy, + struct pipe_surface *src, int sx, int sy, int w, int h) +{ + struct nouveau_channel *chan = ctx->nvws->channel; + struct nouveau_grobj *m2mf = ctx->m2mf; + struct nouveau_bo *src_bo = ctx->nvws->get_bo(ctx->buf(src)); + struct nouveau_bo *dst_bo = ctx->nvws->get_bo(ctx->buf(dst)); + unsigned dst_offset, src_offset; + + dst_offset = dst->offset + (dy * dst->stride) + (dx * dst->block.size); + src_offset = src->offset + (sy * src->stride) + (sx * src->block.size); + + WAIT_RING (chan, 3 + ((h / 2047) + 1) * 9); + BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_BUFFER_IN, 2); + OUT_RELOCo(chan, src_bo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(chan, dst_bo, + NOUVEAU_BO_GART | NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + while (h) { + int count = (h > 2047) ? 
2047 : h; + + BEGIN_RING(chan, m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_OFFSET_IN, 8); + OUT_RELOCl(chan, src_bo, src_offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCl(chan, dst_bo, dst_offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_WR); + OUT_RING (chan, src->stride); + OUT_RING (chan, dst->stride); + OUT_RING (chan, w * src->block.size); + OUT_RING (chan, count); + OUT_RING (chan, 0x0101); + OUT_RING (chan, 0); + + h -= count; + src_offset += src->stride * count; + dst_offset += dst->stride * count; + } + + return 0; +} + +static int +nv04_surface_copy_blit(struct nv04_surface_2d *ctx, struct pipe_surface *dst, + int dx, int dy, struct pipe_surface *src, int sx, int sy, + int w, int h) +{ + struct nouveau_channel *chan = ctx->nvws->channel; + struct nouveau_grobj *surf2d = ctx->surf2d; + struct nouveau_grobj *blit = ctx->blit; + struct nouveau_bo *src_bo = ctx->nvws->get_bo(ctx->buf(src)); + struct nouveau_bo *dst_bo = ctx->nvws->get_bo(ctx->buf(dst)); + int format; + + format = nv04_surface_format(dst->format); + if (format < 0) + return 1; + + WAIT_RING (chan, 12); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RELOCo(chan, src_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); + OUT_RING (chan, format); + OUT_RING (chan, (dst->stride << 16) | src->stride); + OUT_RELOCl(chan, src_bo, src->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD); + OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, blit, 0x0300, 3); + OUT_RING (chan, (sy << 16) | sx); + OUT_RING (chan, (dy << 16) | dx); + OUT_RING (chan, ( h << 16) | w); + + return 0; +} + +static void +nv04_surface_copy(struct nv04_surface_2d *ctx, struct pipe_surface *dst, + int dx, int dy, struct pipe_surface *src, int sx, int sy, + int w, int h) +{ + int src_linear = src->texture->tex_usage & 
NOUVEAU_TEXTURE_USAGE_LINEAR; + int dst_linear = dst->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR; + + assert(src->format == dst->format); + + /* Setup transfer to swizzle the texture to vram if needed */ + if (src_linear && !dst_linear && w > 1 && h > 1) { + nv04_surface_copy_swizzle(ctx, dst, dx, dy, src, sx, sy, w, h); + return; + } + + /* NV_CONTEXT_SURFACES_2D has buffer alignment restrictions, fallback + * to NV_MEMORY_TO_MEMORY_FORMAT in this case. + */ + if ((src->offset & 63) || (dst->offset & 63) || + (src->stride & 63) || (dst->stride & 63)) { + nv04_surface_copy_m2mf(ctx, dst, dx, dy, src, sx, sy, w, h); + return; + } + + nv04_surface_copy_blit(ctx, dst, dx, dy, src, sx, sy, w, h); +} + +static void +nv04_surface_fill(struct nv04_surface_2d *ctx, struct pipe_surface *dst, + int dx, int dy, int w, int h, unsigned value) +{ + struct nouveau_channel *chan = ctx->nvws->channel; + struct nouveau_grobj *surf2d = ctx->surf2d; + struct nouveau_grobj *rect = ctx->rect; + struct nouveau_bo *dst_bo = ctx->nvws->get_bo(ctx->buf(dst)); + int cs2d_format, gdirect_format; + + cs2d_format = nv04_surface_format(dst->format); + assert(cs2d_format >= 0); + + gdirect_format = nv04_rect_format(dst->format); + assert(gdirect_format >= 0); + + WAIT_RING (chan, 16); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCo(chan, dst_bo, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(chan, surf2d, NV04_CONTEXT_SURFACES_2D_FORMAT, 4); + OUT_RING (chan, cs2d_format); + OUT_RING (chan, (dst->stride << 16) | dst->stride); + OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, dst_bo, dst->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR_FORMAT, 1); + OUT_RING (chan, gdirect_format); + BEGIN_RING(chan, rect, NV04_GDI_RECTANGLE_TEXT_COLOR1_A, 1); + OUT_RING (chan, value); + BEGIN_RING(chan, rect, + 
NV04_GDI_RECTANGLE_TEXT_UNCLIPPED_RECTANGLE_POINT(0), 2); + OUT_RING (chan, (dx << 16) | dy); + OUT_RING (chan, ( w << 16) | h); +} + +void +nv04_surface_2d_takedown(struct nv04_surface_2d **pctx) +{ + struct nv04_surface_2d *ctx; + + if (!pctx || !*pctx) + return; + ctx = *pctx; + *pctx = NULL; + + nouveau_notifier_free(&ctx->ntfy); + nouveau_grobj_free(&ctx->m2mf); + nouveau_grobj_free(&ctx->surf2d); + nouveau_grobj_free(&ctx->swzsurf); + nouveau_grobj_free(&ctx->rect); + nouveau_grobj_free(&ctx->blit); + nouveau_grobj_free(&ctx->sifm); + + FREE(ctx); +} + +struct nv04_surface_2d * +nv04_surface_2d_init(struct nouveau_winsys *nvws) +{ + struct nv04_surface_2d *ctx = CALLOC_STRUCT(nv04_surface_2d); + struct nouveau_channel *chan = nvws->channel; + unsigned handle = 0x88000000, class; + int ret; + + if (!ctx) + return NULL; + + ret = nouveau_notifier_alloc(chan, handle++, 1, &ctx->ntfy); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + ret = nouveau_grobj_alloc(chan, handle++, 0x0039, &ctx->m2mf); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + BEGIN_RING(chan, ctx->m2mf, NV04_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1); + OUT_RING (chan, ctx->ntfy->handle); + + if (chan->device->chipset < 0x10) + class = NV04_CONTEXT_SURFACES_2D; + else + class = NV10_CONTEXT_SURFACES_2D; + + ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->surf2d); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + BEGIN_RING(chan, ctx->surf2d, + NV04_CONTEXT_SURFACES_2D_DMA_IMAGE_SOURCE, 2); + OUT_RING (chan, chan->vram->handle); + OUT_RING (chan, chan->vram->handle); + + if (chan->device->chipset < 0x10) + class = NV04_IMAGE_BLIT; + else + class = NV12_IMAGE_BLIT; + + ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->blit); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_DMA_NOTIFY, 1); + OUT_RING (chan, ctx->ntfy->handle); + BEGIN_RING(chan, ctx->blit, 
NV04_IMAGE_BLIT_SURFACE, 1); + OUT_RING (chan, ctx->surf2d->handle); + BEGIN_RING(chan, ctx->blit, NV04_IMAGE_BLIT_OPERATION, 1); + OUT_RING (chan, NV04_IMAGE_BLIT_OPERATION_SRCCOPY); + + ret = nouveau_grobj_alloc(chan, handle++, NV04_GDI_RECTANGLE_TEXT, + &ctx->rect); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_DMA_NOTIFY, 1); + OUT_RING (chan, ctx->ntfy->handle); + BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_SURFACE, 1); + OUT_RING (chan, ctx->surf2d->handle); + BEGIN_RING(chan, ctx->rect, NV04_GDI_RECTANGLE_TEXT_OPERATION, 1); + OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_OPERATION_SRCCOPY); + BEGIN_RING(chan, ctx->rect, + NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT, 1); + OUT_RING (chan, NV04_GDI_RECTANGLE_TEXT_MONOCHROME_FORMAT_LE); + + switch (chan->device->chipset & 0xf0) { + case 0x00: + case 0x10: + class = NV04_SWIZZLED_SURFACE; + break; + case 0x20: + class = NV20_SWIZZLED_SURFACE; + break; + case 0x30: + class = NV30_SWIZZLED_SURFACE; + break; + case 0x40: + case 0x60: + class = NV40_SWIZZLED_SURFACE; + break; + default: + /* Famous last words: this really can't happen.. 
*/ + assert(0); + break; + } + + ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->swzsurf); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + if (chan->device->chipset < 0x10) { + class = NV04_SCALED_IMAGE_FROM_MEMORY; + } else + if (chan->device->chipset < 0x40) { + class = NV10_SCALED_IMAGE_FROM_MEMORY; + } else { + class = NV40_SCALED_IMAGE_FROM_MEMORY; + } + + ret = nouveau_grobj_alloc(chan, handle++, class, &ctx->sifm); + if (ret) { + nv04_surface_2d_takedown(&ctx); + return NULL; + } + + ctx->nvws = nvws; + ctx->copy = nv04_surface_copy; + ctx->fill = nv04_surface_fill; + return ctx; +} diff --git a/src/gallium/drivers/nv04/nv04_surface_2d.h b/src/gallium/drivers/nv04/nv04_surface_2d.h new file mode 100644 index 0000000000..21b8f86960 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_surface_2d.h @@ -0,0 +1,29 @@ +#ifndef __NV04_SURFACE_2D_H__ +#define __NV04_SURFACE_2D_H__ + +struct nv04_surface_2d { + struct nouveau_winsys *nvws; + struct nouveau_notifier *ntfy; + struct nouveau_grobj *surf2d; + struct nouveau_grobj *swzsurf; + struct nouveau_grobj *m2mf; + struct nouveau_grobj *rect; + struct nouveau_grobj *blit; + struct nouveau_grobj *sifm; + + struct pipe_buffer *(*buf)(struct pipe_surface *); + + void (*copy)(struct nv04_surface_2d *, struct pipe_surface *dst, + int dx, int dy, struct pipe_surface *src, int sx, int sy, + int w, int h); + void (*fill)(struct nv04_surface_2d *, struct pipe_surface *dst, + int dx, int dy, int w, int h, unsigned value); +}; + +struct nv04_surface_2d * +nv04_surface_2d_init(struct nouveau_winsys *nvws); + +void +nv04_surface_2d_takedown(struct nv04_surface_2d **); + +#endif diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c new file mode 100644 index 0000000000..d21a0e34f7 --- /dev/null +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -0,0 +1,78 @@ +#include "draw/draw_context.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv04_context.h" 
+#include "nv04_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" + +boolean nv04_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + struct nv04_context *nv04 = nv04_context( pipe ); + struct draw_context *draw = nv04->draw; + unsigned i; + + nv04_emit_hw_state(nv04); + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv04->vtxbuf[i].buffer) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + nv04->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + draw_set_mapped_constant_buffer(draw, + nv04->constbuf[PIPE_SHADER_VERTEX], + nv04->constbuf_nr[PIPE_SHADER_VERTEX]); + + /* draw! 
*/ + draw_arrays(nv04->draw, prim, start, count); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv04->vtxbuf[i].buffer) { + pipe->winsys->buffer_unmap(pipe->winsys, nv04->vtxbuf[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + } + if (indexBuffer) { + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + return TRUE; +} + +boolean nv04_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + printf("coucou in draw arrays\n"); + return nv04_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + diff --git a/src/gallium/drivers/nv10/Makefile b/src/gallium/drivers/nv10/Makefile new file mode 100644 index 0000000000..2b5fbd4f5a --- /dev/null +++ b/src/gallium/drivers/nv10/Makefile @@ -0,0 +1,19 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv10 + +C_SOURCES = \ + nv10_clear.c \ + nv10_context.c \ + nv10_fragprog.c \ + nv10_fragtex.c \ + nv10_miptree.c \ + nv10_prim_vbuf.c \ + nv10_screen.c \ + nv10_state.c \ + nv10_state_emit.c \ + nv10_surface.c \ + nv10_vbo.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/nv10/nv10_clear.c b/src/gallium/drivers/nv10/nv10_clear.c new file mode 100644 index 0000000000..be7e09cf4b --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_clear.c @@ -0,0 +1,12 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv10_context.h" + +void +nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); +} diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c new file mode 100644 index 0000000000..ef2c0c5d9f --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -0,0 +1,296 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include 
"pipe/internal/p_winsys_screen.h" + +#include "nv10_context.h" +#include "nv10_screen.h" + +static void +nv10_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + draw_flush(nv10->draw); + + FIRE_RING(fence); +} + +static void +nv10_destroy(struct pipe_context *pipe) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + if (nv10->draw) + draw_destroy(nv10->draw); + + FREE(nv10); +} + +static void nv10_init_hwctx(struct nv10_context *nv10) +{ + struct nv10_screen *screen = nv10->screen; + struct nouveau_winsys *nvws = screen->nvws; + int i; + float projectionmatrix[16]; + + BEGIN_RING(celsius, NV10TCL_DMA_NOTIFY, 1); + OUT_RING (screen->sync->handle); + BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY0, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->gart->handle); + BEGIN_RING(celsius, NV10TCL_DMA_IN_MEMORY2, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->vram->handle); + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 2); + OUT_RING (0); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING ((0x7ff<<16)|0x800); + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING ((0x7ff<<16)|0x800); + + for (i=1;i<8;i++) { + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (0); + } + + BEGIN_RING(celsius, 0x290, 1); + OUT_RING ((0x10<<16)|1); + BEGIN_RING(celsius, 0x3f4, 1); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + + if (nv10->screen->celsius->grclass != NV10TCL) { + /* For nv11, nv17 */ + BEGIN_RING(celsius, 0x120, 3); + OUT_RING (0); + OUT_RING (1); + OUT_RING (2); + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + } + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + + /* Set state */ + BEGIN_RING(celsius, 
NV10TCL_FOG_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (0x207); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_TX_ENABLE(0), 2); + OUT_RING (0); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_RC_IN_ALPHA(0), 12); + OUT_RING (0x30141010); + OUT_RING (0); + OUT_RING (0x20040000); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0x00000c00); + OUT_RING (0); + OUT_RING (0x00000c00); + OUT_RING (0x18000000); + OUT_RING (0x300e0300); + OUT_RING (0x0c091c80); + + BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 2); + OUT_RING (1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_VERTEX_WEIGHT_ENABLE, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_SRC, 4); + OUT_RING (1); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0x8006); + BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 8); + OUT_RING (0xff); + OUT_RING (0x207); + OUT_RING (0); + OUT_RING (0xff); + OUT_RING (0x1e00); + OUT_RING (0x1e00); + OUT_RING (0x1e00); + OUT_RING (0x1d01); + BEGIN_RING(celsius, NV10TCL_NORMALIZE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_FOG_ENABLE, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_LIGHT_MODEL, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_COLOR_CONTROL, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_ENABLED_LIGHTS, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (0x201); + BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, 
NV10TCL_POINT_SIZE, 1); + OUT_RING (8); + BEGIN_RING(celsius, NV10TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_LINE_WIDTH, 1); + OUT_RING (8); + BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (0x1b02); + OUT_RING (0x1b02); + BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (0x405); + OUT_RING (0x901); + BEGIN_RING(celsius, NV10TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_TX_GEN_S(0), 8); + for (i=0;i<8;i++) { + OUT_RING (0); + } + BEGIN_RING(celsius, NV10TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RING (0x3fc00000); /* -1.50 */ + OUT_RING (0xbdb8aa0a); /* -0.09 */ + OUT_RING (0); /* 0.00 */ + + BEGIN_RING(celsius, NV10TCL_NOP, 1); + OUT_RING (0); + + BEGIN_RING(celsius, NV10TCL_FOG_MODE, 2); + OUT_RING (0x802); + OUT_RING (2); + /* for some reason VIEW_MATRIX_ENABLE need to be 6 instead of 4 when + * using texturing, except when using the texture matrix + */ + BEGIN_RING(celsius, NV10TCL_VIEW_MATRIX_ENABLE, 1); + OUT_RING (6); + BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (0x01010101); + + /* Set vertex component */ + BEGIN_RING(celsius, NV10TCL_VERTEX_COL_4F_R, 4); + OUT_RINGf (1.0); + OUT_RINGf (1.0); + OUT_RINGf (1.0); + OUT_RINGf (1.0); + BEGIN_RING(celsius, NV10TCL_VERTEX_COL2_3F_R, 3); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(celsius, NV10TCL_VERTEX_NOR_3F_X, 3); + OUT_RING (0); + OUT_RING (0); + OUT_RINGf (1.0); + BEGIN_RING(celsius, NV10TCL_VERTEX_TX0_4F_S, 4); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + BEGIN_RING(celsius, NV10TCL_VERTEX_TX1_4F_S, 4); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + BEGIN_RING(celsius, NV10TCL_VERTEX_FOG_1F, 1); + OUT_RINGf (0.0); + BEGIN_RING(celsius, NV10TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (1); + + 
memset(projectionmatrix, 0, sizeof(projectionmatrix)); + BEGIN_RING(celsius, NV10TCL_PROJECTION_MATRIX(0), 16); + projectionmatrix[0*4+0] = 1.0; + projectionmatrix[1*4+1] = 1.0; + projectionmatrix[2*4+2] = 1.0; + projectionmatrix[3*4+3] = 1.0; + for (i=0;i<16;i++) { + OUT_RINGf (projectionmatrix[i]); + } + + BEGIN_RING(celsius, NV10TCL_DEPTH_RANGE_NEAR, 2); + OUT_RING (0.0); + OUT_RINGf (16777216.0); + + BEGIN_RING(celsius, NV10TCL_VIEWPORT_SCALE_X, 4); + OUT_RINGf (-2048.0); + OUT_RINGf (-2048.0); + OUT_RINGf (16777215.0 * 0.5); + OUT_RING (0); + + FIRE_RING (NULL); +} + +static void +nv10_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + +struct pipe_context * +nv10_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct nv10_screen *screen = nv10_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nv10_context *nv10; + struct nouveau_winsys *nvws = screen->nvws; + + nv10 = CALLOC(1, sizeof(struct nv10_context)); + if (!nv10) + return NULL; + nv10->screen = screen; + nv10->pctx_id = pctx_id; + + nv10->nvws = nvws; + + nv10->pipe.winsys = ws; + nv10->pipe.screen = pscreen; + nv10->pipe.destroy = nv10_destroy; + nv10->pipe.set_edgeflags = nv10_set_edgeflags; + nv10->pipe.draw_arrays = nv10_draw_arrays; + nv10->pipe.draw_elements = nv10_draw_elements; + nv10->pipe.clear = nv10_clear; + nv10->pipe.flush = nv10_flush; + + nv10_init_surface_functions(nv10); + nv10_init_state_functions(nv10); + + nv10->draw = draw_create(); + assert(nv10->draw); + draw_set_rasterize_stage(nv10->draw, nv10_draw_vbuf_stage(nv10)); + + nv10_init_hwctx(nv10); + + return &nv10->pipe; +} + diff --git a/src/gallium/drivers/nv10/nv10_context.h b/src/gallium/drivers/nv10/nv10_context.h new file mode 100644 index 0000000000..f3b56de25a --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_context.h @@ -0,0 +1,153 @@ +#ifndef __NV10_CONTEXT_H__ +#define __NV10_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include 
"pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv10_screen *ctx = nv10->screen +#include "nouveau/nouveau_push.h" + +#include "nv10_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +#define NV10_NEW_VERTPROG (1 << 0) +#define NV10_NEW_FRAGPROG (1 << 1) +#define NV10_NEW_VTXARRAYS (1 << 2) +#define NV10_NEW_BLEND (1 << 3) +#define NV10_NEW_BLENDCOL (1 << 4) +#define NV10_NEW_RAST (1 << 5) +#define NV10_NEW_DSA (1 << 6) +#define NV10_NEW_VIEWPORT (1 << 7) +#define NV10_NEW_SCISSOR (1 << 8) +#define NV10_NEW_FRAMEBUFFER (1 << 9) + +#include "nv10_screen.h" + +struct nv10_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nv10_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + uint32_t dirty; + + struct nv10_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nv10_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned dirty_samplers; + unsigned fp_samplers; + unsigned vp_samplers; + + uint32_t rt_enable; + struct pipe_buffer *rt[4]; + struct pipe_buffer *zeta; + uint32_t lma_offset; + + struct nv10_blend_state *blend; + struct pipe_blend_color *blend_color; + struct nv10_rasterizer_state *rast; + struct nv10_depth_stencil_alpha_state *dsa; + struct pipe_viewport_state *viewport; + struct pipe_scissor_state *scissor; + struct pipe_framebuffer_state *framebuffer; + + //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + float *constbuf[PIPE_SHADER_TYPES][32][4]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; + + struct vertex_info vertex_info; + + struct { + struct pipe_buffer *buffer; + uint32_t format; + } tex[2]; + + unsigned vb_enable; + struct { + struct pipe_buffer 
*buffer; + unsigned delta; + } vb[16]; + +/* struct { + + struct nouveau_resource *exec_heap; + struct nouveau_resource *data_heap; + + struct nv10_vertex_program *active; + + struct nv10_vertex_program *current; + } vertprog; +*/ + struct { + struct nv10_fragment_program *active; + + struct nv10_fragment_program *current; + struct pipe_buffer *constant_buf; + } fragprog; + + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; +}; + +static INLINE struct nv10_context * +nv10_context(struct pipe_context *pipe) +{ + return (struct nv10_context *)pipe; +} + +extern void nv10_init_state_functions(struct nv10_context *nv10); +extern void nv10_init_surface_functions(struct nv10_context *nv10); + +extern void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv10_clear.c */ +extern void nv10_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +/* nv10_draw.c */ +extern struct draw_stage *nv10_draw_render_stage(struct nv10_context *nv10); + +/* nv10_fragprog.c */ +extern void nv10_fragprog_bind(struct nv10_context *, + struct nv10_fragment_program *); +extern void nv10_fragprog_destroy(struct nv10_context *, + struct nv10_fragment_program *); + +/* nv10_fragtex.c */ +extern void nv10_fragtex_bind(struct nv10_context *); + +/* nv10_prim_vbuf.c */ +struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 ); +extern void nv10_vtxbuf_bind(struct nv10_context* nv10); + +/* nv10_state.c and friends */ +extern void nv10_emit_hw_state(struct nv10_context *nv10); +extern void nv10_state_tex_update(struct nv10_context *nv10); + +/* nv10_vbo.c */ +extern boolean nv10_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv10_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count); + + +#endif diff --git 
a/src/gallium/drivers/nv10/nv10_fragprog.c b/src/gallium/drivers/nv10/nv10_fragprog.c new file mode 100644 index 0000000000..698db5a16a --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_fragprog.c @@ -0,0 +1,21 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv10_context.h" + +void +nv10_fragprog_bind(struct nv10_context *nv10, struct nv10_fragment_program *fp) +{ +} + +void +nv10_fragprog_destroy(struct nv10_context *nv10, + struct nv10_fragment_program *fp) +{ +} + diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c new file mode 100644 index 0000000000..27f2f87584 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_fragtex.c @@ -0,0 +1,124 @@ +#include "nv10_context.h" +#include "nouveau/nouveau_util.h" + +#define _(m,tf) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV10TCL_TX_FORMAT_FORMAT_##tf, \ +} + +struct nv10_texture_format { + boolean defined; + uint pipe; + int format; +}; + +static struct nv10_texture_format +nv10_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8), + _(A1R5G5B5_UNORM, A1R5G5B5), + _(A4R4G4B4_UNORM, A4R4G4B4), + _(L8_UNORM , L8 ), + _(A8_UNORM , A8 ), + _(A8L8_UNORM , A8L8 ), +// _(RGB_DXT1 , DXT1, ), +// _(RGBA_DXT1 , DXT1, ), +// _(RGBA_DXT3 , DXT3, ), +// _(RGBA_DXT5 , DXT5, ), + {}, +}; + +static struct nv10_texture_format * +nv10_fragtex_format(uint pipe_format) +{ + struct nv10_texture_format *tf = nv10_texture_formats; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + return NULL; +} + + +static void +nv10_fragtex_build(struct nv10_context *nv10, int unit) +{ +#if 0 + struct nv10_sampler_state *ps = nv10->tex_sampler[unit]; + struct nv10_miptree *nv10mt = nv10->tex_miptree[unit]; + struct pipe_texture *pt = &nv10mt->base; + struct nv10_texture_format *tf; + uint32_t txf, txs, txp; + + tf = 
nv10_fragtex_format(pt->format); + if (!tf || !tf->defined) { + NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format); + return; + } + + txf = tf->format << 8; + txf |= (pt->last_level + 1) << 16; + txf |= log2i(pt->width[0]) << 20; + txf |= log2i(pt->height[0]) << 24; + txf |= log2i(pt->depth[0]) << 28; + txf |= 8; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV10TCL_TX_FORMAT_CUBE_MAP; + /* fall-through */ + case PIPE_TEXTURE_2D: + txf |= (2<<4); + break; + case PIPE_TEXTURE_1D: + txf |= (1<<4); + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return; + } + + BEGIN_RING(celsius, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(nv10mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(nv10mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (ps->wrap); + OUT_RING (0x40000000); /* enable */ + OUT_RING (txs); + OUT_RING (ps->filt | 0x2000 /* magic */); + OUT_RING ((pt->width[0] << 16) | pt->height[0]); + OUT_RING (ps->bcol); +#endif +} + +void +nv10_fragtex_bind(struct nv10_context *nv10) +{ +#if 0 + struct nv10_fragment_program *fp = nv10->fragprog.active; + unsigned samplers, unit; + + samplers = nv10->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + BEGIN_RING(celsius, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (0); + } + + samplers = nv10->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + nv10_fragtex_build(nv10, unit); + } + + nv10->fp_samplers = fp->samplers; +#endif +} + diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c new file mode 100644 index 0000000000..9616135461 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -0,0 +1,174 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv10_context.h" +#include 
"nv10_screen.h" + +static void +nv10_miptree_layout(struct nv10_miptree *nv10mt) +{ + struct pipe_texture *pt = &nv10mt->base; + boolean swizzled = FALSE; + uint width = pt->width[0], height = pt->height[0]; + uint offset = 0; + int nr_faces, l, f; + + if (pt->target == PIPE_TEXTURE_CUBE) { + nr_faces = 6; + } else { + nr_faces = 1; + } + + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + if (swizzled) + nv10mt->level[l].pitch = pt->nblocksx[l] * pt->block.size; + else + nv10mt->level[l].pitch = pt->nblocksx[0] * pt->block.size; + nv10mt->level[l].pitch = (nv10mt->level[l].pitch + 63) & ~63; + + nv10mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l <= pt->last_level; l++) { + nv10mt->level[l].image_offset[f] = offset; + offset += nv10mt->level[l].pitch * pt->height[l]; + } + } + + nv10mt->total_size = offset; +} + +static struct pipe_texture * +nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, + const unsigned *stride, struct pipe_buffer *pb) +{ + struct nv10_miptree *mt; + + /* Only supports 2D, non-mipmapped textures for the moment */ + if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || + pt->depth[0] != 1) + return NULL; + + mt = CALLOC_STRUCT(nv10_miptree); + if (!mt) + return NULL; + + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + mt->level[0].pitch = stride[0]; + mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + + pipe_buffer_reference(pscreen, &mt->buffer, pb); + return &mt->base; +} + +static struct pipe_texture * +nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv10_miptree *mt; + + mt = 
MALLOC(sizeof(struct nv10_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = screen; + + nv10_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->total_size); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + + return &mt->base; +} + +static void +nv10_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) +{ + struct pipe_texture *mt = *pt; + + *pt = NULL; + if (--mt->refcount <= 0) { + struct nv10_miptree *nv10mt = (struct nv10_miptree *)mt; + int l; + + pipe_buffer_reference(screen, &nv10mt->buffer, NULL); + for (l = 0; l <= mt->last_level; l++) { + if (nv10mt->level[l].image_offset) + FREE(nv10mt->level[l].image_offset); + } + FREE(nv10mt); + } +} + +static void +nv10_miptree_update(struct pipe_context *pipe, struct pipe_texture *mt, + uint face, uint levels) +{ +} + + +static struct pipe_surface * +nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv10_miptree *nv10mt = (struct nv10_miptree *)pt; + struct pipe_surface *ps; + + ps = CALLOC_STRUCT(pipe_surface); + if (!ps) + return NULL; + pipe_texture_reference(&ps->texture, pt); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = nv10mt->level[level].pitch; + ps->refcount = 1; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv10mt->level[level].image_offset[face]; + } else { + ps->offset = nv10mt->level[level].image_offset[0]; + } + + return ps; +} + +static void +nv10_miptree_surface_release(struct pipe_screen *screen, + struct pipe_surface **surface) +{ +} + +void nv10_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv10_miptree_create; + 
pscreen->texture_blanket = nv10_miptree_blanket; + pscreen->texture_release = nv10_miptree_release; + pscreen->get_tex_surface = nv10_miptree_surface_get; + pscreen->tex_surface_release = nv10_miptree_surface_release; +} + diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c new file mode 100644 index 0000000000..491a881806 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -0,0 +1,265 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +/** + * \file + * Build post-transformation, post-clipping vertex buffers and element + * lists by hooking into the end of the primitive pipeline and + * manipulating the vertex_id field in the vertex headers. + * + * XXX: work in progress + * + * \author José Fonseca <jrfonseca@tungstengraphics.com> + * \author Keith Whitwell <keith@tungstengraphics.com> + */ + + +#include "util/u_debug.h" +#include "pipe/p_inlines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv10_context.h" +#include "nv10_state.h" + +#include "draw/draw_vbuf.h" + +/** + * Primitive renderer for nv10. + */ +struct nv10_vbuf_render { + struct vbuf_render base; + + struct nv10_context *nv10; + + /** Vertex buffer */ + struct pipe_buffer* buffer; + + /** Vertex size in bytes */ + unsigned vertex_size; + + /** Hardware primitive */ + unsigned hwprim; +}; + + +void nv10_vtxbuf_bind( struct nv10_context* nv10 ) +{ + int i; + for(i = 0; i < 8; i++) { + BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_OFFSET(i), 1); + OUT_RING(0/*nv10->vtxbuf*/); + BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_ATTRIB_FORMAT(i) ,1); + OUT_RING(0/*XXX*/); + } +} + +/** + * Basically a cast wrapper. 
+ */ +static INLINE struct nv10_vbuf_render * +nv10_vbuf_render( struct vbuf_render *render ) +{ + assert(render); + return (struct nv10_vbuf_render *)render; +} + + +static const struct vertex_info * +nv10_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + + nv10_emit_hw_state(nv10); + + return &nv10->vertex_info; +} + +static boolean +nv10_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + struct pipe_winsys *winsys = nv10->pipe.winsys; + size_t size = (size_t)vertex_size * (size_t)nr_vertices; + + assert(!nv10_render->buffer); + nv10_render->buffer = winsys->buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size); + + nv10->dirty |= NV10_NEW_VTXARRAYS; + + if (nv10_render->buffer) + return FALSE; + return TRUE; +} + +static void * +nv10_vbuf_render_map_vertices( struct vbuf_render *render ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + struct pipe_winsys *winsys = nv10->pipe.winsys; + + return winsys->buffer_map(winsys, + nv10_render->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE); +} + +static void +nv10_vbuf_render_unmap_vertices( struct vbuf_render *render, + ushort min_index, + ushort max_index ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + struct pipe_winsys *winsys = nv10->pipe.winsys; + + assert(!nv10_render->buffer); + winsys->buffer_unmap(winsys, nv10_render->buffer); +} + +static boolean +nv10_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + unsigned hwp = nvgl_primitive(prim); + if (hwp == 0) + return FALSE; + + 
nv10_render->hwprim = hwp; + return TRUE; +} + + +static void +nv10_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + int push, i; + + nv10_emit_hw_state(nv10); + + BEGIN_RING(celsius, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(nv10_render->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + + BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(nv10_render->hwprim); + + if (nr_indices & 1) { + BEGIN_RING(celsius, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (indices[0]); + indices++; nr_indices--; + } + + while (nr_indices) { + // XXX too big/small ? check the size + push = MIN2(nr_indices, 1200 * 2); + + BEGIN_RING_NI(celsius, NV10TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((indices[i+1] << 16) | indices[i]); + + nr_indices -= push; + indices += push; + } + + BEGIN_RING(celsius, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (0); +} + + +static void +nv10_vbuf_render_release_vertices( struct vbuf_render *render ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + struct nv10_context *nv10 = nv10_render->nv10; + struct pipe_screen *pscreen = &nv10->screen->pipe; + + assert(nv10_render->buffer); + pipe_buffer_reference(pscreen, &nv10_render->buffer, NULL); +} + + +static void +nv10_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct nv10_vbuf_render *nv10_render = nv10_vbuf_render(render); + FREE(nv10_render); +} + + +/** + * Create a new primitive render. 
+ */ +static struct vbuf_render * +nv10_vbuf_render_create( struct nv10_context *nv10 ) +{ + struct nv10_vbuf_render *nv10_render = CALLOC_STRUCT(nv10_vbuf_render); + + nv10_render->nv10 = nv10; + + nv10_render->base.max_vertex_buffer_bytes = 16*1024; + nv10_render->base.max_indices = 1024; + nv10_render->base.get_vertex_info = nv10_vbuf_render_get_vertex_info; + nv10_render->base.allocate_vertices = nv10_vbuf_render_allocate_vertices; + nv10_render->base.map_vertices = nv10_vbuf_render_map_vertices; + nv10_render->base.unmap_vertices = nv10_vbuf_render_unmap_vertices; + nv10_render->base.set_primitive = nv10_vbuf_render_set_primitive; + nv10_render->base.draw = nv10_vbuf_render_draw; + nv10_render->base.release_vertices = nv10_vbuf_render_release_vertices; + nv10_render->base.destroy = nv10_vbuf_render_destroy; + + return &nv10_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *nv10_draw_vbuf_stage( struct nv10_context *nv10 ) +{ + struct vbuf_render *render; + struct draw_stage *stage; + + render = nv10_vbuf_render_create(nv10); + if(!render) + return NULL; + + stage = draw_vbuf_stage( nv10->draw, render ); + if(!stage) { + render->destroy(render); + return NULL; + } + + return stage; +} diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c new file mode 100644 index 0000000000..f417b06c94 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -0,0 +1,226 @@ +#include "pipe/p_screen.h" +#include "util/u_simple_screen.h" + +#include "nv10_context.h" +#include "nv10_screen.h" + +static const char * +nv10_screen_get_name(struct pipe_screen *screen) +{ + struct nv10_screen *nv10screen = nv10_screen(screen); + struct nouveau_device *dev = nv10screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv10_screen_get_vendor(struct pipe_screen *screen) +{ + return "nouveau"; 
+} + +static int +nv10_screen_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 2; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 0; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 0; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; + case NOUVEAU_CAP_HW_VTXBUF: + case NOUVEAU_CAP_HW_IDXBUF: + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv10_screen_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 2.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv10_screen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case 
PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static void * +nv10_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + void *map; + struct nv10_miptree *nv10mt = (struct nv10_miptree *)surface->texture; + + map = ws->buffer_map(ws, nv10mt->buffer, flags); + if (!map) + return NULL; + + return map + surface->offset; +} + +static void +nv10_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv10_miptree *nv10mt = (struct nv10_miptree *)surface->texture; + + ws->buffer_unmap(ws, nv10mt->buffer); +} + +static void +nv10_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv10_screen *screen = nv10_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->celsius); + + FREE(pscreen); +} + +static struct pipe_buffer * +nv10_surface_buffer(struct pipe_surface *surf) +{ + struct nv10_miptree *mt = (struct nv10_miptree *)surf->texture; + + return mt->buffer; +} + +struct pipe_screen * +nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv10_screen *screen = CALLOC_STRUCT(nv10_screen); + unsigned celsius_class; + unsigned chipset = nvws->channel->device->chipset; + int ret; + + if (!screen) + return NULL; + screen->nvws = nvws; + + /* 2D engine setup */ + screen->eng2d = nv04_surface_2d_init(nvws); + screen->eng2d->buf = nv10_surface_buffer; + + /* 3D object */ + if (chipset>=0x20) + celsius_class=NV11TCL; + else if (chipset>=0x17) + celsius_class=NV17TCL; + else if (chipset>=0x11) + celsius_class=NV11TCL; + else + celsius_class=NV10TCL; + + if (!celsius_class) { + NOUVEAU_ERR("Unknown nv1x chipset: nv%02x\n", chipset); + return NULL; + } + + ret = nvws->grobj_alloc(nvws, celsius_class, &screen->celsius); + if (ret) { + NOUVEAU_ERR("Error 
creating 3D object: %d\n", ret); + return FALSE; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv10_screen_destroy(&screen->pipe); + return NULL; + } + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv10_screen_destroy; + + screen->pipe.get_name = nv10_screen_get_name; + screen->pipe.get_vendor = nv10_screen_get_vendor; + screen->pipe.get_param = nv10_screen_get_param; + screen->pipe.get_paramf = nv10_screen_get_paramf; + + screen->pipe.is_format_supported = nv10_screen_is_format_supported; + + screen->pipe.surface_map = nv10_surface_map; + screen->pipe.surface_unmap = nv10_surface_unmap; + + nv10_screen_init_miptree_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv10/nv10_screen.h b/src/gallium/drivers/nv10/nv10_screen.h new file mode 100644 index 0000000000..60102a369a --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_screen.h @@ -0,0 +1,24 @@ +#ifndef __NV10_SCREEN_H__ +#define __NV10_SCREEN_H__ + +#include "pipe/p_screen.h" +#include "nv04/nv04_surface_2d.h" + +struct nv10_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + + /* HW graphics objects */ + struct nv04_surface_2d *eng2d; + struct nouveau_grobj *celsius; + struct nouveau_notifier *sync; +}; + +static INLINE struct nv10_screen * +nv10_screen(struct pipe_screen *screen) +{ + return (struct nv10_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c new file mode 100644 index 0000000000..119af66dfd --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -0,0 +1,589 @@ +#include "draw/draw_context.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv10_context.h" +#include "nv10_state.h" + +static void * 
+nv10_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + /* Translate the template blend state into the packed values stored in nv10_blend_state; returns NULL if allocation fails. */ + struct nv10_blend_state *cb; + + cb = MALLOC(sizeof(struct nv10_blend_state)); + if (!cb) + return NULL; + + cb->b_enable = cso->blend_enable ? 1 : 0; + cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | + (nvgl_blend_func(cso->rgb_src_factor))); + cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | + (nvgl_blend_func(cso->rgb_dst_factor))); + + /* one enable bit per channel: A at bit 24, R at 16, G at 8, B at 0 */ + cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0)); + + cb->d_enable = cso->dither ? 1 : 0; + + return (void *)cb; +} + +static void +nv10_blend_state_bind(struct pipe_context *pipe, void *blend) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->blend = (struct nv10_blend_state*)blend; + + nv10->dirty |= NV10_NEW_BLEND; +} + +static void +nv10_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV10TCL_TX_FORMAT_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV10TCL_TX_FORMAT_WRAP_S_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV10TCL_TX_FORMAT_WRAP_S_REPEAT; + break; + } + + return ret >> NV10TCL_TX_FORMAT_WRAP_S_SHIFT; +} + +static void * +nv10_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + 
struct nv10_sampler_state *ps; + uint32_t filter = 0; + + ps = MALLOC(sizeof(struct nv10_sampler_state)); + /* allocation can fail; do not write through a NULL state object */ + if (!ps) + return NULL; + + ps->wrap = ((wrap_mode(cso->wrap_s) << NV10TCL_TX_FORMAT_WRAP_S_SHIFT) | + (wrap_mode(cso->wrap_t) << NV10TCL_TX_FORMAT_WRAP_T_SHIFT)); + + ps->en = 0; + if (cso->max_anisotropy > 1.0) { + /* no idea, binary driver sets it, works without it.. meh.. */ + ps->wrap |= (1 << 5); + +/* if (cso->max_anisotropy >= 16.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_16X; + } else + if (cso->max_anisotropy >= 12.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_12X; + } else + if (cso->max_anisotropy >= 10.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_10X; + } else + if (cso->max_anisotropy >= 8.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 6.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_6X; + } else + if (cso->max_anisotropy >= 4.0) { + ps->en |= NV10TCL_TX_ENABLE_ANISO_4X; + } else { + ps->en |= NV10TCL_TX_ENABLE_ANISO_2X; + }*/ + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV10TCL_TX_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV10TCL_TX_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV10TCL_TX_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV10TCL_TX_FILTER_MINIFY_NEAREST; + break; + } + break; + } + + 
ps->filt = filter; + +/* if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NEVER; + break; + case PIPE_FUNC_GREATER: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GREATER; + break; + case PIPE_FUNC_EQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_EQUAL; + break; + case PIPE_FUNC_GEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GEQUAL; + break; + case PIPE_FUNC_LESS: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LESS; + break; + case PIPE_FUNC_NOTEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NOTEQUAL; + break; + case PIPE_FUNC_LEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LEQUAL; + break; + case PIPE_FUNC_ALWAYS: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_ALWAYS; + break; + default: + break; + } + }*/ + + ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | + (float_to_ubyte(cso->border_color[0]) << 16) | + (float_to_ubyte(cso->border_color[1]) << 8) | + (float_to_ubyte(cso->border_color[2]) << 0)); + + return (void *)ps; +} + +static void +nv10_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv10_context *nv10 = nv10_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv10->tex_sampler[unit] = sampler[unit]; + nv10->dirty_samplers |= (1 << unit); + } +} + +static void +nv10_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nv10_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nv10_context *nv10 = nv10_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv10->tex_miptree[unit] = (struct nv10_miptree *)miptree[unit]; + nv10->dirty_samplers |= (1 << unit); + } +} + +static void * +nv10_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv10_rasterizer_state *rs; + int i; + + /*XXX: ignored: + * light_twoside + * offset_cw/ccw -nohw + * scissor + * point_smooth -nohw + * 
multisample + * offset_units / offset_scale + */ + rs = MALLOC(sizeof(struct nv10_rasterizer_state)); + /* allocation can fail; do not write through a NULL state object */ + if (!rs) + return NULL; + + /* NOTE(review): only the pointer is kept, so the template presumably has to outlive this object - confirm against the caller */ + rs->templ = cso; + + rs->shade_model = cso->flatshade ? 0x1d00 : 0x1d01; + + rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff; + rs->line_smooth_en = cso->line_smooth ? 1 : 0; + + rs->point_size = *(uint32_t*)&cso->point_size; + + rs->poly_smooth_en = cso->poly_smooth ? 1 : 0; + + if (cso->front_winding == PIPE_WINDING_CCW) { + rs->front_face = NV10TCL_FRONT_FACE_CCW; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw); + } else { + rs->front_face = NV10TCL_FRONT_FACE_CW; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw); + } + + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + rs->cull_face_en = 1; + if (cso->front_winding == PIPE_WINDING_CCW) + rs->cull_face = NV10TCL_CULL_FACE_FRONT; + else + rs->cull_face = NV10TCL_CULL_FACE_BACK; + break; + case PIPE_WINDING_CW: + rs->cull_face_en = 1; + if (cso->front_winding == PIPE_WINDING_CW) + rs->cull_face = NV10TCL_CULL_FACE_FRONT; + else + rs->cull_face = NV10TCL_CULL_FACE_BACK; + break; + case PIPE_WINDING_BOTH: + rs->cull_face_en = 1; + rs->cull_face = NV10TCL_CULL_FACE_FRONT_AND_BACK; + break; + case PIPE_WINDING_NONE: + default: + rs->cull_face_en = 0; + rs->cull_face = 0; + break; + } + + if (cso->point_sprite) { + rs->point_sprite = (1 << 0); + for (i = 0; i < 8; i++) { + if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) + rs->point_sprite |= (1 << (8 + i)); + } + } else { + rs->point_sprite = 0; + } + + return (void *)rs; +} + +static void +nv10_rasterizer_state_bind(struct pipe_context *pipe, void *rast) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->rast = (struct nv10_rasterizer_state*)rast; + + draw_set_rasterizer_state(nv10->draw, (nv10->rast ? 
nv10->rast->templ : NULL)); + + nv10->dirty |= NV10_NEW_RAST; +} + +static void +nv10_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nv10_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + /* Translate depth, front-face stencil (stencil[0]) and alpha-test state; returns NULL if allocation fails. */ + struct nv10_depth_stencil_alpha_state *hw; + + hw = MALLOC(sizeof(struct nv10_depth_stencil_alpha_state)); + if (!hw) + return NULL; + + hw->depth.func = nvgl_comparison_op(cso->depth.func); + hw->depth.write_enable = cso->depth.writemask ? 1 : 0; + hw->depth.test_enable = cso->depth.enabled ? 1 : 0; + + hw->stencil.enable = cso->stencil[0].enabled ? 1 : 0; + hw->stencil.wmask = cso->stencil[0].writemask; + hw->stencil.func = nvgl_comparison_op(cso->stencil[0].func); + hw->stencil.ref = cso->stencil[0].ref_value; + hw->stencil.vmask = cso->stencil[0].valuemask; + hw->stencil.fail = nvgl_stencil_op(cso->stencil[0].fail_op); + hw->stencil.zfail = nvgl_stencil_op(cso->stencil[0].zfail_op); + hw->stencil.zpass = nvgl_stencil_op(cso->stencil[0].zpass_op); + + hw->alpha.enabled = cso->alpha.enabled ? 
1 : 0; + hw->alpha.func = nvgl_comparison_op(cso->alpha.func); + hw->alpha.ref = float_to_ubyte(cso->alpha.ref_value); + + return (void *)hw; +} + +static void +nv10_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *dsa) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->dsa = (struct nv10_depth_stencil_alpha_state*)dsa; + + nv10->dirty |= NV10_NEW_DSA; +} + +static void +nv10_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nv10_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + return draw_create_vertex_shader(nv10->draw, templ); +} + +static void +nv10_vp_state_bind(struct pipe_context *pipe, void *shader) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + draw_bind_vertex_shader(nv10->draw, (struct draw_vertex_shader *) shader); + + nv10->dirty |= NV10_NEW_VERTPROG; +} + +static void +nv10_vp_state_delete(struct pipe_context *pipe, void *shader) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + draw_delete_vertex_shader(nv10->draw, (struct draw_vertex_shader *) shader); +} + +/* + * Build a fragment program object holding a private copy of the shader + * tokens plus the scanned shader info. Returns NULL if allocation fails. + */ +static void * +nv10_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv10_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv10_fragment_program)); + if (!fp) + return NULL; + /* tokens are duplicated here and released in nv10_fp_state_delete */ + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(cso->tokens, &fp->info); + + return (void *)fp; +} + +static void +nv10_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_fragment_program *fp = hwcso; + + nv10->fragprog.current = fp; + nv10->dirty |= NV10_NEW_FRAGPROG; +} + +static void +nv10_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nv10_fragment_program *fp = hwcso; + + nv10_fragprog_destroy(nv10, fp); + FREE((void*)fp->pipe.tokens); + 
FREE(fp); +} + +static void +nv10_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->blend_color = (struct pipe_blend_color*)bcol; + + nv10->dirty |= NV10_NEW_BLENDCOL; +} + +static void +nv10_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + draw_set_clip_state(nv10->draw, clip); +} + +static void +nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + if (buf) { + void *mapped; + if (buf->buffer && buf->buffer->size && + (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + { + memcpy(nv10->constbuf[shader], mapped, buf->buffer->size); + nv10->constbuf_nr[shader] = + buf->buffer->size / (4 * sizeof(float)); + ws->buffer_unmap(ws, buf->buffer); + } + } +} + +static void +nv10_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->framebuffer = (struct pipe_framebuffer_state*)fb; + + nv10->dirty |= NV10_NEW_FRAMEBUFFER; +} + +static void +nv10_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + NOUVEAU_ERR("line stipple hahaha\n"); +} + +static void +nv10_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->scissor = (struct pipe_scissor_state*)s; + + nv10->dirty |= NV10_NEW_SCISSOR; +} + +static void +nv10_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + nv10->viewport = (struct pipe_viewport_state*)vpt; + + 
draw_set_viewport_state(nv10->draw, nv10->viewport); + + nv10->dirty |= NV10_NEW_VIEWPORT; +} + +static void +nv10_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + memcpy(nv10->vtxbuf, vb, sizeof(*vb) * count); + nv10->dirty |= NV10_NEW_VTXARRAYS; + + draw_set_vertex_buffers(nv10->draw, count, vb); +} + +static void +nv10_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) +{ + struct nv10_context *nv10 = nv10_context(pipe); + + memcpy(nv10->vtxelt, ve, sizeof(*ve) * count); + nv10->dirty |= NV10_NEW_VTXARRAYS; + + draw_set_vertex_elements(nv10->draw, count, ve); +} + +void +nv10_init_state_functions(struct nv10_context *nv10) +{ + nv10->pipe.create_blend_state = nv10_blend_state_create; + nv10->pipe.bind_blend_state = nv10_blend_state_bind; + nv10->pipe.delete_blend_state = nv10_blend_state_delete; + + nv10->pipe.create_sampler_state = nv10_sampler_state_create; + nv10->pipe.bind_sampler_states = nv10_sampler_state_bind; + nv10->pipe.delete_sampler_state = nv10_sampler_state_delete; + nv10->pipe.set_sampler_textures = nv10_set_sampler_texture; + + nv10->pipe.create_rasterizer_state = nv10_rasterizer_state_create; + nv10->pipe.bind_rasterizer_state = nv10_rasterizer_state_bind; + nv10->pipe.delete_rasterizer_state = nv10_rasterizer_state_delete; + + nv10->pipe.create_depth_stencil_alpha_state = + nv10_depth_stencil_alpha_state_create; + nv10->pipe.bind_depth_stencil_alpha_state = + nv10_depth_stencil_alpha_state_bind; + nv10->pipe.delete_depth_stencil_alpha_state = + nv10_depth_stencil_alpha_state_delete; + + nv10->pipe.create_vs_state = nv10_vp_state_create; + nv10->pipe.bind_vs_state = nv10_vp_state_bind; + nv10->pipe.delete_vs_state = nv10_vp_state_delete; + + nv10->pipe.create_fs_state = nv10_fp_state_create; + nv10->pipe.bind_fs_state = nv10_fp_state_bind; + nv10->pipe.delete_fs_state = 
nv10_fp_state_delete; + + nv10->pipe.set_blend_color = nv10_set_blend_color; + nv10->pipe.set_clip_state = nv10_set_clip_state; + nv10->pipe.set_constant_buffer = nv10_set_constant_buffer; + nv10->pipe.set_framebuffer_state = nv10_set_framebuffer_state; + nv10->pipe.set_polygon_stipple = nv10_set_polygon_stipple; + nv10->pipe.set_scissor_state = nv10_set_scissor_state; + nv10->pipe.set_viewport_state = nv10_set_viewport_state; + + nv10->pipe.set_vertex_buffers = nv10_set_vertex_buffers; + nv10->pipe.set_vertex_elements = nv10_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv10/nv10_state.h b/src/gallium/drivers/nv10/nv10_state.h new file mode 100644 index 0000000000..3a3fd0d4f4 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_state.h @@ -0,0 +1,139 @@ +#ifndef __NV10_STATE_H__ +#define __NV10_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv10_blend_state { + uint32_t b_enable; + uint32_t b_srcfunc; + uint32_t b_dstfunc; + + uint32_t c_mask; + + uint32_t d_enable; +}; + +struct nv10_sampler_state { + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +struct nv10_rasterizer_state { + uint32_t shade_model; + + uint32_t line_width; + uint32_t line_smooth_en; + + uint32_t point_size; + + uint32_t poly_smooth_en; + + uint32_t poly_mode_front; + uint32_t poly_mode_back; + + uint32_t front_face; + uint32_t cull_face; + uint32_t cull_face_en; + + uint32_t point_sprite; + + const struct pipe_rasterizer_state *templ; +}; + +struct nv10_vertex_program_exec { + uint32_t data[4]; + boolean has_branch_offset; + int const_index; +}; + +struct nv10_vertex_program_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct nv10_vertex_program { + const struct pipe_shader_state *pipe; + + boolean translated; + struct nv10_vertex_program_exec *insns; + unsigned nr_insns; + struct nv10_vertex_program_data *consts; + unsigned nr_consts; + + struct nouveau_resource *exec; + unsigned exec_start; + struct 
nouveau_resource *data; + unsigned data_start; + unsigned data_start_min; + + uint32_t ir; + uint32_t or; +}; + +struct nv10_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv10_fragment_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + boolean translated; + boolean on_hw; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv10_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + uint32_t fp_reg_control; +}; + + +struct nv10_depth_stencil_alpha_state { + struct { + uint32_t func; + uint32_t write_enable; + uint32_t test_enable; + } depth; + + struct { + uint32_t enable; + uint32_t wmask; + uint32_t func; + uint32_t ref; + uint32_t vmask; + uint32_t fail; + uint32_t zfail; + uint32_t zpass; + } stencil; + + struct { + uint32_t enabled; + uint32_t func; + uint32_t ref; + } alpha; +}; + +struct nv10_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c new file mode 100644 index 0000000000..5dec618b93 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -0,0 +1,306 @@ +#include "nv10_context.h" +#include "nv10_state.h" + +static void nv10_state_emit_blend(struct nv10_context* nv10) +{ + struct nv10_blend_state *b = nv10->blend; + + BEGIN_RING(celsius, NV10TCL_DITHER_ENABLE, 1); + OUT_RING (b->d_enable); + + BEGIN_RING(celsius, NV10TCL_BLEND_FUNC_ENABLE, 3); + OUT_RING (b->b_enable); + OUT_RING (b->b_srcfunc); + OUT_RING (b->b_dstfunc); + + BEGIN_RING(celsius, NV10TCL_COLOR_MASK, 1); + OUT_RING (b->c_mask); +} + +static void nv10_state_emit_blend_color(struct nv10_context* nv10) +{ + struct pipe_blend_color *c = nv10->blend_color; + + BEGIN_RING(celsius, NV10TCL_BLEND_COLOR, 1); + OUT_RING 
((float_to_ubyte(c->color[3]) << 24)| + (float_to_ubyte(c->color[0]) << 16)| + (float_to_ubyte(c->color[1]) << 8) | + (float_to_ubyte(c->color[2]) << 0)); +} + +static void nv10_state_emit_rast(struct nv10_context* nv10) +{ + struct nv10_rasterizer_state *r = nv10->rast; + + BEGIN_RING(celsius, NV10TCL_SHADE_MODEL, 2); + OUT_RING (r->shade_model); + OUT_RING (r->line_width); + + + BEGIN_RING(celsius, NV10TCL_POINT_SIZE, 1); + OUT_RING (r->point_size); + + BEGIN_RING(celsius, NV10TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (r->poly_mode_front); + OUT_RING (r->poly_mode_back); + + + BEGIN_RING(celsius, NV10TCL_CULL_FACE, 2); + OUT_RING (r->cull_face); + OUT_RING (r->front_face); + + BEGIN_RING(celsius, NV10TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (r->line_smooth_en); + OUT_RING (r->poly_smooth_en); + + BEGIN_RING(celsius, NV10TCL_CULL_FACE_ENABLE, 1); + OUT_RING (r->cull_face_en); +} + +static void nv10_state_emit_dsa(struct nv10_context* nv10) +{ + struct nv10_depth_stencil_alpha_state *d = nv10->dsa; + + BEGIN_RING(celsius, NV10TCL_DEPTH_FUNC, 1); + OUT_RING (d->depth.func); + + BEGIN_RING(celsius, NV10TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (d->depth.write_enable); + + BEGIN_RING(celsius, NV10TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (d->depth.test_enable); + +#if 0 + BEGIN_RING(celsius, NV10TCL_STENCIL_ENABLE, 1); + OUT_RING (d->stencil.enable); + BEGIN_RING(celsius, NV10TCL_STENCIL_MASK, 7); + OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); +#endif + + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (d->alpha.enabled); + + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (d->alpha.func); + + BEGIN_RING(celsius, NV10TCL_ALPHA_FUNC_REF, 1); + OUT_RING (d->alpha.ref); +} + +static void nv10_state_emit_viewport(struct nv10_context* nv10) +{ +} + +static void nv10_state_emit_scissor(struct nv10_context* nv10) +{ + // XXX this is so not working +/* struct pipe_scissor_state *s = nv10->scissor; + BEGIN_RING(celsius, NV10TCL_SCISSOR_HORIZ, 2); + OUT_RING 
(((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ +} + +static void nv10_state_emit_framebuffer(struct nv10_context* nv10) +{ + struct pipe_framebuffer_state* fb = nv10->framebuffer; + struct pipe_surface *rt, *zeta = NULL; + uint32_t rt_format, w, h; + int colour_format = 0, zeta_format = 0; + struct nv10_miptree *nv10mt = 0; + + w = fb->cbufs[0]->width; + h = fb->cbufs[0]->height; + colour_format = fb->cbufs[0]->format; + rt = fb->cbufs[0]; + + if (fb->zsbuf) { + if (colour_format) { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } else { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + } + + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + rt_format = NV10TCL_RT_FORMAT_TYPE_LINEAR; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV10TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV10TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + if (zeta) { + BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); + OUT_RING (rt->stride | (zeta->stride << 16)); + } else { + BEGIN_RING(celsius, NV10TCL_RT_PITCH, 1); + OUT_RING (rt->stride | (rt->stride << 16)); + } + + nv10mt = (struct nv10_miptree *)rt->texture; + nv10->rt[0] = nv10mt->buffer; + + if (zeta_format) + { + nv10mt = (struct nv10_miptree *)zeta->texture; + nv10->zeta = nv10mt->buffer; + } + + BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3); + OUT_RING ((w << 16) | 0); + OUT_RING ((h << 16) | 0); + OUT_RING (rt_format); + BEGIN_RING(celsius, NV10TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (((w - 1) << 16) | 0 | 0x08000800); + OUT_RING (((h - 1) << 16) | 0 | 0x08000800); +} + +static void nv10_vertex_layout(struct nv10_context *nv10) +{ + struct nv10_fragment_program *fp = nv10->fragprog.current; + uint32_t src = 0; + int i; + struct vertex_info vinfo; + + memset(&vinfo, 0, sizeof(vinfo)); + + for (i = 0; i < fp->info.num_inputs; i++) { + switch 
(fp->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + case TGSI_SEMANTIC_COLOR: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src++); + break; + default: + case TGSI_SEMANTIC_GENERIC: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + break; + case TGSI_SEMANTIC_FOG: + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, src++); + break; + } + } + draw_compute_vertex_size(&vinfo); +} + +/* + * Emit every piece of state whose bit is set in nv10->dirty (clearing the + * bit as it goes), then unconditionally re-emit buffer relocations for the + * render target, the depth buffer (if any) and the sampled textures. + */ +void +nv10_emit_hw_state(struct nv10_context *nv10) +{ + int i; + + if (nv10->dirty & NV10_NEW_VERTPROG) { + //nv10_vertprog_bind(nv10, nv10->vertprog.current); + nv10->dirty &= ~NV10_NEW_VERTPROG; + } + + if (nv10->dirty & NV10_NEW_FRAGPROG) { + nv10_fragprog_bind(nv10, nv10->fragprog.current); + /*XXX: clear NV10_NEW_FRAGPROG if no new program uploaded */ + /* NOTE(review): the OR below is overwritten by the assignment that follows, so it has no effect, and any sampler bits already pending are cleared here - confirm which of the two statements was intended */ + nv10->dirty_samplers |= (1<<10); + nv10->dirty_samplers = 0; + } + + /* NEW_FRAGPROG is only cleared here, after the texture bind has run */ + if (nv10->dirty_samplers || (nv10->dirty & NV10_NEW_FRAGPROG)) { + nv10_fragtex_bind(nv10); + nv10->dirty &= ~NV10_NEW_FRAGPROG; + } + + if (nv10->dirty & NV10_NEW_VTXARRAYS) { + nv10->dirty &= ~NV10_NEW_VTXARRAYS; + nv10_vertex_layout(nv10); + nv10_vtxbuf_bind(nv10); + } + + if (nv10->dirty & NV10_NEW_BLEND) { + nv10->dirty &= ~NV10_NEW_BLEND; + nv10_state_emit_blend(nv10); + } + + if (nv10->dirty & NV10_NEW_BLENDCOL) { + nv10->dirty &= ~NV10_NEW_BLENDCOL; + nv10_state_emit_blend_color(nv10); + } + + if (nv10->dirty & NV10_NEW_RAST) { + nv10->dirty &= ~NV10_NEW_RAST; + nv10_state_emit_rast(nv10); + } + + if (nv10->dirty & NV10_NEW_DSA) { + nv10->dirty &= ~NV10_NEW_DSA; + nv10_state_emit_dsa(nv10); + } + + if (nv10->dirty & NV10_NEW_VIEWPORT) { + nv10->dirty &= ~NV10_NEW_VIEWPORT; + nv10_state_emit_viewport(nv10); + } + + if (nv10->dirty & NV10_NEW_SCISSOR) { + nv10->dirty &= ~NV10_NEW_SCISSOR; + nv10_state_emit_scissor(nv10); + } + + if (nv10->dirty & NV10_NEW_FRAMEBUFFER) { + nv10->dirty &= ~NV10_NEW_FRAMEBUFFER; + 
nv10_state_emit_framebuffer(nv10); + } + + /* Emit relocs for every referenced buffer. + * This is to ensure the bufmgr has an accurate idea of how + * the buffer is used. This isn't very efficient, but we don't + * seem to take a significant performance hit. Will be improved + * at some point. Vertex arrays are emitted by nv10_vbo.c + */ + + /* Render target */ +// XXX figure out who's who for NV10TCL_DMA_* and fill accordingly +// BEGIN_RING(celsius, NV10TCL_DMA_COLOR0, 1); +// OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + if (nv10->zeta) { +// XXX +// BEGIN_RING(celsius, NV10TCL_DMA_ZETA, 1); +// OUT_RELOCo(nv10->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(celsius, NV10TCL_ZETA_OFFSET, 1); + OUT_RELOCl(nv10->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + /* XXX for when we allocate LMA on nv17 */ +/* BEGIN_RING(celsius, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(nv10->zeta + lma_offset);*/ + } + + /* Vertex buffer */ + /* NOTE(review): this section programs DMA_VTXBUF0 from rt[0] and then emits COLOR_OFFSET a second time; no vertex buffer is referenced - confirm this is intended */ + BEGIN_RING(celsius, NV10TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(nv10->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(celsius, NV10TCL_COLOR_OFFSET, 1); + OUT_RELOCl(nv10->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + /* Texture images */ + /* only units whose bit is set in fp_samplers get relocations */ + for (i = 0; i < 2; i++) { + if (!(nv10->fp_samplers & (1 << i))) + continue; + BEGIN_RING(celsius, NV10TCL_TX_OFFSET(i), 1); + OUT_RELOCl(nv10->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_RING(celsius, NV10TCL_TX_FORMAT(i), 1); + OUT_RELOCd(nv10->tex[i].buffer, nv10->tex[i].format, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | + NOUVEAU_BO_OR, NV10TCL_TX_FORMAT_DMA0, + NV10TCL_TX_FORMAT_DMA1); + } +} + diff --git a/src/gallium/drivers/nv10/nv10_surface.c b/src/gallium/drivers/nv10/nv10_surface.c new file mode 100644 index 0000000000..2538151063 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_surface.c @@ -0,0 +1,72 @@ + 
+/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "nv10_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +/* + * Copy a width x height rectangle from src to dest with the 2D engine. + * With do_flip set the rows are copied one at a time, source rows top-down + * into destination rows bottom-up, so the image is mirrored vertically + * inside the same destination rectangle the unflipped copy would cover. + */ +static void +nv10_surface_copy(struct pipe_context *pipe, boolean do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nv04_surface_2d *eng2d = nv10->screen->eng2d; + + if (do_flip) { + /* last row of the rectangle is desty + height - 1; starting at desty + height wrote one row past it and never touched row desty */ + desty += height - 1; + while (height--) { + eng2d->copy(eng2d, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + return; + } + + eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); +} + +static void +nv10_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv10_context *nv10 = nv10_context(pipe); + struct nv04_surface_2d *eng2d = nv10->screen->eng2d; + + eng2d->fill(eng2d, dest, destx, desty, width, height, value); +} + +void +nv10_init_surface_functions(struct nv10_context *nv10) +{ + nv10->pipe.surface_copy = nv10_surface_copy; + nv10->pipe.surface_fill = nv10_surface_fill; +} diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c new file mode 100644 index 0000000000..d0e788ac03 --- /dev/null +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -0,0 +1,77 @@ +#include "draw/draw_context.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv10_context.h" +#include "nv10_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" + +boolean nv10_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + struct nv10_context *nv10 = nv10_context( pipe ); + struct draw_context 
*draw = nv10->draw; + unsigned i; + + nv10_emit_hw_state(nv10); + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv10->vtxbuf[i].buffer) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + nv10->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + draw_set_mapped_constant_buffer(draw, + nv10->constbuf[PIPE_SHADER_VERTEX], + nv10->constbuf_nr[PIPE_SHADER_VERTEX]); + + /* draw! */ + draw_arrays(nv10->draw, prim, start, count); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv10->vtxbuf[i].buffer) { + pipe->winsys->buffer_unmap(pipe->winsys, nv10->vtxbuf[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + } + if (indexBuffer) { + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + return TRUE; +} + +boolean nv10_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return nv10_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + diff --git a/src/gallium/drivers/nv20/Makefile b/src/gallium/drivers/nv20/Makefile new file mode 100644 index 0000000000..93e34f8e92 --- /dev/null +++ b/src/gallium/drivers/nv20/Makefile @@ -0,0 +1,20 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = nv20 + +C_SOURCES = \ + nv20_clear.c \ + nv20_context.c \ + nv20_fragprog.c \ + nv20_fragtex.c \ + nv20_miptree.c \ + nv20_prim_vbuf.c \ + nv20_screen.c \ + nv20_state.c \ + nv20_state_emit.c \ + nv20_surface.c \ + nv20_vbo.c +# nv20_vertprog.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/nv20/nv20_clear.c b/src/gallium/drivers/nv20/nv20_clear.c new file mode 100644 index 0000000000..29f4afd87c --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_clear.c @@ -0,0 +1,13 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv20_context.h" + +void +nv20_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + ps->status = PIPE_SURFACE_STATUS_CLEAR; +} diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c new file mode 100644 index 0000000000..1659aec8fa --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_context.c @@ -0,0 +1,419 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv20_context.h" +#include "nv20_screen.h" + +static void +nv20_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + draw_flush(nv20->draw); + + FIRE_RING(fence); +} + +static void +nv20_destroy(struct pipe_context *pipe) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + if (nv20->draw) + draw_destroy(nv20->draw); + + FREE(nv20); +} + +static void nv20_init_hwctx(struct nv20_context *nv20) +{ + struct nv20_screen *screen = nv20->screen; + struct nouveau_winsys *nvws = screen->nvws; + int i; + float projectionmatrix[16]; + const boolean is_nv25tcl = (nv20->screen->kelvin->grclass == NV25TCL); + + BEGIN_RING(kelvin, NV20TCL_DMA_NOTIFY, 1); + OUT_RING (screen->sync->handle); + 
BEGIN_RING(kelvin, NV20TCL_DMA_TEXTURE0, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->gart->handle); /* TEXTURE1 */ + BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 2); + OUT_RING (nvws->channel->vram->handle); + OUT_RING (nvws->channel->vram->handle); /* ZETA */ + + BEGIN_RING(kelvin, NV20TCL_DMA_QUERY, 1); + OUT_RING (0); /* renouveau: beef0351, unique */ + + BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING (0); + OUT_RING (0); + + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 1); + OUT_RING ((0xfff << 16) | 0x0); + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(0), 1); + OUT_RING ((0xfff << 16) | 0x0); + + for (i = 1; i < NV20TCL_VIEWPORT_CLIP_HORIZ__SIZE; i++) { + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(i), 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_VERT(i), 1); + OUT_RING (0); + } + + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_MODE, 1); + OUT_RING (0); + + BEGIN_RING(kelvin, 0x17e0, 3); + OUT_RINGf (0.0); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + + if (is_nv25tcl) { + BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (NV20TCL_TX_RCOMP_LEQUAL | 0xdb0); + } else { + BEGIN_RING(kelvin, 0x1e68, 1); + OUT_RING (0x4b800000); /* 16777216.000000 */ + BEGIN_RING(kelvin, NV20TCL_TX_RCOMP, 1); + OUT_RING (NV20TCL_TX_RCOMP_LEQUAL); + } + + BEGIN_RING(kelvin, 0x290, 1); + OUT_RING ((0x10 << 16) | 1); + BEGIN_RING(kelvin, 0x9fc, 1); + OUT_RING (0); + BEGIN_RING(kelvin, 0x1d80, 1); + OUT_RING (1); + BEGIN_RING(kelvin, 0x9f8, 1); + OUT_RING (4); + BEGIN_RING(kelvin, 0x17ec, 3); + OUT_RINGf (0.0); + OUT_RINGf (1.0); + OUT_RINGf (0.0); + + if (is_nv25tcl) { + BEGIN_RING(kelvin, 0x1d88, 1); + OUT_RING (3); + + BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY9, 1); + OUT_RING (nvws->channel->vram->handle); + BEGIN_RING(kelvin, NV25TCL_DMA_IN_MEMORY8, 1); + OUT_RING (nvws->channel->vram->handle); + } + BEGIN_RING(kelvin, NV20TCL_DMA_FENCE, 1); + OUT_RING (0); /* renouveau: beef1e10 */ + + BEGIN_RING(kelvin, 0x1e98, 1); + OUT_RING (0); 
+#if 0 + if (is_nv25tcl) { + BEGIN_RING(NvSub3D, NV25TCL_DMA_IN_MEMORY4, 2); + OUT_RING (NvDmaTT); /* renouveau: beef0202 */ + OUT_RING (NvDmaFB); /* renouveau: beef0201 */ + + BEGIN_RING(NvSub3D, NV20TCL_DMA_TEXTURE1, 1); + OUT_RING (NvDmaTT); /* renouveau: beef0202 */ + } +#endif + BEGIN_RING(kelvin, NV20TCL_NOTIFY, 1); + OUT_RING (0); + + BEGIN_RING(kelvin, 0x120, 3); + OUT_RING (0); + OUT_RING (1); + OUT_RING (2); + +/* error: ILLEGAL_MTHD, PROTECTION_FAULT + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_TRANSLATE_X, 4); + OUT_RINGf (0.0); + OUT_RINGf (512.0); + OUT_RINGf (0.0); + OUT_RINGf (0.0); +*/ + + if (is_nv25tcl) { + BEGIN_RING(kelvin, 0x022c, 2); + OUT_RING (0x280); + OUT_RING (0x07d28000); + } + +/* * illegal method, protection fault + BEGIN_RING(NvSub3D, 0x1c2c, 1); + OUT_RING (0); */ + + if (is_nv25tcl) { + BEGIN_RING(kelvin, 0x1da4, 1); + OUT_RING (0); + } + +/* * crashes with illegal method, protection fault + BEGIN_RING(NvSub3D, 0x1c18, 1); + OUT_RING (0x200); */ + + BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 2); + OUT_RING ((0 << 16) | 0); + OUT_RING ((0 << 16) | 0); + + /* *** Set state *** */ + + BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 2); + OUT_RING (NV20TCL_ALPHA_FUNC_FUNC_ALWAYS); + OUT_RING (0); /* NV20TCL_ALPHA_FUNC_REF */ + + for (i = 0; i < NV20TCL_TX_ENABLE__SIZE; ++i) { + BEGIN_RING(kelvin, NV20TCL_TX_ENABLE(i), 1); + OUT_RING (0); + } + BEGIN_RING(kelvin, NV20TCL_TX_SHADER_OP, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_TX_SHADER_CULL_MODE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_RC_IN_ALPHA(0), 4); + OUT_RING (0x30d410d0); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_RC_OUT_RGB(0), 4); + OUT_RING (0x00000c00); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_RC_ENABLE, 1); + OUT_RING (0x00011101); + BEGIN_RING(kelvin, NV20TCL_RC_FINAL0, 2); + OUT_RING (0x130e0300); + OUT_RING (0x0c091c80); + 
BEGIN_RING(kelvin, NV20TCL_RC_OUT_ALPHA(0), 4); + OUT_RING (0x00000c00); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_RC_IN_RGB(0), 4); + OUT_RING (0x20c400c0); + OUT_RING (0); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_RC_COLOR0, 2); + OUT_RING (0); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_RC_CONSTANT_COLOR0(0), 4); + OUT_RING (0x035125a0); + OUT_RING (0); + OUT_RING (0x40002000); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_MULTISAMPLE_CONTROL, 1); + OUT_RING (0xffff0000); + + BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 4); + OUT_RING (NV20TCL_BLEND_FUNC_SRC_ONE); + OUT_RING (NV20TCL_BLEND_FUNC_DST_ZERO); + OUT_RING (0); /* NV20TCL_BLEND_COLOR */ + OUT_RING (NV20TCL_BLEND_EQUATION_FUNC_ADD); + BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RING (0xff); + OUT_RING (NV20TCL_STENCIL_FUNC_FUNC_ALWAYS); + OUT_RING (0); /* NV20TCL_STENCIL_FUNC_REF */ + OUT_RING (0xff); /* NV20TCL_STENCIL_FUNC_MASK */ + OUT_RING (NV20TCL_STENCIL_OP_FAIL_KEEP); + OUT_RING (NV20TCL_STENCIL_OP_ZFAIL_KEEP); + OUT_RING (NV20TCL_STENCIL_OP_ZPASS_KEEP); + + BEGIN_RING(kelvin, NV20TCL_COLOR_LOGIC_OP_ENABLE, 2); + OUT_RING (0); + OUT_RING (NV20TCL_COLOR_LOGIC_OP_OP_COPY); + BEGIN_RING(kelvin, 0x17cc, 1); + OUT_RING (0); + if (is_nv25tcl) { + BEGIN_RING(kelvin, 0x1d84, 1); + OUT_RING (1); + } + BEGIN_RING(kelvin, NV20TCL_LIGHTING_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_LIGHT_CONTROL, 1); + OUT_RING (0x00020000); + BEGIN_RING(kelvin, NV20TCL_SEPARATE_SPECULAR_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_LIGHT_MODEL_TWO_SIDE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_ENABLED_LIGHTS, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_NORMALIZE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, 
NV20TCL_POLYGON_STIPPLE_PATTERN(0), + NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE); + for (i = 0; i < NV20TCL_POLYGON_STIPPLE_PATTERN__SIZE; ++i) { + OUT_RING(0xffffffff); + } + + BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + OUT_RING (0); + OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_LINE_ENABLE */ + OUT_RING (0); /* NV20TCL.POLYGON_OFFSET_FILL_ENABLE */ + BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (NV20TCL_DEPTH_FUNC_LESS); + BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_POLYGON_OFFSET_FACTOR, 2); + OUT_RINGf (0.0); + OUT_RINGf (0.0); /* NV20TCL.POLYGON_OFFSET_UNITS */ + BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); + OUT_RING (1); + if (!is_nv25tcl) { + BEGIN_RING(kelvin, 0x1d84, 1); + OUT_RING (3); + } + BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); + if (!is_nv25tcl) { + OUT_RING (8); + } else { + OUT_RINGf (1.0); + } + if (!is_nv25tcl) { + BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 2); + OUT_RING (0); + OUT_RING (0); /* NV20TCL.POINT_SMOOTH_ENABLE */ + } else { + BEGIN_RING(kelvin, NV20TCL_POINT_PARAMETERS_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, 0x0a1c, 1); + OUT_RING (0x800); + } + BEGIN_RING(kelvin, NV20TCL_LINE_WIDTH, 1); + OUT_RING (8); + BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (NV20TCL_POLYGON_MODE_FRONT_FILL); + OUT_RING (NV20TCL_POLYGON_MODE_BACK_FILL); + BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (NV20TCL_CULL_FACE_BACK); + OUT_RING (NV20TCL_FRONT_FACE_CCW); + BEGIN_RING(kelvin, NV20TCL_POLYGON_SMOOTH_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 1); + OUT_RING (NV20TCL_SHADE_MODEL_SMOOTH); + BEGIN_RING(kelvin, NV20TCL_POLYGON_STIPPLE_ENABLE, 1); + OUT_RING (0); + BEGIN_RING(kelvin, NV20TCL_TX_GEN_S(0), 4 * 
NV20TCL_TX_GEN_S__SIZE); + for (i=0; i < 4 * NV20TCL_TX_GEN_S__SIZE; ++i) { + OUT_RING(0); + } + BEGIN_RING(kelvin, NV20TCL_FOG_EQUATION_CONSTANT, 3); + OUT_RINGf (1.5); + OUT_RINGf (-0.090168); /* NV20TCL.FOG_EQUATION_LINEAR */ + OUT_RINGf (0.0); /* NV20TCL.FOG_EQUATION_QUADRATIC */ + BEGIN_RING(kelvin, NV20TCL_FOG_MODE, 2); + OUT_RING (NV20TCL_FOG_MODE_EXP_2); + OUT_RING (NV20TCL_FOG_COORD_DIST_COORD_FOG); + BEGIN_RING(kelvin, NV20TCL_FOG_ENABLE, 2); + OUT_RING (0); + OUT_RING (0); /* NV20TCL.FOG_COLOR */ + BEGIN_RING(kelvin, NV20TCL_ENGINE, 1); + OUT_RING (NV20TCL_ENGINE_FIXED); + + for (i = 0; i < NV20TCL_TX_MATRIX_ENABLE__SIZE; ++i) { + BEGIN_RING(kelvin, NV20TCL_TX_MATRIX_ENABLE(i), 1); + OUT_RING (0); + } + + BEGIN_RING(kelvin, NV20TCL_VTX_ATTR_4F_X(1), 4 * 15); + OUT_RINGf(1.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); + OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); OUT_RINGf(1.0); + OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); OUT_RINGf(1.0); + for (i = 4; i < 16; ++i) { + OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(0.0); OUT_RINGf(1.0); + } + + BEGIN_RING(kelvin, NV20TCL_EDGEFLAG_ENABLE, 1); + OUT_RING (1); + BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (0x00010101); + BEGIN_RING(kelvin, NV20TCL_CLEAR_VALUE, 1); + OUT_RING (0); + + memset(projectionmatrix, 0, sizeof(projectionmatrix)); + projectionmatrix[0*4+0] = 1.0; + projectionmatrix[1*4+1] = 1.0; + projectionmatrix[2*4+2] = 16777215.0; + projectionmatrix[3*4+3] = 1.0; + BEGIN_RING(kelvin, NV20TCL_PROJECTION_MATRIX(0), 16); + for (i = 0; i < 16; i++) { + OUT_RINGf (projectionmatrix[i]); + } + + BEGIN_RING(kelvin, NV20TCL_DEPTH_RANGE_NEAR, 2); + OUT_RINGf (0.0); + OUT_RINGf (16777216.0); /* [0, 1] scaled approx to [0, 2^24] */ + + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE0_X, 4); + OUT_RINGf (0.0); /* x-offset, w/2 + 1.031250 */ + OUT_RINGf (0.0); /* y-offset, h/2 + 0.030762 */ + OUT_RINGf (0.0); + OUT_RINGf (16777215.0); + + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_SCALE1_X, 4); + 
OUT_RINGf (0.0); /* no effect?, w/2 */ + OUT_RINGf (0.0); /* no effect?, h/2 */ + OUT_RINGf (16777215.0 * 0.5); + OUT_RINGf (65535.0); + + FIRE_RING (NULL); +} + +static void +nv20_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ +} + +struct pipe_context * +nv20_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct nv20_screen *screen = nv20_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nv20_context *nv20; + struct nouveau_winsys *nvws = screen->nvws; + + nv20 = CALLOC(1, sizeof(struct nv20_context)); + if (!nv20) + return NULL; + nv20->screen = screen; + nv20->pctx_id = pctx_id; + + nv20->nvws = nvws; + + nv20->pipe.winsys = ws; + nv20->pipe.screen = pscreen; + nv20->pipe.destroy = nv20_destroy; + nv20->pipe.set_edgeflags = nv20_set_edgeflags; + nv20->pipe.draw_arrays = nv20_draw_arrays; + nv20->pipe.draw_elements = nv20_draw_elements; + nv20->pipe.clear = nv20_clear; + nv20->pipe.flush = nv20_flush; + + nv20_init_surface_functions(nv20); + nv20_init_state_functions(nv20); + + nv20->draw = draw_create(); + assert(nv20->draw); + draw_set_rasterize_stage(nv20->draw, nv20_draw_vbuf_stage(nv20)); + + nv20_init_hwctx(nv20); + + return &nv20->pipe; +} + diff --git a/src/gallium/drivers/nv20/nv20_context.h b/src/gallium/drivers/nv20/nv20_context.h new file mode 100644 index 0000000000..8ad926db20 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_context.h @@ -0,0 +1,153 @@ +#ifndef __NV20_CONTEXT_H__ +#define __NV20_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv20_screen *ctx = nv20->screen +#include "nouveau/nouveau_push.h" + +#include "nv20_state.h" + +#define NOUVEAU_ERR(fmt, args...) 
\ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +#define NV20_NEW_VERTPROG (1 << 0) +#define NV20_NEW_FRAGPROG (1 << 1) +#define NV20_NEW_VTXARRAYS (1 << 2) +#define NV20_NEW_BLEND (1 << 3) +#define NV20_NEW_BLENDCOL (1 << 4) +#define NV20_NEW_RAST (1 << 5) +#define NV20_NEW_DSA (1 << 6) +#define NV20_NEW_VIEWPORT (1 << 7) +#define NV20_NEW_SCISSOR (1 << 8) +#define NV20_NEW_FRAMEBUFFER (1 << 9) + +#include "nv20_screen.h" + +struct nv20_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nv20_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + uint32_t dirty; + + struct nv20_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nv20_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned dirty_samplers; + unsigned fp_samplers; + unsigned vp_samplers; + + uint32_t rt_enable; + struct pipe_buffer *rt[4]; + struct pipe_buffer *zeta; + uint32_t lma_offset; + + struct nv20_blend_state *blend; + struct pipe_blend_color *blend_color; + struct nv20_rasterizer_state *rast; + struct nv20_depth_stencil_alpha_state *dsa; + struct pipe_viewport_state *viewport; + struct pipe_scissor_state *scissor; + struct pipe_framebuffer_state *framebuffer; + + //struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + float *constbuf[PIPE_SHADER_TYPES][32][4]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; + + struct vertex_info vertex_info; + + struct { + struct pipe_buffer *buffer; + uint32_t format; + } tex[2]; + + unsigned vb_enable; + struct { + struct pipe_buffer *buffer; + unsigned delta; + } vb[16]; + +/* struct { + + struct nouveau_resource *exec_heap; + struct nouveau_resource *data_heap; + + struct nv20_vertex_program *active; + + struct nv20_vertex_program *current; + } vertprog; +*/ + struct { + struct nv20_fragment_program *active; + + struct nv20_fragment_program *current; + struct pipe_buffer *constant_buf; + } fragprog; + + struct 
pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; +}; + +static INLINE struct nv20_context * +nv20_context(struct pipe_context *pipe) +{ + return (struct nv20_context *)pipe; +} + +extern void nv20_init_state_functions(struct nv20_context *nv20); +extern void nv20_init_surface_functions(struct nv20_context *nv20); + +extern void nv20_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv20_clear.c */ +extern void nv20_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +/* nv20_draw.c */ +extern struct draw_stage *nv20_draw_render_stage(struct nv20_context *nv20); + +/* nv20_fragprog.c */ +extern void nv20_fragprog_bind(struct nv20_context *, + struct nv20_fragment_program *); +extern void nv20_fragprog_destroy(struct nv20_context *, + struct nv20_fragment_program *); + +/* nv20_fragtex.c */ +extern void nv20_fragtex_bind(struct nv20_context *); + +/* nv20_prim_vbuf.c */ +struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 ); +extern void nv20_vtxbuf_bind(struct nv20_context* nv20); + +/* nv20_state.c and friends */ +extern void nv20_emit_hw_state(struct nv20_context *nv20); +extern void nv20_state_tex_update(struct nv20_context *nv20); + +/* nv20_vbo.c */ +extern boolean nv20_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv20_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count); + + +#endif diff --git a/src/gallium/drivers/nv20/nv20_fragprog.c b/src/gallium/drivers/nv20/nv20_fragprog.c new file mode 100644 index 0000000000..4f496369dd --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_fragprog.c @@ -0,0 +1,21 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include 
"nv20_context.h" + +void +nv20_fragprog_bind(struct nv20_context *nv20, struct nv20_fragment_program *fp) +{ +} + +void +nv20_fragprog_destroy(struct nv20_context *nv20, + struct nv20_fragment_program *fp) +{ +} + diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c new file mode 100644 index 0000000000..495a7be912 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_fragtex.c @@ -0,0 +1,124 @@ +#include "nv20_context.h" +#include "nouveau/nouveau_util.h" + +#define _(m,tf) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV20TCL_TX_FORMAT_FORMAT_##tf, \ +} + +struct nv20_texture_format { + boolean defined; + uint pipe; + int format; +}; + +static struct nv20_texture_format +nv20_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8), + _(A1R5G5B5_UNORM, A1R5G5B5), + _(A4R4G4B4_UNORM, A4R4G4B4), + _(L8_UNORM , L8 ), + _(A8_UNORM , A8 ), + _(A8L8_UNORM , A8L8 ), +/* _(RGB_DXT1 , DXT1, ), */ +/* _(RGBA_DXT1 , DXT1, ), */ +/* _(RGBA_DXT3 , DXT3, ), */ +/* _(RGBA_DXT5 , DXT5, ), */ + {}, +}; + +static struct nv20_texture_format * +nv20_fragtex_format(uint pipe_format) +{ + struct nv20_texture_format *tf = nv20_texture_formats; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + return NULL; +} + + +static void +nv20_fragtex_build(struct nv20_context *nv20, int unit) +{ +#if 0 + struct nv20_sampler_state *ps = nv20->tex_sampler[unit]; + struct nv20_miptree *nv20mt = nv20->tex_miptree[unit]; + struct pipe_texture *pt = &nv20mt->base; + struct nv20_texture_format *tf; + uint32_t txf, txs, txp; + + tf = nv20_fragtex_format(pt->format); + if (!tf || !tf->defined) { + NOUVEAU_ERR("Unsupported texture format: 0x%x\n", pt->format); + return; + } + + txf = tf->format << 8; + txf |= (pt->last_level + 1) << 16; + txf |= log2i(pt->width[0]) << 20; + txf |= log2i(pt->height[0]) << 24; + txf |= log2i(pt->depth[0]) << 28; + txf |= 8; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV10TCL_TX_FORMAT_CUBE_MAP; + 
/* fall-through */ + case PIPE_TEXTURE_2D: + txf |= (2<<4); + break; + case PIPE_TEXTURE_1D: + txf |= (1<<4); + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return; + } + + BEGIN_RING(kelvin, NV10TCL_TX_OFFSET(unit), 8); + OUT_RELOCl(nv20mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + OUT_RELOCd(nv20mt->buffer,txf,NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_OR | NOUVEAU_BO_RD, 1/*VRAM*/,2/*TT*/); + OUT_RING (ps->wrap); + OUT_RING (0x40000000); /* enable */ + OUT_RING (txs); + OUT_RING (ps->filt | 0x2000 /* magic */); + OUT_RING ((pt->width[0] << 16) | pt->height[0]); + OUT_RING (ps->bcol); +#endif +} + +void +nv20_fragtex_bind(struct nv20_context *nv20) +{ +#if 0 + struct nv20_fragment_program *fp = nv20->fragprog.active; + unsigned samplers, unit; + + samplers = nv20->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + BEGIN_RING(kelvin, NV10TCL_TX_ENABLE(unit), 1); + OUT_RING (0); + } + + samplers = nv20->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + nv20_fragtex_build(nv20, unit); + } + + nv20->fp_samplers = fp->samplers; +#endif +} + diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c new file mode 100644 index 0000000000..ef7e9c5428 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -0,0 +1,206 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv20_context.h" +#include "nv20_screen.h" + +static void +nv20_miptree_layout(struct nv20_miptree *nv20mt) +{ + struct pipe_texture *pt = &nv20mt->base; + boolean swizzled = FALSE; + uint width = pt->width[0], height = pt->height[0]; + uint offset = 0; + int nr_faces, l, f; + + if (pt->target == PIPE_TEXTURE_CUBE) { + nr_faces = 6; + } else { + nr_faces = 1; + } + + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = 
height; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + if (swizzled) + nv20mt->level[l].pitch = pt->nblocksx[l] * pt->block.size; + else + nv20mt->level[l].pitch = pt->nblocksx[0] * pt->block.size; + nv20mt->level[l].pitch = (nv20mt->level[l].pitch + 63) & ~63; + + nv20mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l <= pt->last_level; l++) { + nv20mt->level[l].image_offset[f] = offset; + offset += nv20mt->level[l].pitch * pt->height[l]; + } + } + + nv20mt->total_size = offset; +} + +static struct pipe_texture * +nv20_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, + const unsigned *stride, struct pipe_buffer *pb) +{ + struct nv20_miptree *mt; + + /* Only supports 2D, non-mipmapped textures for the moment */ + if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || + pt->depth[0] != 1) + return NULL; + + mt = CALLOC_STRUCT(nv20_miptree); + if (!mt) + return NULL; + + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + mt->level[0].pitch = stride[0]; + mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + + pipe_buffer_reference(pscreen, &mt->buffer, pb); + return &mt->base; +} + +static struct pipe_texture * +nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv20_miptree *mt; + unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | + NOUVEAU_BUFFER_USAGE_TEXTURE; + + mt = MALLOC(sizeof(struct nv20_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = screen; + + /* Swizzled textures must be POT */ + if (pt->width[0] & (pt->width[0] - 1) || + pt->height[0] & (pt->height[0] - 1)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & 
(PIPE_TEXTURE_USAGE_PRIMARY | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else { + switch (pt->format) { + /* TODO: Figure out which formats can be swizzled */ + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R16_SNORM: + break; + default: + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + } + } + + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + buf_usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE; + + nv20_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, buf_usage, mt->total_size); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + + return &mt->base; +} + +static void +nv20_miptree_release(struct pipe_screen *screen, struct pipe_texture **pt) +{ + struct pipe_texture *mt = *pt; + + *pt = NULL; + if (--mt->refcount <= 0) { + struct nv20_miptree *nv20mt = (struct nv20_miptree *)mt; + int l; + + pipe_buffer_reference(screen, &nv20mt->buffer, NULL); + for (l = 0; l <= mt->last_level; l++) { + if (nv20mt->level[l].image_offset) + FREE(nv20mt->level[l].image_offset); + } + FREE(nv20mt); + } +} + +static struct pipe_surface * +nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct nv20_miptree *nv20mt = (struct nv20_miptree *)pt; + struct pipe_surface *ps; + + ps = CALLOC_STRUCT(pipe_surface); + if (!ps) + return NULL; + pipe_texture_reference(&ps->texture, pt); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = nv20mt->level[level].pitch; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; + ps->refcount = 1; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = 
nv20mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ps->offset = nv20mt->level[level].image_offset[zslice]; + } else { + ps->offset = nv20mt->level[level].image_offset[0]; + } + + return ps; +} + +static void +nv20_miptree_surface_release(struct pipe_screen *pscreen, + struct pipe_surface **psurface) +{ + struct pipe_surface *ps = *psurface; + + *psurface = NULL; + if (--ps->refcount > 0) + return; + + pipe_texture_reference(&ps->texture, NULL); + FREE(ps); +} + +void nv20_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv20_miptree_create; + pscreen->texture_blanket = nv20_miptree_blanket; + pscreen->texture_release = nv20_miptree_release; + pscreen->get_tex_surface = nv20_miptree_surface_get; + pscreen->tex_surface_release = nv20_miptree_surface_release; +} + diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c new file mode 100644 index 0000000000..319e1f6557 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c @@ -0,0 +1,430 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ **************************************************************************/
+
+/**
+ * \file
+ * Build post-transformation, post-clipping vertex buffers and element
+ * lists by hooking into the end of the primitive pipeline and
+ * manipulating the vertex_id field in the vertex headers.
+ *
+ * XXX: work in progress
+ * 
+ * \author José Fonseca <jrfonseca@tungstengraphics.com>
+ * \author Keith Whitwell <keith@tungstengraphics.com>
+ */
+
+
+#include "util/u_debug.h"
+#include "pipe/p_inlines.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+#include "nv20_context.h"
+#include "nv20_state.h"
+
+#include "draw/draw_vbuf.h"
+
+/**
+ * Primitive renderer for nv20.
+ *
+ * Per-renderer state shared by the vbuf_render callbacks in this file.
+ * At most one of pbuffer / mbuffer is expected to be set at a time:
+ * nv20_vbuf_render_allocate_vertices() asserts both are NULL before it
+ * allocates a new one.
+ */
+struct nv20_vbuf_render {
+	struct vbuf_render base;	/* keep first: nv20_vbuf_render() casts a
+					 * struct vbuf_render pointer straight
+					 * to this struct */
+
+	struct nv20_context *nv20;	/* context whose winsys, vertex_info
+					 * and dirty flags the callbacks use */
+
+	/** Vertex buffer in VRAM
+	 * (created with winsys->buffer_create(); that path is currently
+	 * compiled out by the "0 &&" in
+	 * nv20_vbuf_render_allocate_vertices()) */
+	struct pipe_buffer *pbuffer;
+
+	/** Vertex buffer in normal memory
+	 * (MALLOC'd by nv20__allocate_mbuffer()) */
+	void *mbuffer;
+
+	/** Vertex size in bytes */
+	/*unsigned vertex_size;*/
+
+	/** Hardware primitive
+	 * (value of nvgl_primitive(prim), stored by
+	 * nv20_vbuf_render_set_primitive(), which rejects 0) */
+	unsigned hwprim;
+};
+
+/**
+ * Basically a cast wrapper.
+ */ +static INLINE struct nv20_vbuf_render * +nv20_vbuf_render(struct vbuf_render *render) +{ + assert(render); + return (struct nv20_vbuf_render *)render; +} + +void nv20_vtxbuf_bind( struct nv20_context* nv20 ) +{ +#if 0 + int i; + for(i = 0; i < NV20TCL_VTXBUF_ADDRESS__SIZE; i++) { + BEGIN_RING(kelvin, NV20TCL_VTXBUF_ADDRESS(i), 1); + OUT_RING(0/*nv20->vtxbuf*/); + BEGIN_RING(kelvin, NV20TCL_VTXFMT(i) ,1); + OUT_RING(0/*XXX*/); + } +#endif +} + +static const struct vertex_info * +nv20_vbuf_render_get_vertex_info( struct vbuf_render *render ) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + struct nv20_context *nv20 = nv20_render->nv20; + + nv20_emit_hw_state(nv20); + + return &nv20->vertex_info; +} + +static void * +nv20__allocate_mbuffer(struct nv20_vbuf_render *nv20_render, size_t size) +{ + nv20_render->mbuffer = MALLOC(size); + return nv20_render->mbuffer; +} + +static void +nv20__allocate_pbuffer(struct nv20_vbuf_render *nv20_render, size_t size) +{ + struct pipe_winsys *winsys = nv20_render->nv20->pipe.winsys; + nv20_render->pbuffer = winsys->buffer_create(winsys, 64, + PIPE_BUFFER_USAGE_VERTEX, size); +} + +static boolean +nv20_vbuf_render_allocate_vertices( struct vbuf_render *render, + ushort vertex_size, + ushort nr_vertices ) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + size_t size = (size_t)vertex_size * (size_t)nr_vertices; + void *buf; + + assert(!nv20_render->pbuffer); + assert(!nv20_render->mbuffer); + + /* + * For small amount of vertices, don't bother with pipe vertex + * buffer, the data will be passed directly via the fifo. + */ + /* XXX: Pipe vertex buffers don't work. */ + if (0 && size > 16 * 1024) { + nv20__allocate_pbuffer(nv20_render, size); + /* umm yeah so this is ugly */ + buf = nv20_render->pbuffer; + } else { + buf = nv20__allocate_mbuffer(nv20_render, size); + } + + if (buf) + nv20_render->nv20->dirty |= NV20_NEW_VTXARRAYS; + + return buf ? 
TRUE : FALSE; +} + +static void * +nv20_vbuf_render_map_vertices( struct vbuf_render *render ) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + struct pipe_winsys *winsys = nv20_render->nv20->pipe.winsys; + + if (nv20_render->pbuffer) { + return winsys->buffer_map(winsys, + nv20_render->pbuffer, + PIPE_BUFFER_USAGE_CPU_WRITE); + } else if (nv20_render->mbuffer) { + return nv20_render->mbuffer; + } else + assert(0); + + /* warnings be gone */ + return NULL; +} + +static void +nv20_vbuf_render_unmap_vertices( struct vbuf_render *render, + ushort min_index, + ushort max_index ) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + struct pipe_winsys *winsys = nv20_render->nv20->pipe.winsys; + + if (nv20_render->pbuffer) + winsys->buffer_unmap(winsys, nv20_render->pbuffer); +} + +static boolean +nv20_vbuf_render_set_primitive( struct vbuf_render *render, + unsigned prim ) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + unsigned hwp = nvgl_primitive(prim); + if (hwp == 0) + return FALSE; + + nv20_render->hwprim = hwp; + return TRUE; +} + +static uint32_t +nv20__vtxhwformat(unsigned stride, unsigned fields, unsigned type) +{ + return (stride << NV20TCL_VTXFMT_STRIDE_SHIFT) | + (fields << NV20TCL_VTXFMT_SIZE_SHIFT) | + (type << NV20TCL_VTXFMT_TYPE_SHIFT); +} + +static unsigned +nv20__emit_format(struct nv20_context *nv20, enum attrib_emit type, int hwattr) +{ + uint32_t hwfmt = 0; + unsigned fields; + + switch (type) { + case EMIT_OMIT: + hwfmt = nv20__vtxhwformat(0, 0, 2); + fields = 0; + break; + case EMIT_1F: + hwfmt = nv20__vtxhwformat(4, 1, 2); + fields = 1; + break; + case EMIT_2F: + hwfmt = nv20__vtxhwformat(8, 2, 2); + fields = 2; + break; + case EMIT_3F: + hwfmt = nv20__vtxhwformat(12, 3, 2); + fields = 3; + break; + case EMIT_4F: + hwfmt = nv20__vtxhwformat(16, 4, 2); + fields = 4; + break; + default: + NOUVEAU_ERR("unhandled attrib_emit %d\n", type); + return 0; + } + + BEGIN_RING(kelvin, 
NV20TCL_VTXFMT(hwattr), 1); + OUT_RING(hwfmt); + return fields; +} + +static unsigned +nv20__emit_vertex_array_format(struct nv20_context *nv20) +{ + struct vertex_info *vinfo = &nv20->vertex_info; + int hwattr = NV20TCL_VTXFMT__SIZE; + int attr = 0; + unsigned nr_fields = 0; + + while (hwattr-- > 0) { + if (vinfo->hwfmt[0] & (1 << hwattr)) { + nr_fields += nv20__emit_format(nv20, + vinfo->attrib[attr].emit, hwattr); + attr++; + } else + nv20__emit_format(nv20, EMIT_OMIT, hwattr); + } + + return nr_fields; +} + +static void +nv20__draw_mbuffer(struct nv20_vbuf_render *nv20_render, + const ushort *indices, + uint nr_indices) +{ + struct nv20_context *nv20 = nv20_render->nv20; + struct vertex_info *vinfo = &nv20->vertex_info; + unsigned nr_fields; + int max_push; + ubyte *data = nv20_render->mbuffer; + int vsz = 4 * vinfo->size; + + nr_fields = nv20__emit_vertex_array_format(nv20); + + BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(nv20_render->hwprim); + + max_push = 1200 / nr_fields; + while (nr_indices) { + int i; + int push = MIN2(nr_indices, max_push); + + BEGIN_RING_NI(kelvin, NV20TCL_VERTEX_DATA, push * nr_fields); + for (i = 0; i < push; i++) { + /* XXX: fixme to handle other than floats? 
*/ + int f = nr_fields; + float *attrv = (float*)&data[indices[i] * vsz]; + while (f-- > 0) + OUT_RINGf(*attrv++); + } + + nr_indices -= push; + indices += push; + } + + BEGIN_RING(kelvin, NV20TCL_VERTEX_BEGIN_END, 1); + OUT_RING(NV20TCL_VERTEX_BEGIN_END_STOP); +} + +static void +nv20__draw_pbuffer(struct nv20_vbuf_render *nv20_render, + const ushort *indices, + uint nr_indices) +{ + struct nv20_context *nv20 = nv20_render->nv20; + int push, i; + + NOUVEAU_ERR("nv20__draw_pbuffer: this path is broken.\n"); + + BEGIN_RING(kelvin, NV10TCL_VERTEX_ARRAY_OFFSET_POS, 1); + OUT_RELOCl(nv20_render->pbuffer, 0, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD); + + BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING(nv20_render->hwprim); + + if (nr_indices & 1) { + BEGIN_RING(kelvin, NV10TCL_VB_ELEMENT_U32, 1); + OUT_RING (indices[0]); + indices++; nr_indices--; + } + + while (nr_indices) { + // XXX too big/small ? check the size + push = MIN2(nr_indices, 1200 * 2); + + BEGIN_RING_NI(kelvin, NV10TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((indices[i+1] << 16) | indices[i]); + + nr_indices -= push; + indices += push; + } + + BEGIN_RING(kelvin, NV10TCL_VERTEX_BUFFER_BEGIN_END, 1); + OUT_RING (0); +} + +static void +nv20_vbuf_render_draw( struct vbuf_render *render, + const ushort *indices, + uint nr_indices) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + + nv20_emit_hw_state(nv20_render->nv20); + + if (nv20_render->pbuffer) + nv20__draw_pbuffer(nv20_render, indices, nr_indices); + else if (nv20_render->mbuffer) + nv20__draw_mbuffer(nv20_render, indices, nr_indices); + else + assert(0); +} + + +static void +nv20_vbuf_render_release_vertices( struct vbuf_render *render ) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + struct nv20_context *nv20 = nv20_render->nv20; + struct pipe_screen *pscreen = &nv20->screen->pipe; + + if (nv20_render->pbuffer) { + pipe_buffer_reference(pscreen, 
&nv20_render->pbuffer, NULL); + } else if (nv20_render->mbuffer) { + FREE(nv20_render->mbuffer); + nv20_render->mbuffer = NULL; + } else + assert(0); +} + + +static void +nv20_vbuf_render_destroy( struct vbuf_render *render ) +{ + struct nv20_vbuf_render *nv20_render = nv20_vbuf_render(render); + + assert(!nv20_render->pbuffer); + assert(!nv20_render->mbuffer); + + FREE(nv20_render); +} + + +/** + * Create a new primitive render. + */ +static struct vbuf_render * +nv20_vbuf_render_create( struct nv20_context *nv20 ) +{ + struct nv20_vbuf_render *nv20_render = CALLOC_STRUCT(nv20_vbuf_render); + + nv20_render->nv20 = nv20; + + nv20_render->base.max_vertex_buffer_bytes = 16*1024; + nv20_render->base.max_indices = 1024; + nv20_render->base.get_vertex_info = nv20_vbuf_render_get_vertex_info; + nv20_render->base.allocate_vertices = + nv20_vbuf_render_allocate_vertices; + nv20_render->base.map_vertices = nv20_vbuf_render_map_vertices; + nv20_render->base.unmap_vertices = nv20_vbuf_render_unmap_vertices; + nv20_render->base.set_primitive = nv20_vbuf_render_set_primitive; + nv20_render->base.draw = nv20_vbuf_render_draw; + nv20_render->base.release_vertices = nv20_vbuf_render_release_vertices; + nv20_render->base.destroy = nv20_vbuf_render_destroy; + + return &nv20_render->base; +} + + +/** + * Create a new primitive vbuf/render stage. 
+ */ +struct draw_stage *nv20_draw_vbuf_stage( struct nv20_context *nv20 ) +{ + struct vbuf_render *render; + struct draw_stage *stage; + + render = nv20_vbuf_render_create(nv20); + if(!render) + return NULL; + + stage = draw_vbuf_stage( nv20->draw, render ); + if(!stage) { + render->destroy(render); + return NULL; + } + + return stage; +} diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c new file mode 100644 index 0000000000..5f2b7b4f71 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -0,0 +1,222 @@ +#include "pipe/p_screen.h" +#include "util/u_simple_screen.h" + +#include "nv20_context.h" +#include "nv20_screen.h" + +static const char * +nv20_screen_get_name(struct pipe_screen *screen) +{ + struct nv20_screen *nv20screen = nv20_screen(screen); + struct nouveau_device *dev = nv20screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv20_screen_get_vendor(struct pipe_screen *screen) +{ + return "nouveau"; +} + +static int +nv20_screen_get_param(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 2; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 0; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 1; + case PIPE_CAP_OCCLUSION_QUERY: + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 12; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 0; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 12; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; + case NOUVEAU_CAP_HW_VTXBUF: + case NOUVEAU_CAP_HW_IDXBUF: + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static 
float +nv20_screen_get_paramf(struct pipe_screen *screen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 2.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv20_screen_is_format_supported(struct pipe_screen *screen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static void * +nv20_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + void *map; + struct nv20_miptree *nv20mt = (struct nv20_miptree *)surface->texture; + + map = ws->buffer_map(ws, nv20mt->buffer, flags); + if (!map) + return NULL; + + return map + surface->offset; +} + +static void +nv20_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + struct nv20_miptree *nv20mt = (struct nv20_miptree *)surface->texture; + + ws->buffer_unmap(ws, nv20mt->buffer); +} + +static void +nv20_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv20_screen *screen = nv20_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + 
nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->kelvin); + + FREE(pscreen); +} + +static struct pipe_buffer * +nv20_surface_buffer(struct pipe_surface *surf) +{ + struct nv20_miptree *mt = (struct nv20_miptree *)surf->texture; + + return mt->buffer; +} + +struct pipe_screen * +nv20_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv20_screen *screen = CALLOC_STRUCT(nv20_screen); + unsigned kelvin_class = 0; + unsigned chipset = nvws->channel->device->chipset; + int ret; + + if (!screen) + return NULL; + screen->nvws = nvws; + + /* 2D engine setup */ + screen->eng2d = nv04_surface_2d_init(nvws); + screen->eng2d->buf = nv20_surface_buffer; + + /* 3D object */ + if (chipset >= 0x25) + kelvin_class = NV25TCL; + else if (chipset >= 0x20) + kelvin_class = NV20TCL; + + if (!kelvin_class || chipset >= 0x30) { + NOUVEAU_ERR("Unknown nv2x chipset: nv%02x\n", chipset); + return NULL; + } + + ret = nvws->grobj_alloc(nvws, kelvin_class, &screen->kelvin); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv20_screen_destroy(&screen->pipe); + return NULL; + } + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv20_screen_destroy; + + screen->pipe.get_name = nv20_screen_get_name; + screen->pipe.get_vendor = nv20_screen_get_vendor; + screen->pipe.get_param = nv20_screen_get_param; + screen->pipe.get_paramf = nv20_screen_get_paramf; + + screen->pipe.is_format_supported = nv20_screen_is_format_supported; + + screen->pipe.surface_map = nv20_surface_map; + screen->pipe.surface_unmap = nv20_surface_unmap; + + nv20_screen_init_miptree_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv20/nv20_screen.h b/src/gallium/drivers/nv20/nv20_screen.h new file mode 
100644 index 0000000000..bf2f2c0d9f --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_screen.h @@ -0,0 +1,24 @@ +#ifndef __NV20_SCREEN_H__ +#define __NV20_SCREEN_H__ + +#include "pipe/p_screen.h" +#include "nv04/nv04_surface_2d.h" + +struct nv20_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + + /* HW graphics objects */ + struct nv04_surface_2d *eng2d; + struct nouveau_grobj *kelvin; + struct nouveau_notifier *sync; +}; + +static INLINE struct nv20_screen * +nv20_screen(struct pipe_screen *screen) +{ + return (struct nv20_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c new file mode 100644 index 0000000000..ecec4f49a0 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_state.c @@ -0,0 +1,582 @@ +#include "draw/draw_context.h" +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv20_context.h" +#include "nv20_state.h" + +static void * +nv20_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv20_blend_state *cb; + + cb = MALLOC(sizeof(struct nv20_blend_state)); + + cb->b_enable = cso->blend_enable ? 1 : 0; + cb->b_srcfunc = ((nvgl_blend_func(cso->alpha_src_factor)<<16) | + (nvgl_blend_func(cso->rgb_src_factor))); + cb->b_dstfunc = ((nvgl_blend_func(cso->alpha_dst_factor)<<16) | + (nvgl_blend_func(cso->rgb_dst_factor))); + + cb->c_mask = (((cso->colormask & PIPE_MASK_A) ? (0x01<<24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01<<16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01<< 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01<< 0) : 0)); + + cb->d_enable = cso->dither ? 
1 : 0; + + return (void *)cb; +} + +static void +nv20_blend_state_bind(struct pipe_context *pipe, void *blend) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->blend = (struct nv20_blend_state*)blend; + + nv20->dirty |= NV20_NEW_BLEND; +} + +static void +nv20_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV20TCL_TX_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV20TCL_TX_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV20TCL_TX_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV20TCL_TX_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV20TCL_TX_WRAP_S_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_CLAMP: + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV20TCL_TX_WRAP_S_REPEAT; + break; + } + + return (ret >> NV20TCL_TX_WRAP_S_SHIFT); +} + +static void * +nv20_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv20_sampler_state *ps; + uint32_t filter = 0; + + ps = MALLOC(sizeof(struct nv20_sampler_state)); + + ps->wrap = ((wrap_mode(cso->wrap_s) << NV20TCL_TX_WRAP_S_SHIFT) | + (wrap_mode(cso->wrap_t) << NV20TCL_TX_WRAP_T_SHIFT)); + + ps->en = 0; + if (cso->max_anisotropy > 1.0) { + /* no idea, binary driver sets it, works without it.. meh.. 
*/ + ps->wrap |= (1 << 5); + +/* if (cso->max_anisotropy >= 8.0) { + ps->en |= NV20TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 4.0) { + ps->en |= NV20TCL_TX_ENABLE_ANISO_4X; + } else { + ps->en |= NV20TCL_TX_ENABLE_ANISO_2X; + }*/ + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV20TCL_TX_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV20TCL_TX_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= + NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV20TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV20TCL_TX_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= + NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= + NV20TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV20TCL_TX_FILTER_MINIFY_NEAREST; + break; + } + break; + } + + ps->filt = filter; + +/* if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NEVER; + break; + case PIPE_FUNC_GREATER: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GREATER; + break; + case PIPE_FUNC_EQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_EQUAL; + break; + case PIPE_FUNC_GEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_GEQUAL; + break; + case PIPE_FUNC_LESS: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LESS; + break; + case PIPE_FUNC_NOTEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_NOTEQUAL; + break; + case PIPE_FUNC_LEQUAL: + ps->wrap |= NV10TCL_TX_WRAP_RCOMP_LEQUAL; + break; + case PIPE_FUNC_ALWAYS: + ps->wrap |= 
NV10TCL_TX_WRAP_RCOMP_ALWAYS; + break; + default: + break; + } + }*/ + + ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | + (float_to_ubyte(cso->border_color[0]) << 16) | + (float_to_ubyte(cso->border_color[1]) << 8) | + (float_to_ubyte(cso->border_color[2]) << 0)); + + return (void *)ps; +} + +static void +nv20_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv20_context *nv20 = nv20_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv20->tex_sampler[unit] = sampler[unit]; + nv20->dirty_samplers |= (1 << unit); + } +} + +static void +nv20_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nv20_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nv20_context *nv20 = nv20_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv20->tex_miptree[unit] = (struct nv20_miptree *)miptree[unit]; + nv20->dirty_samplers |= (1 << unit); + } +} + +static void * +nv20_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv20_rasterizer_state *rs; + int i; + + /*XXX: ignored: + * light_twoside + * offset_cw/ccw -nohw + * scissor + * point_smooth -nohw + * multisample + * offset_units / offset_scale + */ + rs = MALLOC(sizeof(struct nv20_rasterizer_state)); + + rs->templ = cso; + + rs->shade_model = cso->flatshade ? NV20TCL_SHADE_MODEL_FLAT : + NV20TCL_SHADE_MODEL_SMOOTH; + + rs->line_width = (unsigned char)(cso->line_width * 8.0) & 0xff; + rs->line_smooth_en = cso->line_smooth ? 1 : 0; + + /* XXX: nv20 and nv25 different! */ + rs->point_size = *(uint32_t*)&cso->point_size; + + rs->poly_smooth_en = cso->poly_smooth ? 
1 : 0; + + if (cso->front_winding == PIPE_WINDING_CCW) { + rs->front_face = NV20TCL_FRONT_FACE_CCW; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_ccw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_cw); + } else { + rs->front_face = NV20TCL_FRONT_FACE_CW; + rs->poly_mode_front = nvgl_polygon_mode(cso->fill_cw); + rs->poly_mode_back = nvgl_polygon_mode(cso->fill_ccw); + } + + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + rs->cull_face_en = 1; + if (cso->front_winding == PIPE_WINDING_CCW) + rs->cull_face = NV20TCL_CULL_FACE_FRONT; + else + rs->cull_face = NV20TCL_CULL_FACE_BACK; + break; + case PIPE_WINDING_CW: + rs->cull_face_en = 1; + if (cso->front_winding == PIPE_WINDING_CW) + rs->cull_face = NV20TCL_CULL_FACE_FRONT; + else + rs->cull_face = NV20TCL_CULL_FACE_BACK; + break; + case PIPE_WINDING_BOTH: + rs->cull_face_en = 1; + rs->cull_face = NV20TCL_CULL_FACE_FRONT_AND_BACK; + break; + case PIPE_WINDING_NONE: + default: + rs->cull_face_en = 0; + rs->cull_face = 0; + break; + } + + if (cso->point_sprite) { + rs->point_sprite = (1 << 0); + for (i = 0; i < 8; i++) { + if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) + rs->point_sprite |= (1 << (8 + i)); + } + } else { + rs->point_sprite = 0; + } + + return (void *)rs; +} + +static void +nv20_rasterizer_state_bind(struct pipe_context *pipe, void *rast) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->rast = (struct nv20_rasterizer_state*)rast; + + draw_set_rasterizer_state(nv20->draw, (nv20->rast ? 
nv20->rast->templ : NULL)); + + nv20->dirty |= NV20_NEW_RAST; +} + +static void +nv20_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nv20_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv20_depth_stencil_alpha_state *hw; + + hw = MALLOC(sizeof(struct nv20_depth_stencil_alpha_state)); + + hw->depth.func = nvgl_comparison_op(cso->depth.func); + hw->depth.write_enable = cso->depth.writemask ? 1 : 0; + hw->depth.test_enable = cso->depth.enabled ? 1 : 0; + + hw->stencil.enable = cso->stencil[0].enabled ? 1 : 0; + hw->stencil.wmask = cso->stencil[0].writemask; + hw->stencil.func = nvgl_comparison_op(cso->stencil[0].func); + hw->stencil.ref = cso->stencil[0].ref_value; + hw->stencil.vmask = cso->stencil[0].valuemask; + hw->stencil.fail = nvgl_stencil_op(cso->stencil[0].fail_op); + hw->stencil.zfail = nvgl_stencil_op(cso->stencil[0].zfail_op); + hw->stencil.zpass = nvgl_stencil_op(cso->stencil[0].zpass_op); + + hw->alpha.enabled = cso->alpha.enabled ? 
1 : 0; + hw->alpha.func = nvgl_comparison_op(cso->alpha.func); + hw->alpha.ref = float_to_ubyte(cso->alpha.ref_value); + + return (void *)hw; +} + +static void +nv20_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *dsa) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->dsa = (struct nv20_depth_stencil_alpha_state*)dsa; + + nv20->dirty |= NV20_NEW_DSA; +} + +static void +nv20_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void * +nv20_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *templ) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + return draw_create_vertex_shader(nv20->draw, templ); +} + +static void +nv20_vp_state_bind(struct pipe_context *pipe, void *shader) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + draw_bind_vertex_shader(nv20->draw, (struct draw_vertex_shader *) shader); + + nv20->dirty |= NV20_NEW_VERTPROG; +} + +static void +nv20_vp_state_delete(struct pipe_context *pipe, void *shader) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + draw_delete_vertex_shader(nv20->draw, (struct draw_vertex_shader *) shader); +} + +static void * +nv20_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv20_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv20_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(cso->tokens, &fp->info); + + return (void *)fp; +} + +static void +nv20_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv20_context *nv20 = nv20_context(pipe); + struct nv20_fragment_program *fp = hwcso; + + nv20->fragprog.current = fp; + nv20->dirty |= NV20_NEW_FRAGPROG; +} + +static void +nv20_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv20_context *nv20 = nv20_context(pipe); + struct nv20_fragment_program *fp = hwcso; + + nv20_fragprog_destroy(nv20, fp); + FREE((void*)fp->pipe.tokens); + 
FREE(fp); +} + +static void +nv20_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->blend_color = (struct pipe_blend_color*)bcol; + + nv20->dirty |= NV20_NEW_BLENDCOL; +} + +static void +nv20_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + draw_set_clip_state(nv20->draw, clip); +} + +static void +nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv20_context *nv20 = nv20_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + + assert(shader < PIPE_SHADER_TYPES); + assert(index == 0); + + if (buf) { + void *mapped; + if (buf->buffer && buf->buffer->size && + (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + { + memcpy(nv20->constbuf[shader], mapped, buf->buffer->size); + nv20->constbuf_nr[shader] = + buf->buffer->size / (4 * sizeof(float)); + ws->buffer_unmap(ws, buf->buffer); + } + } +} + +static void +nv20_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->framebuffer = (struct pipe_framebuffer_state*)fb; + + nv20->dirty |= NV20_NEW_FRAMEBUFFER; +} + +static void +nv20_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + NOUVEAU_ERR("line stipple hahaha\n"); +} + +static void +nv20_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->scissor = (struct pipe_scissor_state*)s; + + nv20->dirty |= NV20_NEW_SCISSOR; +} + +static void +nv20_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + nv20->viewport = (struct pipe_viewport_state*)vpt; + + 
draw_set_viewport_state(nv20->draw, nv20->viewport); + + nv20->dirty |= NV20_NEW_VIEWPORT; +} + +static void +nv20_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + memcpy(nv20->vtxbuf, vb, sizeof(*vb) * count); + nv20->dirty |= NV20_NEW_VTXARRAYS; + + draw_set_vertex_buffers(nv20->draw, count, vb); +} + +static void +nv20_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) +{ + struct nv20_context *nv20 = nv20_context(pipe); + + memcpy(nv20->vtxelt, ve, sizeof(*ve) * count); + nv20->dirty |= NV20_NEW_VTXARRAYS; + + draw_set_vertex_elements(nv20->draw, count, ve); +} + +void +nv20_init_state_functions(struct nv20_context *nv20) +{ + nv20->pipe.create_blend_state = nv20_blend_state_create; + nv20->pipe.bind_blend_state = nv20_blend_state_bind; + nv20->pipe.delete_blend_state = nv20_blend_state_delete; + + nv20->pipe.create_sampler_state = nv20_sampler_state_create; + nv20->pipe.bind_sampler_states = nv20_sampler_state_bind; + nv20->pipe.delete_sampler_state = nv20_sampler_state_delete; + nv20->pipe.set_sampler_textures = nv20_set_sampler_texture; + + nv20->pipe.create_rasterizer_state = nv20_rasterizer_state_create; + nv20->pipe.bind_rasterizer_state = nv20_rasterizer_state_bind; + nv20->pipe.delete_rasterizer_state = nv20_rasterizer_state_delete; + + nv20->pipe.create_depth_stencil_alpha_state = + nv20_depth_stencil_alpha_state_create; + nv20->pipe.bind_depth_stencil_alpha_state = + nv20_depth_stencil_alpha_state_bind; + nv20->pipe.delete_depth_stencil_alpha_state = + nv20_depth_stencil_alpha_state_delete; + + nv20->pipe.create_vs_state = nv20_vp_state_create; + nv20->pipe.bind_vs_state = nv20_vp_state_bind; + nv20->pipe.delete_vs_state = nv20_vp_state_delete; + + nv20->pipe.create_fs_state = nv20_fp_state_create; + nv20->pipe.bind_fs_state = nv20_fp_state_bind; + nv20->pipe.delete_fs_state = 
nv20_fp_state_delete; + + nv20->pipe.set_blend_color = nv20_set_blend_color; + nv20->pipe.set_clip_state = nv20_set_clip_state; + nv20->pipe.set_constant_buffer = nv20_set_constant_buffer; + nv20->pipe.set_framebuffer_state = nv20_set_framebuffer_state; + nv20->pipe.set_polygon_stipple = nv20_set_polygon_stipple; + nv20->pipe.set_scissor_state = nv20_set_scissor_state; + nv20->pipe.set_viewport_state = nv20_set_viewport_state; + + nv20->pipe.set_vertex_buffers = nv20_set_vertex_buffers; + nv20->pipe.set_vertex_elements = nv20_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv20/nv20_state.h b/src/gallium/drivers/nv20/nv20_state.h new file mode 100644 index 0000000000..34f402fdcb --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_state.h @@ -0,0 +1,139 @@ +#ifndef __NV20_STATE_H__ +#define __NV20_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv20_blend_state { + uint32_t b_enable; + uint32_t b_srcfunc; + uint32_t b_dstfunc; + + uint32_t c_mask; + + uint32_t d_enable; +}; + +struct nv20_sampler_state { + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +struct nv20_rasterizer_state { + uint32_t shade_model; + + uint32_t line_width; + uint32_t line_smooth_en; + + uint32_t point_size; + + uint32_t poly_smooth_en; + + uint32_t poly_mode_front; + uint32_t poly_mode_back; + + uint32_t front_face; + uint32_t cull_face; + uint32_t cull_face_en; + + uint32_t point_sprite; + + const struct pipe_rasterizer_state *templ; +}; + +struct nv20_vertex_program_exec { + uint32_t data[4]; + boolean has_branch_offset; + int const_index; +}; + +struct nv20_vertex_program_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct nv20_vertex_program { + const struct pipe_shader_state *pipe; + + boolean translated; + struct nv20_vertex_program_exec *insns; + unsigned nr_insns; + struct nv20_vertex_program_data *consts; + unsigned nr_consts; + + struct nouveau_resource *exec; + unsigned exec_start; + struct 
nouveau_resource *data; + unsigned data_start; + unsigned data_start_min; + + uint32_t ir; + uint32_t or; +}; + +struct nv20_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv20_fragment_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + boolean translated; + boolean on_hw; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv20_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + uint32_t fp_reg_control; +}; + + +struct nv20_depth_stencil_alpha_state { + struct { + uint32_t func; + uint32_t write_enable; + uint32_t test_enable; + } depth; + + struct { + uint32_t enable; + uint32_t wmask; + uint32_t func; + uint32_t ref; + uint32_t vmask; + uint32_t fail; + uint32_t zfail; + uint32_t zpass; + } stencil; + + struct { + uint32_t enabled; + uint32_t func; + uint32_t ref; + } alpha; +}; + +struct nv20_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c new file mode 100644 index 0000000000..0f4df9ca31 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -0,0 +1,396 @@ +#include "nv20_context.h" +#include "nv20_state.h" +#include "draw/draw_context.h" + +static void nv20_state_emit_blend(struct nv20_context* nv20) +{ + struct nv20_blend_state *b = nv20->blend; + + BEGIN_RING(kelvin, NV20TCL_DITHER_ENABLE, 1); + OUT_RING (b->d_enable); + + BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_ENABLE, 1); + OUT_RING (b->b_enable); + + BEGIN_RING(kelvin, NV20TCL_BLEND_FUNC_SRC, 2); + OUT_RING (b->b_srcfunc); + OUT_RING (b->b_dstfunc); + + BEGIN_RING(kelvin, NV20TCL_COLOR_MASK, 1); + OUT_RING (b->c_mask); +} + +static void nv20_state_emit_blend_color(struct nv20_context* nv20) +{ + struct pipe_blend_color *c = 
nv20->blend_color; + + BEGIN_RING(kelvin, NV20TCL_BLEND_COLOR, 1); + OUT_RING ((float_to_ubyte(c->color[3]) << 24)| + (float_to_ubyte(c->color[0]) << 16)| + (float_to_ubyte(c->color[1]) << 8) | + (float_to_ubyte(c->color[2]) << 0)); +} + +static void nv20_state_emit_rast(struct nv20_context* nv20) +{ + struct nv20_rasterizer_state *r = nv20->rast; + + BEGIN_RING(kelvin, NV20TCL_SHADE_MODEL, 2); + OUT_RING (r->shade_model); + OUT_RING (r->line_width); + + + BEGIN_RING(kelvin, NV20TCL_POINT_SIZE, 1); + OUT_RING (r->point_size); + + BEGIN_RING(kelvin, NV20TCL_POLYGON_MODE_FRONT, 2); + OUT_RING (r->poly_mode_front); + OUT_RING (r->poly_mode_back); + + + BEGIN_RING(kelvin, NV20TCL_CULL_FACE, 2); + OUT_RING (r->cull_face); + OUT_RING (r->front_face); + + BEGIN_RING(kelvin, NV20TCL_LINE_SMOOTH_ENABLE, 2); + OUT_RING (r->line_smooth_en); + OUT_RING (r->poly_smooth_en); + + BEGIN_RING(kelvin, NV20TCL_CULL_FACE_ENABLE, 1); + OUT_RING (r->cull_face_en); +} + +static void nv20_state_emit_dsa(struct nv20_context* nv20) +{ + struct nv20_depth_stencil_alpha_state *d = nv20->dsa; + + BEGIN_RING(kelvin, NV20TCL_DEPTH_FUNC, 1); + OUT_RING (d->depth.func); + + BEGIN_RING(kelvin, NV20TCL_DEPTH_WRITE_ENABLE, 1); + OUT_RING (d->depth.write_enable); + + BEGIN_RING(kelvin, NV20TCL_DEPTH_TEST_ENABLE, 1); + OUT_RING (d->depth.test_enable); + + BEGIN_RING(kelvin, NV20TCL_DEPTH_UNK17D8, 1); + OUT_RING (1); + +#if 0 + BEGIN_RING(kelvin, NV20TCL_STENCIL_ENABLE, 1); + OUT_RING (d->stencil.enable); + BEGIN_RING(kelvin, NV20TCL_STENCIL_MASK, 7); + OUT_RINGp ((uint32_t *)&(d->stencil.wmask), 7); +#endif + + BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_ENABLE, 1); + OUT_RING (d->alpha.enabled); + + BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_FUNC, 1); + OUT_RING (d->alpha.func); + + BEGIN_RING(kelvin, NV20TCL_ALPHA_FUNC_REF, 1); + OUT_RING (d->alpha.ref); +} + +static void nv20_state_emit_viewport(struct nv20_context* nv20) +{ +} + +static void nv20_state_emit_scissor(struct nv20_context* nv20) +{ + /* 
NV20TCL_SCISSOR_* is probably a software method */ +/* struct pipe_scissor_state *s = nv20->scissor; + BEGIN_RING(kelvin, NV20TCL_SCISSOR_HORIZ, 2); + OUT_RING (((s->maxx - s->minx) << 16) | s->minx); + OUT_RING (((s->maxy - s->miny) << 16) | s->miny);*/ +} + +static void nv20_state_emit_framebuffer(struct nv20_context* nv20) +{ + struct pipe_framebuffer_state* fb = nv20->framebuffer; + struct pipe_surface *rt, *zeta = NULL; + uint32_t rt_format, w, h; + int colour_format = 0, zeta_format = 0; + struct nv20_miptree *nv20mt = 0; + + w = fb->cbufs[0]->width; + h = fb->cbufs[0]->height; + colour_format = fb->cbufs[0]->format; + rt = fb->cbufs[0]; + + if (fb->zsbuf) { + if (colour_format) { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } else { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + } + + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + rt_format = NV20TCL_RT_FORMAT_TYPE_LINEAR | 0x20; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV20TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV20TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + if (zeta) { + BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (rt->stride | (zeta->stride << 16)); + } else { + BEGIN_RING(kelvin, NV20TCL_RT_PITCH, 1); + OUT_RING (rt->stride | (rt->stride << 16)); + } + + nv20mt = (struct nv20_miptree *)rt->texture; + nv20->rt[0] = nv20mt->buffer; + + if (zeta_format) + { + nv20mt = (struct nv20_miptree *)zeta->texture; + nv20->zeta = nv20mt->buffer; + } + + BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3); + OUT_RING ((w << 16) | 0); + OUT_RING ((h << 16) | 0); /*NV20TCL_RT_VERT */ + OUT_RING (rt_format); /* NV20TCL_RT_FORMAT */ + BEGIN_RING(kelvin, NV20TCL_VIEWPORT_CLIP_HORIZ(0), 2); + OUT_RING (((w - 1) << 16) | 0); + OUT_RING (((h - 1) << 16) | 0); +} + +static void nv20_vertex_layout(struct nv20_context *nv20) +{ + struct nv20_fragment_program *fp = 
nv20->fragprog.current; + struct draw_context *dc = nv20->draw; + int src; + int i; + struct vertex_info *vinfo = &nv20->vertex_info; + const enum interp_mode colorInterp = INTERP_LINEAR; + boolean colors[2] = { FALSE }; + boolean generics[12] = { FALSE }; + boolean fog = FALSE; + + memset(vinfo, 0, sizeof(*vinfo)); + + /* + * Assumed NV20 hardware vertex attribute order: + * 0 position, 1 ?, 2 ?, 3 col0, + * 4 col1?, 5 ?, 6 ?, 7 ?, + * 8 ?, 9 tex0, 10 tex1, 11 tex2, + * 12 tex3, 13 ?, 14 ?, 15 ? + * unaccounted: wgh, nor, fog + * There are total 16 attrs. + * vinfo->hwfmt[0] has a used-bit corresponding to each of these. + * relation to TGSI_SEMANTIC_*: + * - POSITION: position (always used) + * - COLOR: col1, col0 + * - GENERIC: tex3, tex2, tex1, tex0, normal, weight + * - FOG: fog + */ + + for (i = 0; i < fp->info.num_inputs; i++) { + int isn = fp->info.input_semantic_name[i]; + int isi = fp->info.input_semantic_index[i]; + switch (isn) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + assert(isi < 2); + colors[isi] = TRUE; + break; + case TGSI_SEMANTIC_GENERIC: + assert(isi < 12); + generics[isi] = TRUE; + break; + case TGSI_SEMANTIC_FOG: + fog = TRUE; + break; + default: + assert(0 && "unknown input_semantic_name"); + } + } + + /* always do position */ { + src = draw_find_vs_output(dc, TGSI_SEMANTIC_POSITION, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src); + vinfo->hwfmt[0] |= (1 << 0); + } + + /* two unnamed generics */ + for (i = 4; i < 6; i++) { + if (!generics[i]) + continue; + src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + vinfo->hwfmt[0] |= (1 << (i - 3)); + } + + if (colors[0]) { + src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 0); + draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); + vinfo->hwfmt[0] |= (1 << 3); + } + + if (colors[1]) { + src = draw_find_vs_output(dc, TGSI_SEMANTIC_COLOR, 1); + draw_emit_vertex_attr(vinfo, 
EMIT_4F, colorInterp, src); + vinfo->hwfmt[0] |= (1 << 4); + } + + /* four unnamed generics */ + for (i = 6; i < 10; i++) { + if (!generics[i]) + continue; + src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + vinfo->hwfmt[0] |= (1 << (i - 1)); + } + + /* tex0, tex1, tex2, tex3 */ + for (i = 0; i < 4; i++) { + if (!generics[i]) + continue; + src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + vinfo->hwfmt[0] |= (1 << (i + 9)); + } + + /* two unnamed generics */ + for (i = 10; i < 12; i++) { + if (!generics[i]) + continue; + src = draw_find_vs_output(dc, TGSI_SEMANTIC_GENERIC, i); + draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); + vinfo->hwfmt[0] |= (1 << (i + 3)); + } + + if (fog) { + src = draw_find_vs_output(dc, TGSI_SEMANTIC_FOG, 0); + draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src); + vinfo->hwfmt[0] |= (1 << 15); + } + + draw_compute_vertex_size(vinfo); +} + +void +nv20_emit_hw_state(struct nv20_context *nv20) +{ + int i; + + if (nv20->dirty & NV20_NEW_VERTPROG) { + //nv20_vertprog_bind(nv20, nv20->vertprog.current); + nv20->dirty &= ~NV20_NEW_VERTPROG; + } + + if (nv20->dirty & NV20_NEW_FRAGPROG) { + nv20_fragprog_bind(nv20, nv20->fragprog.current); + /*XXX: clear NV20_NEW_FRAGPROG if no new program uploaded */ + nv20->dirty_samplers |= (1<<10); + nv20->dirty_samplers = 0; + } + + if (nv20->dirty_samplers || (nv20->dirty & NV20_NEW_FRAGPROG)) { + nv20_fragtex_bind(nv20); + nv20->dirty &= ~NV20_NEW_FRAGPROG; + } + + if (nv20->dirty & NV20_NEW_VTXARRAYS) { + nv20->dirty &= ~NV20_NEW_VTXARRAYS; + nv20_vertex_layout(nv20); + nv20_vtxbuf_bind(nv20); + } + + if (nv20->dirty & NV20_NEW_BLEND) { + nv20->dirty &= ~NV20_NEW_BLEND; + nv20_state_emit_blend(nv20); + } + + if (nv20->dirty & NV20_NEW_BLENDCOL) { + nv20->dirty &= ~NV20_NEW_BLENDCOL; + nv20_state_emit_blend_color(nv20); + } + + if 
(nv20->dirty & NV20_NEW_RAST) { + nv20->dirty &= ~NV20_NEW_RAST; + nv20_state_emit_rast(nv20); + } + + if (nv20->dirty & NV20_NEW_DSA) { + nv20->dirty &= ~NV20_NEW_DSA; + nv20_state_emit_dsa(nv20); + } + + if (nv20->dirty & NV20_NEW_VIEWPORT) { + nv20->dirty &= ~NV20_NEW_VIEWPORT; + nv20_state_emit_viewport(nv20); + } + + if (nv20->dirty & NV20_NEW_SCISSOR) { + nv20->dirty &= ~NV20_NEW_SCISSOR; + nv20_state_emit_scissor(nv20); + } + + if (nv20->dirty & NV20_NEW_FRAMEBUFFER) { + nv20->dirty &= ~NV20_NEW_FRAMEBUFFER; + nv20_state_emit_framebuffer(nv20); + } + + /* Emit relocs for every referenced buffer. + * This is to ensure the bufmgr has an accurate idea of how + * the buffer is used. This isn't very efficient, but we don't + * seem to take a significant performance hit. Will be improved + * at some point. Vertex arrays are emitted by nv20_vbo.c + */ + + /* Render target */ + BEGIN_RING(kelvin, NV20TCL_DMA_COLOR, 1); + OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + if (nv20->zeta) { + BEGIN_RING(kelvin, NV20TCL_DMA_ZETA, 1); + OUT_RELOCo(nv20->zeta, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(kelvin, NV20TCL_ZETA_OFFSET, 1); + OUT_RELOCl(nv20->zeta, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + /* XXX for when we allocate LMA on nv17 */ +/* BEGIN_RING(kelvin, NV10TCL_LMA_DEPTH_BUFFER_OFFSET, 1); + OUT_RELOCl(nv20->zeta + lma_offset);*/ + } + + /* Vertex buffer */ + BEGIN_RING(kelvin, NV20TCL_DMA_VTXBUF0, 1); + OUT_RELOCo(nv20->rt[0], NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + BEGIN_RING(kelvin, NV20TCL_COLOR_OFFSET, 1); + OUT_RELOCl(nv20->rt[0], 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + + /* Texture images */ + for (i = 0; i < 2; i++) { + if (!(nv20->fp_samplers & (1 << i))) + continue; + BEGIN_RING(kelvin, NV20TCL_TX_OFFSET(i), 1); + OUT_RELOCl(nv20->tex[i].buffer, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_GART | NOUVEAU_BO_RD); + BEGIN_RING(kelvin, 
NV20TCL_TX_FORMAT(i), 1); + OUT_RELOCd(nv20->tex[i].buffer, nv20->tex[i].format, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | + NOUVEAU_BO_OR, NV20TCL_TX_FORMAT_DMA0, + NV20TCL_TX_FORMAT_DMA1); + } +} + diff --git a/src/gallium/drivers/nv20/nv20_surface.c b/src/gallium/drivers/nv20/nv20_surface.c new file mode 100644 index 0000000000..6cd607583c --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_surface.c @@ -0,0 +1,72 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "nv20_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv20_surface_copy(struct pipe_context *pipe, boolean do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv20_context *nv20 = nv20_context(pipe); + struct nv04_surface_2d *eng2d = nv20->screen->eng2d; + + if (do_flip) { + desty += height; + while (height--) { + eng2d->copy(eng2d, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + return; + } + + eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); +} + +static void +nv20_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv20_context *nv20 = nv20_context(pipe); + struct nv04_surface_2d *eng2d = nv20->screen->eng2d; + + eng2d->fill(eng2d, dest, destx, desty, width, height, value); +} + +void +nv20_init_surface_functions(struct nv20_context *nv20) +{ + nv20->pipe.surface_copy = nv20_surface_copy; + nv20->pipe.surface_fill = nv20_surface_fill; +} diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c new file mode 100644 index 0000000000..24d8f4bef0 --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_vbo.c @@ -0,0 +1,78 @@ +#include "draw/draw_context.h" +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv20_context.h" +#include "nv20_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" + +boolean nv20_draw_elements( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned prim, unsigned start, unsigned count) +{ + struct nv20_context *nv20 = nv20_context( pipe ); + struct draw_context 
*draw = nv20->draw; + unsigned i; + + nv20_emit_hw_state(nv20); + + /* + * Map vertex buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv20->vtxbuf[i].buffer) { + void *buf + = pipe->winsys->buffer_map(pipe->winsys, + nv20->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(draw, i, buf); + } + } + /* Map index buffer, if present */ + if (indexBuffer) { + void *mapped_indexes + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + } + else { + /* no index/element buffer */ + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + draw_set_mapped_constant_buffer(draw, + nv20->constbuf[PIPE_SHADER_VERTEX], + nv20->constbuf_nr[PIPE_SHADER_VERTEX]); + + /* draw! */ + draw_arrays(nv20->draw, prim, start, count); + + /* + * unmap vertex/index buffers + */ + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { + if (nv20->vtxbuf[i].buffer) { + pipe->winsys->buffer_unmap(pipe->winsys, nv20->vtxbuf[i].buffer); + draw_set_mapped_vertex_buffer(draw, i, NULL); + } + } + if (indexBuffer) { + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + draw_set_mapped_element_buffer(draw, 0, NULL); + } + + draw_flush(nv20->draw); + return TRUE; +} + +boolean nv20_draw_arrays( struct pipe_context *pipe, + unsigned prim, unsigned start, unsigned count) +{ + return nv20_draw_elements(pipe, NULL, 0, prim, start, count); +} + + + diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c new file mode 100644 index 0000000000..5db0e807ff --- /dev/null +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -0,0 +1,838 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" + +#include "nv20_context.h" +#include "nv20_state.h" + +/* TODO (at least...): + * 1. Indexed consts + ARL + * 2. Arb. 
swz/negation + * 3. NV_vp11, NV_vp2, NV_vp3 features + * - extra arith opcodes + * - branching + * - texture sampling + * - indexed attribs + * - indexed results + * 4. bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv20_shader.h" + +#define swz(s,x,y,z,w) nv20_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv20_sr_neg((s)) +#define abs(s) nv20_sr_abs((s)) + +struct nv20_vpc { + struct nv20_vertex_program *vp; + + struct nv20_vertex_program_exec *vpi; + + unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; + + int high_temp; + int temp_temp_count; + + struct nv20_sreg *imm; + unsigned nr_imm; +}; + +static struct nv20_sreg +temp(struct nv20_vpc *vpc) +{ + int idx; + + idx = vpc->temp_temp_count++; + idx += vpc->high_temp + 1; + return nv20_sr(NV30SR_TEMP, idx); +} + +static struct nv20_sreg +constant(struct nv20_vpc *vpc, int pipe, float x, float y, float z, float w) +{ + struct nv20_vertex_program *vp = vpc->vp; + struct nv20_vertex_program_data *vpd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < vp->nr_consts; idx++) { + if (vp->consts[idx].index == pipe) + return nv20_sr(NV30SR_CONST, idx); + } + } + + idx = vp->nr_consts++; + vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); + vpd = &vp->consts[idx]; + + vpd->index = pipe; + vpd->value[0] = x; + vpd->value[1] = y; + vpd->value[2] = z; + vpd->value[3] = w; + return nv20_sr(NV30SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv20_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv20_vpc *vpc, uint32_t *hw, int pos, struct nv20_sreg src) +{ + struct nv20_vertex_program *vp = vpc->vp; + uint32_t sr = 0; + + switch (src.type) { + case NV30SR_TEMP: + sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); + sr |= 
(src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); + break; + case NV30SR_INPUT: + sr |= (NV30_VP_SRC_REG_TYPE_INPUT << + NV30_VP_SRC_REG_TYPE_SHIFT); + vp->ir |= (1 << src.index); + hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); + break; + case NV30SR_CONST: + sr |= (NV30_VP_SRC_REG_TYPE_CONST << + NV30_VP_SRC_REG_TYPE_SHIFT); + assert(vpc->vpi->const_index == -1 || + vpc->vpi->const_index == src.index); + vpc->vpi->const_index = src.index; + break; + case NV30SR_NONE: + sr |= (NV30_VP_SRC_REG_TYPE_INPUT << + NV30_VP_SRC_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV30_VP_SRC_NEGATE; + + if (src.abs) + hw[0] |= (1 << (21 + pos)); + + sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) | + (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) | + (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) | + (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT)); + +/* + * |VVV| + * d�.�b + * \u/ + * + */ + + switch (pos) { + case 0: + hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >> + NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT; + hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) << + NV30_VP_INST_SRC0L_SHIFT; + break; + case 1: + hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT; + break; + case 2: + hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >> + NV30_VP_SRC2_HIGH_SHIFT) << NV30_VP_INST_SRC2H_SHIFT; + hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) << + NV30_VP_INST_SRC2L_SHIFT; + break; + default: + assert(0); + } +} + +static void +emit_dst(struct nv20_vpc *vpc, uint32_t *hw, int slot, struct nv20_sreg dst) +{ + struct nv20_vertex_program *vp = vpc->vp; + + switch (dst.type) { + case NV30SR_TEMP: + hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); + break; + case NV30SR_OUTPUT: + switch (dst.index) { + case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; + case 
NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + default: + break; + } + + hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); + hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + + /*XXX: no way this is entirely correct, someone needs to + * figure out what exactly it is. + */ + hw[3] |= 0x800; + break; + default: + assert(0); + } +} + +static void +nv20_vp_arith(struct nv20_vpc *vpc, int slot, int op, + struct nv20_sreg dst, int mask, + struct nv20_sreg s0, struct nv20_sreg s1, + struct nv20_sreg s2) +{ + struct nv20_vertex_program *vp = vpc->vp; + uint32_t *hw; + + vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); + vpc->vpi = &vp->insns[vp->nr_insns - 1]; + memset(vpc->vpi, 0, sizeof(*vpc->vpi)); + vpc->vpi->const_index = -1; + + hw = vpc->vpi->data; + + hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); + hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | + (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | + (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | + (3 << NV30_VP_INST_COND_SWZ_W_SHIFT)); + + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); +// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; +// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); + + if (dst.type == NV30SR_OUTPUT) { + if (slot) + hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); + } else { + if (slot) + hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); + } + + emit_dst(vpc, hw, 
slot, dst); + emit_src(vpc, hw, 0, s0); + emit_src(vpc, hw, 1, s1); + emit_src(vpc, hw, 2, s2); +} + +static INLINE struct nv20_sreg +tgsi_src(struct nv20_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nv20_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv20_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); + break; + case TGSI_FILE_CONSTANT: + src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + break; + case TGSI_FILE_IMMEDIATE: + src = vpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + if (vpc->high_temp < fsrc->SrcRegister.Index) + vpc->high_temp = fsrc->SrcRegister.Index; + src = nv20_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv20_sreg +tgsi_dst(struct nv20_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nv20_sreg dst; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + dst = nv20_sr(NV30SR_OUTPUT, + vpc->output_map[fdst->DstRegister.Index]); + + break; + case TGSI_FILE_TEMPORARY: + dst = nv20_sr(NV30SR_TEMP, fdst->DstRegister.Index); + if (vpc->high_temp < dst.index) + vpc->high_temp = dst.index; + break; + default: + NOUVEAU_ERR("bad dst file\n"); + break; + } + + return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +nv20_vertprog_parse_instruction(struct nv20_vpc *vpc, + const struct tgsi_full_instruction *finst) +{ + struct nv20_sreg src[3], dst, 
tmp; + struct nv20_sreg none = nv20_sr(NV30SR_NONE, 0); + int mask; + int ai = -1, ci = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + vpc->temp_temp_count = 0; + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(vpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + /*XXX: index comparison is broken now that consts come from + * two different register files. + */ + case TGSI_FILE_CONSTANT: + case TGSI_FILE_IMMEDIATE: + if (ci == -1 || ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + break; + case TGSI_OPCODE_ARL: + arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(vpc, 0, OP_DP4, dst, mask, src[0], 
src[1], none); + break; + case TGSI_OPCODE_DPH: + arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DST: + arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_EXP: + arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_FLR: + arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_LG2: + arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LIT: + arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LOG: + arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_MAD: + arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(vpc); + arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + swz(src[0], X, X, X, X)); + arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(vpc, 1, OP_EX2, dst, mask, none, none, + swz(tmp, X, X, X, X)); + break; + case TGSI_OPCODE_RCP: + arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_RET: + break; + case TGSI_OPCODE_RSQ: + arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_SGE: + arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGT: + arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + 
arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + break; + case TGSI_OPCODE_XPD: + tmp = temp(vpc); + arith(vpc, 0, OP_MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv20_vertprog_parse_decl_output(struct nv20_vpc *vpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV30_VP_INST_DEST_POS; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV30_VP_INST_DEST_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV30_VP_INST_DEST_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_BCOLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV30_VP_INST_DEST_BFC0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV30_VP_INST_DEST_BFC1; + } else { + NOUVEAU_ERR("bad bcolour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV30_VP_INST_DEST_FOGC; + break; + case TGSI_SEMANTIC_PSIZE: + hw = NV30_VP_INST_DEST_PSZ; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + vpc->output_map[fdec->DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv20_vertprog_prepare(struct nv20_vpc *vpc) +{ + struct tgsi_parse_context p; + int nr_imm = 0; + + tgsi_parse_init(&p, vpc->vp->pipe.tokens); + while 
(!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + nr_imm++; + break; + default: + break; + } + } + tgsi_parse_free(&p); + + if (nr_imm) { + vpc->imm = CALLOC(nr_imm, sizeof(struct nv20_sreg)); + assert(vpc->imm); + } + + return TRUE; +} + +static void +nv20_vertprog_translate(struct nv20_context *nv20, + struct nv20_vertex_program *vp) +{ + struct tgsi_parse_context parse; + struct nv20_vpc *vpc = NULL; + + tgsi_dump(vp->pipe.tokens,0); + + vpc = CALLOC(1, sizeof(struct nv20_vpc)); + if (!vpc) + return; + vpc->vp = vp; + vpc->high_temp = -1; + + if (!nv20_vertprog_prepare(vpc)) { + FREE(vpc); + return; + } + + tgsi_parse_init(&parse, vp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &parse.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_OUTPUT: + if (!nv20_vertprog_parse_decl_output(vpc, fdec)) + goto out_err; + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(imm->Immediate.NrTokens == 4 + 1); + vpc->imm[vpc->nr_imm++] = + constant(vpc, -1, + imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + finst = &parse.FullToken.FullInstruction; + if (!nv20_vertprog_parse_instruction(vpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST; + vp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + 
FREE(vpc); +} + +static boolean +nv20_vertprog_validate(struct nv20_context *nv20) +{ + struct nouveau_winsys *nvws = nv20->nvws; + struct pipe_winsys *ws = nv20->pipe.winsys; + struct nouveau_grobj *rankine = nv20->screen->rankine; + struct nv20_vertex_program *vp; + struct pipe_buffer *constbuf; + boolean upload_code = FALSE, upload_data = FALSE; + int i; + + vp = nv20->vertprog; + constbuf = nv20->constbuf[PIPE_SHADER_VERTEX]; + + /* Translate TGSI shader into hw bytecode */ + if (!vp->translated) { + nv20_vertprog_translate(nv20, vp); + if (!vp->translated) + return FALSE; + } + + /* Allocate hw vtxprog exec slots */ + if (!vp->exec) { + struct nouveau_resource *heap = nv20->screen->vp_exec_heap; + struct nouveau_stateobj *so; + uint vplen = vp->nr_insns; + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { + while (heap->next && heap->size < vplen) { + struct nv20_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->exec); + } + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) + assert(0); + } + + so = so_new(2, 0); + so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); + so_data (so, vp->exec->start); + so_ref(so, &vp->so); + + upload_code = TRUE; + } + + /* Allocate hw vtxprog const slots */ + if (vp->nr_consts && !vp->data) { + struct nouveau_resource *heap = nv20->screen->vp_data_heap; + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { + while (heap->next && heap->size < vp->nr_consts) { + struct nv20_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) + assert(0); + } + + /*XXX: handle this some day */ + assert(vp->data->start >= vp->data_start_min); + + upload_data = TRUE; + if (vp->data_start != vp->data->start) + upload_code = TRUE; + } + + /* If exec or data segments moved we need to patch the program to + * fixup offsets and register IDs. 
+ */ + if (vp->exec_start != vp->exec->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv20_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->has_branch_offset) { + assert(0); + } + } + + vp->exec_start = vp->exec->start; + } + + if (vp->nr_consts && vp->data_start != vp->data->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv20_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->const_index >= 0) { + vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK; + vpi->data[1] |= + (vpi->const_index + vp->data->start) << + NV30_VP_INST_CONST_SRC_SHIFT; + + } + } + + vp->data_start = vp->data->start; + } + + /* Update + Upload constant values */ + if (vp->nr_consts) { + float *map = NULL; + + if (constbuf) { + map = ws->buffer_map(ws, constbuf, + PIPE_BUFFER_USAGE_CPU_READ); + } + + for (i = 0; i < vp->nr_consts; i++) { + struct nv20_vertex_program_data *vpd = &vp->consts[i]; + + if (vpd->index >= 0) { + if (!upload_data && + !memcmp(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float))) + continue; + memcpy(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float)); + } + + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (i + vp->data->start); + OUT_RINGp ((uint32_t *)vpd->value, 4); + } + + if (constbuf) { + ws->buffer_unmap(ws, constbuf); + } + } + + /* Upload vtxprog */ + if (upload_code) { +#if 0 + for (i = 0; i < vp->nr_insns; i++) { + NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n", + i, vp->insns[i].data[0], vp->insns[i].data[1], + vp->insns[i].data[2], vp->insns[i].data[3]); + } +#endif + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (vp->exec->start); + for (i = 0; i < vp->nr_insns; i++) { + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (vp->insns[i].data, 4); + } + } + + if (vp->so != nv20->state.hw[NV30_STATE_VERTPROG]) { + so_ref(vp->so, &nv20->state.hw[NV30_STATE_VERTPROG]); + return TRUE; + } + + return FALSE; +} + +void +nv20_vertprog_destroy(struct nv20_context *nv20, struct 
nv20_vertex_program *vp) +{ + struct nouveau_winsys *nvws = nv20->screen->nvws; + + vp->translated = FALSE; + + if (vp->nr_insns) { + FREE(vp->insns); + vp->insns = NULL; + vp->nr_insns = 0; + } + + if (vp->nr_consts) { + FREE(vp->consts); + vp->consts = NULL; + vp->nr_consts = 0; + } + + nvws->res_free(&vp->exec); + vp->exec_start = 0; + nvws->res_free(&vp->data); + vp->data_start = 0; + vp->data_start_min = 0; + + vp->ir = vp->or = 0; + so_ref(NULL, &vp->so); +} + +struct nv20_state_entry nv20_state_vertprog = { + .validate = nv20_vertprog_validate, + .dirty = { + .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/, + .hw = NV30_STATE_VERTPROG, + } +}; diff --git a/src/gallium/drivers/nv30/Makefile b/src/gallium/drivers/nv30/Makefile new file mode 100644 index 0000000000..4c29e2eab3 --- /dev/null +++ b/src/gallium/drivers/nv30/Makefile @@ -0,0 +1,28 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv30 + +C_SOURCES = \ + nv30_clear.c \ + nv30_context.c \ + nv30_draw.c \ + nv30_fragprog.c \ + nv30_fragtex.c \ + nv30_miptree.c \ + nv30_query.c \ + nv30_screen.c \ + nv30_state.c \ + nv30_state_blend.c \ + nv30_state_emit.c \ + nv30_state_fb.c \ + nv30_state_rasterizer.c \ + nv30_state_scissor.c \ + nv30_state_stipple.c \ + nv30_state_viewport.c \ + nv30_state_zsa.c \ + nv30_surface.c \ + nv30_vbo.c \ + nv30_vertprog.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/nv30/nv30_clear.c b/src/gallium/drivers/nv30/nv30_clear.c new file mode 100644 index 0000000000..8c3ca204d5 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_clear.c @@ -0,0 +1,13 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv30_context.h" + +void +nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + ps->status = PIPE_SURFACE_STATUS_CLEAR; +} diff --git a/src/gallium/drivers/nv30/nv30_context.c 
b/src/gallium/drivers/nv30/nv30_context.c new file mode 100644 index 0000000000..61654f8756 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -0,0 +1,72 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv30_context.h" +#include "nv30_screen.h" + +static void +nv30_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(rankine, 0x1fd8, 1); + OUT_RING (2); + BEGIN_RING(rankine, 0x1fd8, 1); + OUT_RING (1); + } + + FIRE_RING(fence); +} + +static void +nv30_destroy(struct pipe_context *pipe) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + if (nv30->draw) + draw_destroy(nv30->draw); + FREE(nv30); +} + +struct pipe_context * +nv30_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct nv30_screen *screen = nv30_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nv30_context *nv30; + struct nouveau_winsys *nvws = screen->nvws; + + nv30 = CALLOC(1, sizeof(struct nv30_context)); + if (!nv30) + return NULL; + nv30->screen = screen; + nv30->pctx_id = pctx_id; + + nv30->nvws = nvws; + + nv30->pipe.winsys = ws; + nv30->pipe.screen = pscreen; + nv30->pipe.destroy = nv30_destroy; + nv30->pipe.draw_arrays = nv30_draw_arrays; + nv30->pipe.draw_elements = nv30_draw_elements; + nv30->pipe.clear = nv30_clear; + nv30->pipe.flush = nv30_flush; + + nv30_init_query_functions(nv30); + nv30_init_surface_functions(nv30); + nv30_init_state_functions(nv30); + + /* Create, configure, and install fallback swtnl path */ + nv30->draw = draw_create(); + draw_wide_point_threshold(nv30->draw, 9999999.0); + draw_wide_line_threshold(nv30->draw, 9999999.0); + draw_enable_line_stipple(nv30->draw, FALSE); + draw_enable_point_sprites(nv30->draw, FALSE); + draw_set_rasterize_stage(nv30->draw, nv30_draw_render_stage(nv30)); + + return &nv30->pipe; +} 
+ diff --git a/src/gallium/drivers/nv30/nv30_context.h b/src/gallium/drivers/nv30/nv30_context.h new file mode 100644 index 0000000000..b933769700 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_context.h @@ -0,0 +1,212 @@ +#ifndef __NV30_CONTEXT_H__ +#define __NV30_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv30_screen *ctx = nv30->screen +#include "nouveau/nouveau_push.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nv30_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +enum nv30_state_index { + NV30_STATE_FB = 0, + NV30_STATE_VIEWPORT = 1, + NV30_STATE_BLEND = 2, + NV30_STATE_RAST = 3, + NV30_STATE_ZSA = 4, + NV30_STATE_BCOL = 5, + NV30_STATE_CLIP = 6, + NV30_STATE_SCISSOR = 7, + NV30_STATE_STIPPLE = 8, + NV30_STATE_FRAGPROG = 9, + NV30_STATE_VERTPROG = 10, + NV30_STATE_FRAGTEX0 = 11, + NV30_STATE_FRAGTEX1 = 12, + NV30_STATE_FRAGTEX2 = 13, + NV30_STATE_FRAGTEX3 = 14, + NV30_STATE_FRAGTEX4 = 15, + NV30_STATE_FRAGTEX5 = 16, + NV30_STATE_FRAGTEX6 = 17, + NV30_STATE_FRAGTEX7 = 18, + NV30_STATE_FRAGTEX8 = 19, + NV30_STATE_FRAGTEX9 = 20, + NV30_STATE_FRAGTEX10 = 21, + NV30_STATE_FRAGTEX11 = 22, + NV30_STATE_FRAGTEX12 = 23, + NV30_STATE_FRAGTEX13 = 24, + NV30_STATE_FRAGTEX14 = 25, + NV30_STATE_FRAGTEX15 = 26, + NV30_STATE_VERTTEX0 = 27, + NV30_STATE_VERTTEX1 = 28, + NV30_STATE_VERTTEX2 = 29, + NV30_STATE_VERTTEX3 = 30, + NV30_STATE_VTXBUF = 31, + NV30_STATE_VTXFMT = 32, + NV30_STATE_VTXATTR = 33, + NV30_STATE_MAX = 34 +}; + +#include "nv30_screen.h" + +#define NV30_NEW_BLEND (1 << 0) +#define NV30_NEW_RAST (1 << 1) +#define 
NV30_NEW_ZSA (1 << 2) +#define NV30_NEW_SAMPLER (1 << 3) +#define NV30_NEW_FB (1 << 4) +#define NV30_NEW_STIPPLE (1 << 5) +#define NV30_NEW_SCISSOR (1 << 6) +#define NV30_NEW_VIEWPORT (1 << 7) +#define NV30_NEW_BCOL (1 << 8) +#define NV30_NEW_VERTPROG (1 << 9) +#define NV30_NEW_FRAGPROG (1 << 10) +#define NV30_NEW_ARRAYS (1 << 11) +#define NV30_NEW_UCP (1 << 12) + +struct nv30_rasterizer_state { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv30_zsa_state { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv30_blend_state { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + + +struct nv30_state { + unsigned scissor_enabled; + unsigned stipple_enabled; + unsigned viewport_bypass; + unsigned fp_samplers; + + uint64_t dirty; + struct nouveau_stateobj *hw[NV30_STATE_MAX]; +}; + +struct nv30_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nv30_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + /* HW state derived from pipe states */ + struct nv30_state state; + + /* Context state */ + unsigned dirty; + struct pipe_scissor_state scissor; + unsigned stipple[32]; + struct nv30_vertex_program *vertprog; + struct nv30_fragment_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; + struct nv30_rasterizer_state *rasterizer; + struct nv30_zsa_state *zsa; + struct nv30_blend_state *blend; + struct pipe_blend_color blend_colour; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct pipe_buffer *idxbuf; + unsigned idxbuf_format; + struct nv30_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nv30_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + unsigned nr_textures; + unsigned dirty_samplers; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned vtxbuf_nr; + struct pipe_vertex_element 
vtxelt[PIPE_MAX_ATTRIBS]; + unsigned vtxelt_nr; + const unsigned *edgeflags; +}; + +static INLINE struct nv30_context * +nv30_context(struct pipe_context *pipe) +{ + return (struct nv30_context *)pipe; +} + +struct nv30_state_entry { + boolean (*validate)(struct nv30_context *nv30); + struct { + unsigned pipe; + unsigned hw; + } dirty; +}; + +extern void nv30_init_state_functions(struct nv30_context *nv30); +extern void nv30_init_surface_functions(struct nv30_context *nv30); +extern void nv30_init_query_functions(struct nv30_context *nv30); + +extern void nv30_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv30_draw.c */ +extern struct draw_stage *nv30_draw_render_stage(struct nv30_context *nv30); + +/* nv30_vertprog.c */ +extern void nv30_vertprog_destroy(struct nv30_context *, + struct nv30_vertex_program *); + +/* nv30_fragprog.c */ +extern void nv30_fragprog_destroy(struct nv30_context *, + struct nv30_fragment_program *); + +/* nv30_fragtex.c */ +extern void nv30_fragtex_bind(struct nv30_context *); + +/* nv30_state.c and friends */ +extern boolean nv30_state_validate(struct nv30_context *nv30); +extern void nv30_state_emit(struct nv30_context *nv30); +extern struct nv30_state_entry nv30_state_rasterizer; +extern struct nv30_state_entry nv30_state_scissor; +extern struct nv30_state_entry nv30_state_stipple; +extern struct nv30_state_entry nv30_state_fragprog; +extern struct nv30_state_entry nv30_state_vertprog; +extern struct nv30_state_entry nv30_state_blend; +extern struct nv30_state_entry nv30_state_blend_colour; +extern struct nv30_state_entry nv30_state_zsa; +extern struct nv30_state_entry nv30_state_viewport; +extern struct nv30_state_entry nv30_state_framebuffer; +extern struct nv30_state_entry nv30_state_fragtex; +extern struct nv30_state_entry nv30_state_vbo; + +/* nv30_vbo.c */ +extern boolean nv30_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv30_draw_elements(struct 
pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); + +/* nv30_clear.c */ +extern void nv30_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +#endif diff --git a/src/gallium/drivers/nv30/nv30_draw.c b/src/gallium/drivers/nv30/nv30_draw.c new file mode 100644 index 0000000000..74fc138c05 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_draw.c @@ -0,0 +1,61 @@ +#include "draw/draw_pipe.h" + +#include "nv30_context.h" + +struct nv30_draw_stage { + struct draw_stage draw; + struct nv30_context *nv30; +}; + +static void +nv30_draw_point(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_line(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_tri(struct draw_stage *draw, struct prim_header *prim) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_flush(struct draw_stage *draw, unsigned flags) +{ +} + +static void +nv30_draw_reset_stipple_counter(struct draw_stage *draw) +{ + NOUVEAU_ERR("\n"); +} + +static void +nv30_draw_destroy(struct draw_stage *draw) +{ + FREE(draw); +} + +struct draw_stage * +nv30_draw_render_stage(struct nv30_context *nv30) +{ + struct nv30_draw_stage *nv30draw = CALLOC_STRUCT(nv30_draw_stage); + + nv30draw->nv30 = nv30; + nv30draw->draw.draw = nv30->draw; + nv30draw->draw.point = nv30_draw_point; + nv30draw->draw.line = nv30_draw_line; + nv30draw->draw.tri = nv30_draw_tri; + nv30draw->draw.flush = nv30_draw_flush; + nv30draw->draw.reset_stipple_counter = nv30_draw_reset_stipple_counter; + nv30draw->draw.destroy = nv30_draw_destroy; + + return &nv30draw->draw; +} + diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c new file mode 100644 index 0000000000..320ba3f4bf --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -0,0 +1,911 @@ +#include "pipe/p_context.h" +#include 
"pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv30_context.h" + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 1 +#define MASK_Y 2 +#define MASK_Z 4 +#define MASK_W 8 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE NV30_FP_OP_DST_SCALE_1X +#define DEF_CTEST NV30_FP_OP_COND_TR +#include "nv30_shader.h" + +#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv30_sr_neg((s)) +#define abs(s) nv30_sr_abs((s)) +#define scale(s,v) nv30_sr_scale((s), NV30_FP_OP_DST_SCALE_##v) + +#define MAX_CONSTS 128 +#define MAX_IMM 32 +struct nv30_fpc { + struct nv30_fragment_program *fp; + + uint attrib_map[PIPE_MAX_SHADER_INPUTS]; + + int high_temp; + int temp_temp_count; + int num_regs; + + uint depth_id; + uint colour_id; + + unsigned inst_offset; + + struct { + int pipe; + float vals[4]; + } consts[MAX_CONSTS]; + int nr_consts; + + struct nv30_sreg imm[MAX_IMM]; + unsigned nr_imm; +}; + +static INLINE struct nv30_sreg +temp(struct nv30_fpc *fpc) +{ + int idx; + + idx = fpc->temp_temp_count++; + idx += fpc->high_temp + 1; + return nv30_sr(NV30SR_TEMP, idx); +} + +static INLINE struct nv30_sreg +constant(struct nv30_fpc *fpc, int pipe, float vals[4]) +{ + int idx; + + if (fpc->nr_consts == MAX_CONSTS) + assert(0); + idx = fpc->nr_consts++; + + fpc->consts[idx].pipe = pipe; + if (pipe == -1) + memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); + return nv30_sr(NV30SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv30_fp_arith((cc), (s), NV30_FP_OP_OPCODE_##o, \ + (d), (m), (s0), (s1), (s2)) +#define tex(cc,s,o,u,d,m,s0,s1,s2) \ + nv30_fp_tex((cc), (s), NV30_FP_OP_OPCODE_##o, (u), \ + (d), (m), (s0), none, none) + +static void +grow_insns(struct nv30_fpc *fpc, int size) +{ + struct nv30_fragment_program *fp = fpc->fp; + + fp->insn_len += size; + fp->insn = 
realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); +} + +static void +emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src) +{ + struct nv30_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + uint32_t sr = 0; + + switch (src.type) { + case NV30SR_INPUT: + sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); + hw[0] |= (src.index << NV30_FP_OP_INPUT_SRC_SHIFT); + break; + case NV30SR_OUTPUT: + sr |= NV30_FP_REG_SRC_HALF; + /* fall-through */ + case NV30SR_TEMP: + sr |= (NV30_FP_REG_TYPE_TEMP << NV30_FP_REG_TYPE_SHIFT); + sr |= (src.index << NV30_FP_REG_SRC_SHIFT); + break; + case NV30SR_CONST: + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + if (fpc->consts[src.index].pipe >= 0) { + struct nv30_fragment_program_data *fpd; + + fp->consts = realloc(fp->consts, ++fp->nr_consts * + sizeof(*fpd)); + fpd = &fp->consts[fp->nr_consts - 1]; + fpd->offset = fpc->inst_offset + 4; + fpd->index = fpc->consts[src.index].pipe; + memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); + } else { + memcpy(&fp->insn[fpc->inst_offset + 4], + fpc->consts[src.index].vals, + sizeof(uint32_t) * 4); + } + + sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT); + break; + case NV30SR_NONE: + sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV30_FP_REG_NEGATE; + + if (src.abs) + hw[1] |= (1 << (29 + pos)); + + sr |= ((src.swz[0] << NV30_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NV30_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NV30_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NV30_FP_REG_SWZ_W_SHIFT)); + + hw[pos + 1] |= sr; +} + +static void +emit_dst(struct nv30_fpc *fpc, struct nv30_sreg dst) +{ + struct nv30_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + + switch (dst.type) { + case NV30SR_TEMP: + if (fpc->num_regs < (dst.index + 1)) + fpc->num_regs = dst.index + 1; + break; + case NV30SR_OUTPUT: + if (dst.index == 1) { + fp->fp_control |= 
0xe; + } else { + hw[0] |= NV30_FP_OP_OUT_REG_HALF; + } + break; + case NV30SR_NONE: + hw[0] |= (1 << 30); + break; + default: + assert(0); + } + + hw[0] |= (dst.index << NV30_FP_OP_OUT_REG_SHIFT); +} + +static void +nv30_fp_arith(struct nv30_fpc *fpc, int sat, int op, + struct nv30_sreg dst, int mask, + struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) +{ + struct nv30_fragment_program *fp = fpc->fp; + uint32_t *hw; + + fpc->inst_offset = fp->insn_len; + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + memset(hw, 0, sizeof(uint32_t) * 4); + + if (op == NV30_FP_OP_OPCODE_KIL) + fp->fp_control |= NV34TCL_FP_CONTROL_USES_KIL; + hw[0] |= (op << NV30_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NV30_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NV30_FP_OP_DST_SCALE_SHIFT); + + if (sat) + hw[0] |= NV30_FP_OP_OUT_SAT; + + if (dst.cc_update) + hw[0] |= NV30_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << NV30_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NV30_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NV30_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NV30_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NV30_FP_OP_COND_SWZ_W_SHIFT)); + + emit_dst(fpc, dst); + emit_src(fpc, 0, s0); + emit_src(fpc, 1, s1); + emit_src(fpc, 2, s2); +} + +static void +nv30_fp_tex(struct nv30_fpc *fpc, int sat, int op, int unit, + struct nv30_sreg dst, int mask, + struct nv30_sreg s0, struct nv30_sreg s1, struct nv30_sreg s2) +{ + struct nv30_fragment_program *fp = fpc->fp; + + nv30_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + + fp->insn[fpc->inst_offset] |= (unit << NV30_FP_OP_TEX_UNIT_SHIFT); + fp->samplers |= (1 << unit); +} + +static INLINE struct nv30_sreg +tgsi_src(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc) +{ + struct nv30_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv30_sr(NV30SR_INPUT, + fpc->attrib_map[fsrc->SrcRegister.Index]); + break; + case TGSI_FILE_CONSTANT: + src = constant(fpc, 
fsrc->SrcRegister.Index, NULL); + break; + case TGSI_FILE_IMMEDIATE: + assert(fsrc->SrcRegister.Index < fpc->nr_imm); + src = fpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index + 1); + if (fpc->high_temp < src.index) + fpc->high_temp = src.index; + break; + /* This is clearly insane, but gallium hands us shaders like this. + * Luckily fragprog results are just temp regs.. + */ + case TGSI_FILE_OUTPUT: + if (fsrc->SrcRegister.Index == fpc->colour_id) + return nv30_sr(NV30SR_OUTPUT, 0); + else + return nv30_sr(NV30SR_OUTPUT, 1); + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv30_sreg +tgsi_dst(struct nv30_fpc *fpc, const struct tgsi_full_dst_register *fdst) { + int idx; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + if (fdst->DstRegister.Index == fpc->colour_id) + return nv30_sr(NV30SR_OUTPUT, 0); + else + return nv30_sr(NV30SR_OUTPUT, 1); + break; + case TGSI_FILE_TEMPORARY: + idx = fdst->DstRegister.Index + 1; + if (fpc->high_temp < idx) + fpc->high_temp = idx; + return nv30_sr(NV30SR_TEMP, idx); + case TGSI_FILE_NULL: + return nv30_sr(NV30SR_NONE, 0); + default: + NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + return nv30_sr(NV30SR_NONE, 0); + } +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +src_native_swz(struct nv30_fpc *fpc, const struct tgsi_full_src_register *fsrc, + struct nv30_sreg *src) +{ + const 
struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); + struct nv30_sreg tgsi = tgsi_src(fpc, fsrc); + uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; + uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, + fsrc->SrcRegisterExtSwz.NegateY, + fsrc->SrcRegisterExtSwz.NegateZ, + fsrc->SrcRegisterExtSwz.NegateW }; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + mask |= (1 << c); + break; + case TGSI_EXTSWIZZLE_ZERO: + zero_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + case TGSI_EXTSWIZZLE_ONE: + one_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + default: + assert(0); + } + + if (!tgsi.negate && neg[c]) + neg_mask |= (1 << c); + } + + if (mask == MASK_ALL && !neg_mask) + return TRUE; + + *src = temp(fpc); + + if (mask) + arith(fpc, 0, MOV, *src, mask, tgsi, none, none); + + if (zero_mask) + arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); + + if (one_mask) + arith(fpc, 0, STR, *src, one_mask, *src, none, none); + + if (neg_mask) { + struct nv30_sreg one = temp(fpc); + arith(fpc, 0, STR, one, neg_mask, one, none, none); + arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); + } + + return FALSE; +} + +static boolean +nv30_fragprog_parse_instruction(struct nv30_fpc *fpc, + const struct tgsi_full_instruction *finst) +{ + const struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); + struct nv30_sreg src[3], dst, tmp; + int mask, sat, unit = 0; + int ai = -1, ci = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + fpc->temp_temp_count = 0; + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(fpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct 
tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(fpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + NOUVEAU_MSG("extra src attr %d\n", + fsrc->SrcRegister.Index); + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + case TGSI_FILE_IMMEDIATE: + if (ci == -1 || ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + case TGSI_FILE_SAMPLER: + unit = fsrc->SrcRegister.Index; + break; + case TGSI_FILE_OUTPUT: + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_CMP: + tmp = temp(fpc); + arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp.cc_update = 1; + arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV30_VP_INST_COND_LT; + arith(fpc, sat, MOV, dst, mask, src[1], none, none); + break; + case TGSI_OPCODE_COS: + arith(fpc, sat, COS, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(fpc, sat, DP3, dst, mask, src[0], 
src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); + arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), + swz(src[1], W, W, W, W), none); + break; + case TGSI_OPCODE_DST: + arith(fpc, sat, DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(fpc, sat, EX2, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FLR: + arith(fpc, sat, FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(fpc, sat, FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_KILP: + arith(fpc, 0, KIL, none, 0, none, none, none); + break; + case TGSI_OPCODE_KIL: + dst = nv30_sr(NV30SR_NONE, 0); + dst.cc_update = 1; + arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); + dst.cc_update = 0; dst.cc_test = NV30_FP_OP_COND_LT; + arith(fpc, 0, KIL, dst, 0, none, none, none); + break; + case TGSI_OPCODE_LG2: + arith(fpc, sat, LG2, dst, mask, src[0], none, none); + break; +// case TGSI_OPCODE_LIT: + case TGSI_OPCODE_LRP: + arith(fpc, sat, LRP, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAD: + arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(fpc, sat, MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_NOISE1: + case TGSI_OPCODE_NOISE2: + case TGSI_OPCODE_NOISE3: + case TGSI_OPCODE_NOISE4: + arith(fpc, sat, SFL, dst, mask, none, none, none); + break; + case TGSI_OPCODE_POW: + arith(fpc, sat, POW, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_RCP: + arith(fpc, sat, RCP, dst, mask, src[0], none, none); + 
break; + case TGSI_OPCODE_RET: + assert(0); + break; + case TGSI_OPCODE_RFL: + arith(fpc, 0, RFL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_RSQ: + arith(fpc, sat, RSQ, dst, mask, abs(swz(src[0], X, X, X, X)), none, none); + break; + case TGSI_OPCODE_SCS: + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + break; + case TGSI_OPCODE_SIN: + arith(fpc, sat, SIN, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_SGE: + arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGT: + arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); + break; + case TGSI_OPCODE_TEX: + tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXB: + tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXP: + tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_XPD: + tmp = temp(fpc); + arith(fpc, 0, MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(fpc, sat, MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv30_fragprog_parse_decl_attrib(struct nv30_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV30_FP_OP_INPUT_SRC_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV30_FP_OP_INPUT_SRC_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + 
hw = NV30_FP_OP_INPUT_SRC_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV30_FP_OP_INPUT_SRC_FOGC; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV30_FP_OP_INPUT_SRC_TC(fdec->Semantic. + SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad input semantic\n"); + return FALSE; + } + + fpc->attrib_map[fdec->DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv30_fragprog_parse_decl_output(struct nv30_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + fpc->depth_id = fdec->DeclarationRange.First; + break; + case TGSI_SEMANTIC_COLOR: + fpc->colour_id = fdec->DeclarationRange.First; + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + return TRUE; +} + +static boolean +nv30_fragprog_prepare(struct nv30_fpc *fpc) +{ + struct tgsi_parse_context p; + /*int high_temp = -1, i;*/ + + tgsi_parse_init(&p, fpc->fp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_INPUT: + if (!nv30_fragprog_parse_decl_attrib(fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_OUTPUT: + if (!nv30_fragprog_parse_decl_output(fpc, fdec)) + goto out_err; + break; + /*case TGSI_FILE_TEMPORARY: + if (fdec->DeclarationRange.Last > high_temp) { + high_temp = + fdec->DeclarationRange.Last; + } + break;*/ + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *imm; + float vals[4]; + + imm = &p.FullToken.FullImmediate; + assert(imm->Immediate.DataType 
== TGSI_IMM_FLOAT32); + assert(fpc->nr_imm < MAX_IMM); + + vals[0] = imm->u.ImmediateFloat32[0].Float; + vals[1] = imm->u.ImmediateFloat32[1].Float; + vals[2] = imm->u.ImmediateFloat32[2].Float; + vals[3] = imm->u.ImmediateFloat32[3].Float; + fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); + } + break; + default: + break; + } + } + tgsi_parse_free(&p); + + /*if (++high_temp) { + fpc->r_temp = CALLOC(high_temp, sizeof(struct nv30_sreg)); + for (i = 0; i < high_temp; i++) + fpc->r_temp[i] = temp(fpc); + fpc->r_temps_discard = 0; + }*/ + + return TRUE; + +out_err: + /*if (fpc->r_temp) + FREE(fpc->r_temp);*/ + tgsi_parse_free(&p); + return FALSE; +} + +static void +nv30_fragprog_translate(struct nv30_context *nv30, + struct nv30_fragment_program *fp) +{ + struct tgsi_parse_context parse; + struct nv30_fpc *fpc = NULL; + + tgsi_dump(fp->pipe.tokens,0); + + fpc = CALLOC(1, sizeof(struct nv30_fpc)); + if (!fpc) + return; + fpc->fp = fp; + fpc->high_temp = -1; + fpc->num_regs = 2; + + if (!nv30_fragprog_prepare(fpc)) { + FREE(fpc); + return; + } + + tgsi_parse_init(&parse, fp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + + finst = &parse.FullToken.FullInstruction; + if (!nv30_fragprog_parse_instruction(fpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + fp->fp_control |= (fpc->num_regs-1)/2; + fp->fp_reg_control = (1<<16)|0x4; + + /* Terminate final instruction */ + fp->insn[fpc->inst_offset] |= 0x00000001; + + /* Append NOP + END instruction, may or may not be necessary. 
*/ + fpc->inst_offset = fp->insn_len; + grow_insns(fpc, 4); + fp->insn[fpc->inst_offset + 0] = 0x00000001; + fp->insn[fpc->inst_offset + 1] = 0x00000000; + fp->insn[fpc->inst_offset + 2] = 0x00000000; + fp->insn[fpc->inst_offset + 3] = 0x00000000; + + fp->translated = TRUE; + fp->on_hw = FALSE; +out_err: + tgsi_parse_free(&parse); + FREE(fpc); +} + +static void +nv30_fragprog_upload(struct nv30_context *nv30, + struct nv30_fragment_program *fp) +{ + struct pipe_winsys *ws = nv30->pipe.winsys; + const uint32_t le = 1; + uint32_t *map; + int i; + + map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + +#if 0 + for (i = 0; i < fp->insn_len; i++) { + fflush(stdout); fflush(stderr); + NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); + fflush(stdout); fflush(stderr); + } +#endif + + if ((*(const uint8_t *)&le)) { + for (i = 0; i < fp->insn_len; i++) { + map[i] = fp->insn[i]; + } + } else { + /* Weird swapping for big-endian chips */ + for (i = 0; i < fp->insn_len; i++) { + map[i] = ((fp->insn[i] & 0xffff) << 16) | + ((fp->insn[i] >> 16) & 0xffff); + } + } + + ws->buffer_unmap(ws, fp->buffer); +} + +static boolean +nv30_fragprog_validate(struct nv30_context *nv30) +{ + struct nv30_fragment_program *fp = nv30->fragprog; + struct pipe_buffer *constbuf = + nv30->constbuf[PIPE_SHADER_FRAGMENT]; + struct pipe_winsys *ws = nv30->pipe.winsys; + struct nouveau_stateobj *so; + boolean new_consts = FALSE; + int i; + + if (fp->translated) + goto update_constants; + + /*nv30->fallback_swrast &= ~NV30_NEW_FRAGPROG;*/ + nv30_fragprog_translate(nv30, fp); + if (!fp->translated) { + /*nv30->fallback_swrast |= NV30_NEW_FRAGPROG;*/ + return FALSE; + } + + fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); + nv30_fragprog_upload(nv30, fp); + + so = so_new(8, 1); + so_method(so, nv30->screen->rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1); + so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 
NV34TCL_FP_ACTIVE_PROGRAM_DMA0, NV34TCL_FP_ACTIVE_PROGRAM_DMA1); + so_method(so, nv30->screen->rankine, NV34TCL_FP_CONTROL, 1); + so_data (so, fp->fp_control); + so_method(so, nv30->screen->rankine, NV34TCL_FP_REG_CONTROL, 1); + so_data (so, fp->fp_reg_control); + so_method(so, nv30->screen->rankine, NV34TCL_TX_UNITS_ENABLE, 1); + so_data (so, fp->samplers); + so_ref(so, &fp->so); + +update_constants: + if (fp->nr_consts) { + float *map; + + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); + for (i = 0; i < fp->nr_consts; i++) { + struct nv30_fragment_program_data *fpd = &fp->consts[i]; + uint32_t *p = &fp->insn[fpd->offset]; + uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; + + if (!memcmp(p, cb, 4 * sizeof(float))) + continue; + memcpy(p, cb, 4 * sizeof(float)); + new_consts = TRUE; + } + ws->buffer_unmap(ws, constbuf); + + if (new_consts) + nv30_fragprog_upload(nv30, fp); + } + + if (new_consts || fp->so != nv30->state.hw[NV30_STATE_FRAGPROG]) { + so_ref(fp->so, &nv30->state.hw[NV30_STATE_FRAGPROG]); + return TRUE; + } + + return FALSE; +} + +void +nv30_fragprog_destroy(struct nv30_context *nv30, + struct nv30_fragment_program *fp) +{ + if (fp->insn_len) + FREE(fp->insn); +} + +struct nv30_state_entry nv30_state_fragprog = { + .validate = nv30_fragprog_validate, + .dirty = { + .pipe = NV30_NEW_FRAGPROG, + .hw = NV30_STATE_FRAGPROG + } +}; diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c new file mode 100644 index 0000000000..b1d2663af3 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_fragtex.c @@ -0,0 +1,163 @@ +#include "nv30_context.h" +#include "nouveau/nouveau_util.h" + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV34TCL_TX_FORMAT_FORMAT_##tf, \ + (NV34TCL_TX_SWIZZLE_S0_X_##ts0x | NV34TCL_TX_SWIZZLE_S0_Y_##ts0y | \ + NV34TCL_TX_SWIZZLE_S0_Z_##ts0z | NV34TCL_TX_SWIZZLE_S0_W_##ts0w | \ + NV34TCL_TX_SWIZZLE_S1_X_##ts1x | 
NV34TCL_TX_SWIZZLE_S1_Y_##ts1y | \ + NV34TCL_TX_SWIZZLE_S1_Z_##ts1z | NV34TCL_TX_SWIZZLE_S1_W_##ts1w) \ +} + +struct nv30_texture_format { + boolean defined; + uint pipe; + int format; + int swizzle; +}; + +static struct nv30_texture_format +nv30_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W), + _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W), + _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W), + _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W), + _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X), + _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X), + _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X), + _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y), +// _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X), +// _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X), + _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W), + _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W), + _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W), + _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W), + {}, +}; + +static struct nv30_texture_format * +nv30_fragtex_format(uint pipe_format) +{ + struct nv30_texture_format *tf = nv30_texture_formats; + char fs[128]; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); + return NULL; +} + + +static struct nouveau_stateobj * +nv30_fragtex_build(struct nv30_context *nv30, int unit) +{ + struct nv30_sampler_state *ps = nv30->tex_sampler[unit]; + struct nv30_miptree *nv30mt = nv30->tex_miptree[unit]; + struct pipe_texture *pt = &nv30mt->base; + struct nv30_texture_format *tf; + struct nouveau_stateobj *so; + uint32_t txf, txs , txp; + unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + tf = nv30_fragtex_format(pt->format); + if (!tf) + assert(0); + + txf = tf->format; + txf |= ((pt->last_level>0) ? 
NV34TCL_TX_FORMAT_MIPMAP : 0); + txf |= log2i(pt->width[0]) << 20; + txf |= log2i(pt->height[0]) << 24; + txf |= log2i(pt->depth[0]) << 28; + txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV34TCL_TX_FORMAT_CUBIC; + /* fall-through */ + case PIPE_TEXTURE_2D: + txf |= NV34TCL_TX_FORMAT_DIMS_2D; + break; + case PIPE_TEXTURE_3D: + txf |= NV34TCL_TX_FORMAT_DIMS_3D; + break; + case PIPE_TEXTURE_1D: + txf |= NV34TCL_TX_FORMAT_DIMS_1D; + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return NULL; + } + + if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + txp = 0; + } else { + txp = nv30mt->level[0].pitch; + txf |= (1<<13) /*FIXME: NV34TCL_TX_FORMAT_LINEAR ? */; + } + + txs = tf->swizzle; + + so = so_new(16, 2); + so_method(so, nv30->screen->rankine, NV34TCL_TX_OFFSET(unit), 8); + so_reloc (so, nv30mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, nv30mt->buffer, txf, tex_flags | NOUVEAU_BO_OR, + NV34TCL_TX_FORMAT_DMA0, NV34TCL_TX_FORMAT_DMA1); + so_data (so, ps->wrap); + so_data (so, NV34TCL_TX_ENABLE_ENABLE | ps->en); + so_data (so, txs); + so_data (so, ps->filt | 0x2000 /*voodoo*/); + so_data (so, (pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) | + pt->height[0]); + so_data (so, ps->bcol); + + return so; +} + +static boolean +nv30_fragtex_validate(struct nv30_context *nv30) +{ + struct nv30_fragment_program *fp = nv30->fragprog; + struct nv30_state *state = &nv30->state; + struct nouveau_stateobj *so; + unsigned samplers, unit; + + samplers = state->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so = so_new(2, 0); + so_method(so, nv30->screen->rankine, NV34TCL_TX_ENABLE(unit), 1); + so_data (so, 0); + so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); + state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit)); + } + + samplers = nv30->dirty_samplers & fp->samplers; + while (samplers) { + unit = 
ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so = nv30_fragtex_build(nv30, unit); + so_ref(so, &nv30->state.hw[NV30_STATE_FRAGTEX0 + unit]); + state->dirty |= (1ULL << (NV30_STATE_FRAGTEX0 + unit)); + } + + nv30->state.fp_samplers = fp->samplers; + return FALSE; +} + +struct nv30_state_entry nv30_state_fragtex = { + .validate = nv30_fragtex_validate, + .dirty = { + .pipe = NV30_NEW_SAMPLER | NV30_NEW_FRAGPROG, + .hw = 0 + } +}; diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c new file mode 100644 index 0000000000..b11ed8c24e --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -0,0 +1,235 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv30_context.h" + +static void +nv30_miptree_layout(struct nv30_miptree *nv30mt) +{ + struct pipe_texture *pt = &nv30mt->base; + uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint offset = 0; + int nr_faces, l, f; + uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | + PIPE_TEXTURE_USAGE_DEPTH_STENCIL | + PIPE_TEXTURE_USAGE_RENDER_TARGET | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_PRIMARY); + + if (pt->target == PIPE_TEXTURE_CUBE) { + nr_faces = 6; + } else + if (pt->target == PIPE_TEXTURE_3D) { + nr_faces = pt->depth[0]; + } else { + nr_faces = 1; + } + + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + pt->depth[l] = depth; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) + nv30mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64); + else + nv30mt->level[l].pitch = pt->width[l] * pt->block.size; + + nv30mt->level[l].image_offset = + CALLOC(nr_faces, sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + depth = MAX2(1, depth >> 1); + } + + for (f = 
0; f < nr_faces; f++) { + for (l = 0; l < pt->last_level; l++) { + nv30mt->level[l].image_offset[f] = offset; + + if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && + pt->width[l + 1] > 1 && pt->height[l + 1] > 1) + offset += align(nv30mt->level[l].pitch * pt->height[l], 64); + else + offset += nv30mt->level[l].pitch * pt->height[l]; + } + + nv30mt->level[l].image_offset[f] = offset; + offset += nv30mt->level[l].pitch * pt->height[l]; + } + + nv30mt->total_size = offset; +} + +static struct pipe_texture * +nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +{ + struct pipe_winsys *ws = pscreen->winsys; + struct nv30_miptree *mt; + + mt = MALLOC(sizeof(struct nv30_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + mt->shadow_tex = NULL; + mt->shadow_surface = NULL; + + /* Swizzled textures must be POT */ + if (pt->width[0] & (pt->width[0] - 1) || + pt->height[0] & (pt->height[0] - 1)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else { + switch (pt->format) { + /* TODO: Figure out which formats can be swizzled */ + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R16_SNORM: + { + if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + break; + } + default: + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + } + } + + nv30_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, + PIPE_BUFFER_USAGE_PIXEL | + NOUVEAU_BUFFER_USAGE_TEXTURE, + mt->total_size); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + + return &mt->base; +} + +static struct pipe_texture * 
+nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, + const unsigned *stride, struct pipe_buffer *pb) +{ + struct nv30_miptree *mt; + + /* Only supports 2D, non-mipmapped textures for the moment */ + if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || + pt->depth[0] != 1) + return NULL; + + mt = CALLOC_STRUCT(nv30_miptree); + if (!mt) + return NULL; + + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + mt->level[0].pitch = stride[0]; + mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + + pipe_buffer_reference(pscreen, &mt->buffer, pb); + return &mt->base; +} + +static void +nv30_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) +{ + struct pipe_texture *pt = *ppt; + struct nv30_miptree *mt = (struct nv30_miptree *)pt; + int l; + + *ppt = NULL; + if (--pt->refcount) + return; + + pipe_buffer_reference(pscreen, &mt->buffer, NULL); + for (l = 0; l <= pt->last_level; l++) { + if (mt->level[l].image_offset) + FREE(mt->level[l].image_offset); + } + + if (mt->shadow_tex) { + if (mt->shadow_surface) + pscreen->tex_surface_release(pscreen, &mt->shadow_surface); + nv30_miptree_release(pscreen, &mt->shadow_tex); + } + + FREE(mt); +} + +static struct pipe_surface * +nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct nv30_miptree *nv30mt = (struct nv30_miptree *)pt; + struct pipe_surface *ps; + + ps = CALLOC_STRUCT(pipe_surface); + if (!ps) + return NULL; + pipe_texture_reference(&ps->texture, pt); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = nv30mt->level[level].pitch; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; + ps->refcount = 1; + ps->face = face; + ps->level = level; + ps->zslice = zslice; + + if 
(pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = nv30mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ps->offset = nv30mt->level[level].image_offset[zslice]; + } else { + ps->offset = nv30mt->level[level].image_offset[0]; + } + + return ps; +} + +static void +nv30_miptree_surface_del(struct pipe_screen *pscreen, + struct pipe_surface **psurface) +{ + struct pipe_surface *ps = *psurface; + + *psurface = NULL; + if (--ps->refcount > 0) + return; + + pipe_texture_reference(&ps->texture, NULL); + FREE(ps); +} + +void +nv30_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv30_miptree_create; + pscreen->texture_blanket = nv30_miptree_blanket; + pscreen->texture_release = nv30_miptree_release; + pscreen->get_tex_surface = nv30_miptree_surface_new; + pscreen->tex_surface_release = nv30_miptree_surface_del; +} diff --git a/src/gallium/drivers/nv30/nv30_query.c b/src/gallium/drivers/nv30/nv30_query.c new file mode 100644 index 0000000000..2f974cf5c4 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_query.c @@ -0,0 +1,122 @@ +#include "pipe/p_context.h" + +#include "nv30_context.h" + +struct nv30_query { + struct nouveau_resource *object; + unsigned type; + boolean ready; + uint64_t result; +}; + +static INLINE struct nv30_query * +nv30_query(struct pipe_query *pipe) +{ + return (struct nv30_query *)pipe; +} + +static struct pipe_query * +nv30_query_create(struct pipe_context *pipe, unsigned query_type) +{ + struct nv30_query *q; + + q = CALLOC(1, sizeof(struct nv30_query)); + q->type = query_type; + + return (struct pipe_query *)q; +} + +static void +nv30_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + + if (q->object) + nv30->nvws->res_free(&q->object); + FREE(q); +} + +static void +nv30_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv30_context *nv30 = 
nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + + assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + /* Happens when end_query() is called, then another begin_query() + * without querying the result in-between. For now we'll wait for + * the existing query to notify completion, but it could be better. + */ + if (q->object) { + uint64_t tmp; + pipe->get_query_result(pipe, pq, 1, &tmp); + } + + if (nv30->nvws->res_alloc(nv30->screen->query_heap, 1, NULL, &q->object)) + assert(0); + nv30->nvws->notifier_reset(nv30->screen->query, q->object->start); + + BEGIN_RING(rankine, NV34TCL_QUERY_RESET, 1); + OUT_RING (1); + BEGIN_RING(rankine, NV34TCL_QUERY_UNK17CC, 1); + OUT_RING (1); + + q->ready = FALSE; +} + +static void +nv30_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + + BEGIN_RING(rankine, NV34TCL_QUERY_GET, 1); + OUT_RING ((0x01 << NV34TCL_QUERY_GET_UNK24_SHIFT) | + ((q->object->start * 32) << NV34TCL_QUERY_GET_OFFSET_SHIFT)); + FIRE_RING(NULL); +} + +static boolean +nv30_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, uint64_t *result) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_query *q = nv30_query(pq); + struct nouveau_winsys *nvws = nv30->nvws; + + assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + if (!q->ready) { + unsigned status; + + status = nvws->notifier_status(nv30->screen->query, + q->object->start); + if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { + if (wait == FALSE) + return FALSE; + nvws->notifier_wait(nv30->screen->query, q->object->start, + NV_NOTIFY_STATE_STATUS_COMPLETED, + 0); + } + + q->result = nvws->notifier_retval(nv30->screen->query, + q->object->start); + q->ready = TRUE; + nvws->res_free(&q->object); + } + + *result = q->result; + return TRUE; +} + +void +nv30_init_query_functions(struct nv30_context *nv30) +{ + nv30->pipe.create_query = 
nv30_query_create; + nv30->pipe.destroy_query = nv30_query_destroy; + nv30->pipe.begin_query = nv30_query_begin; + nv30->pipe.end_query = nv30_query_end; + nv30->pipe.get_query_result = nv30_query_result; +} diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c new file mode 100644 index 0000000000..c97a73f0b1 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -0,0 +1,401 @@ +#include "pipe/p_screen.h" +#include "util/u_simple_screen.h" + +#include "nv30_context.h" +#include "nv30_screen.h" + +#define NV30TCL_CHIPSET_3X_MASK 0x00000003 +#define NV34TCL_CHIPSET_3X_MASK 0x00000010 +#define NV35TCL_CHIPSET_3X_MASK 0x000001e0 + +static const char * +nv30_screen_get_name(struct pipe_screen *pscreen) +{ + struct nv30_screen *screen = nv30_screen(pscreen); + struct nouveau_device *dev = screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv30_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "nouveau"; +} + +static int +nv30_screen_get_param(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 2; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 0; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; + case NOUVEAU_CAP_HW_VTXBUF: + case NOUVEAU_CAP_HW_IDXBUF: + 
return 1; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv30_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 8.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv30_screen_surface_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static struct pipe_buffer * +nv30_surface_buffer(struct pipe_surface *surf) +{ + struct nv30_miptree *mt = (struct nv30_miptree *)surf->texture; + + return mt->buffer; +} + +static void * +nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface *surface_to_map; + void *map; + + if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture; + + if (!mt->shadow_tex) { + unsigned old_tex_usage = surface->texture->tex_usage; + 
surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR | + PIPE_TEXTURE_USAGE_DYNAMIC; + mt->shadow_tex = screen->texture_create(screen, surface->texture); + surface->texture->tex_usage = old_tex_usage; + + assert(mt->shadow_tex->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR); + } + + mt->shadow_surface = screen->get_tex_surface + ( + screen, mt->shadow_tex, + surface->face, surface->level, surface->zslice, + surface->usage + ); + + surface_to_map = mt->shadow_surface; + } + else + surface_to_map = surface; + + assert(surface_to_map); + + map = ws->buffer_map(ws, nv30_surface_buffer(surface_to_map), flags); + if (!map) + return NULL; + + return map + surface_to_map->offset; +} + +static void +nv30_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface *surface_to_unmap; + + /* TODO: Copy from shadow just before push buffer is flushed instead. + There are probably some programs that map/unmap excessively + before rendering. 
*/ + if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture; + + assert(mt->shadow_tex); + + surface_to_unmap = mt->shadow_surface; + } + else + surface_to_unmap = surface; + + assert(surface_to_unmap); + + ws->buffer_unmap(ws, nv30_surface_buffer(surface_to_unmap)); + + if (surface_to_unmap != surface) { + struct nv30_screen *nvscreen = nv30_screen(screen); + + nvscreen->eng2d->copy(nvscreen->eng2d, surface, 0, 0, + surface_to_unmap, 0, 0, + surface->width, surface->height); + + screen->tex_surface_release(screen, &surface_to_unmap); + } +} + +static void +nv30_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv30_screen *screen = nv30_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->res_free(&screen->vp_exec_heap); + nvws->res_free(&screen->vp_data_heap); + nvws->res_free(&screen->query_heap); + nvws->notifier_free(&screen->query); + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->rankine); + + FREE(pscreen); +} + +struct pipe_screen * +nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen); + struct nouveau_stateobj *so; + unsigned rankine_class = 0; + unsigned chipset = nvws->channel->device->chipset; + int ret, i; + + if (!screen) + return NULL; + screen->nvws = nvws; + + /* 2D engine setup */ + screen->eng2d = nv04_surface_2d_init(nvws); + screen->eng2d->buf = nv30_surface_buffer; + + /* 3D object */ + switch (chipset & 0xf0) { + case 0x30: + if (NV30TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) + rankine_class = 0x0397; + else + if (NV34TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) + rankine_class = 0x0697; + else + if (NV35TCL_CHIPSET_3X_MASK & (1 << (chipset & 0x0f))) + rankine_class = 0x0497; + break; + default: + break; + } + + if (!rankine_class) { + NOUVEAU_ERR("Unknown nv3x chipset: nv%02x\n", chipset); + return NULL; + } + + ret = 
nvws->grobj_alloc(nvws, rankine_class, &screen->rankine); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv30_screen_destroy(&screen->pipe); + return NULL; + } + + /* Query objects */ + ret = nvws->notifier_alloc(nvws, 32, &screen->query); + if (ret) { + NOUVEAU_ERR("Error initialising query objects: %d\n", ret); + nv30_screen_destroy(&screen->pipe); + return NULL; + } + + ret = nvws->res_init(&screen->query_heap, 0, 32); + if (ret) { + NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); + nv30_screen_destroy(&screen->pipe); + return NULL; + } + + /* Vtxprog resources */ + if (nvws->res_init(&screen->vp_exec_heap, 0, 256) || + nvws->res_init(&screen->vp_data_heap, 0, 256)) { + nv30_screen_destroy(&screen->pipe); + return NULL; + } + + /* Static rankine initialisation */ + so = so_new(128, 0); + so_method(so, screen->rankine, NV34TCL_DMA_NOTIFY, 1); + so_data (so, screen->sync->handle); + so_method(so, screen->rankine, NV34TCL_DMA_TEXTURE0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, screen->rankine, NV34TCL_DMA_COLOR1, 1); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->rankine, NV34TCL_DMA_COLOR0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->rankine, NV34TCL_DMA_VTXBUF0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); +/* so_method(so, screen->rankine, NV34TCL_DMA_FENCE, 2); + so_data (so, 0); + so_data (so, screen->query->handle);*/ + so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY7, 1); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->rankine, NV34TCL_DMA_IN_MEMORY8, 1); + so_data (so, nvws->channel->vram->handle); + + 
for (i=1; i<8; i++) { + so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(i), 1); + so_data (so, 0); + so_method(so, screen->rankine, NV34TCL_VIEWPORT_CLIP_VERT(i), 1); + so_data (so, 0); + } + + so_method(so, screen->rankine, 0x220, 1); + so_data (so, 1); + + so_method(so, screen->rankine, 0x03b0, 1); + so_data (so, 0x00100000); + so_method(so, screen->rankine, 0x1454, 1); + so_data (so, 0); + so_method(so, screen->rankine, 0x1d80, 1); + so_data (so, 3); + so_method(so, screen->rankine, 0x1450, 1); + so_data (so, 0x00030004); + + /* NEW */ + so_method(so, screen->rankine, 0x1e98, 1); + so_data (so, 0); + so_method(so, screen->rankine, 0x17e0, 3); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + so_method(so, screen->rankine, 0x1f80, 16); + for (i=0; i<16; i++) { + so_data (so, (i==8) ? 0x0000ffff : 0); + } + + so_method(so, screen->rankine, 0x120, 3); + so_data (so, 0); + so_data (so, 1); + so_data (so, 2); + + so_method(so, screen->rankine, 0x1d88, 1); + so_data (so, 0x00001200); + + so_method(so, screen->rankine, NV34TCL_RC_ENABLE, 1); + so_data (so, 0); + + so_method(so, screen->rankine, NV34TCL_DEPTH_RANGE_NEAR, 2); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + + so_method(so, screen->rankine, NV34TCL_MULTISAMPLE_CONTROL, 1); + so_data (so, 0xffff0000); + + /* enables use of vp rather than fixed-function somehow */ + so_method(so, screen->rankine, 0x1e94, 1); + so_data (so, 0x13); + + so_emit(nvws, so); + so_ref(NULL, &so); + nvws->push_flush(nvws, 0, NULL); + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv30_screen_destroy; + + screen->pipe.get_name = nv30_screen_get_name; + screen->pipe.get_vendor = nv30_screen_get_vendor; + screen->pipe.get_param = nv30_screen_get_param; + screen->pipe.get_paramf = nv30_screen_get_paramf; + + screen->pipe.is_format_supported = nv30_screen_surface_format_supported; + + screen->pipe.surface_map = nv30_surface_map; + screen->pipe.surface_unmap = nv30_surface_unmap; + + 
nv30_screen_init_miptree_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); + + return &screen->pipe; +} diff --git a/src/gallium/drivers/nv30/nv30_screen.h b/src/gallium/drivers/nv30/nv30_screen.h new file mode 100644 index 0000000000..b11e470f94 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_screen.h @@ -0,0 +1,37 @@ +#ifndef __NV30_SCREEN_H__ +#define __NV30_SCREEN_H__ + +#include "pipe/p_screen.h" +#include "nv04/nv04_surface_2d.h" + +struct nv30_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + + unsigned cur_pctx; + + /* HW graphics objects */ + struct nv04_surface_2d *eng2d; + struct nouveau_grobj *rankine; + struct nouveau_notifier *sync; + + /* Query object resources */ + struct nouveau_notifier *query; + struct nouveau_resource *query_heap; + + /* Vtxprog resources */ + struct nouveau_resource *vp_exec_heap; + struct nouveau_resource *vp_data_heap; + + /* Current 3D state of channel */ + struct nouveau_stateobj *state[NV30_STATE_MAX]; +}; + +static INLINE struct nv30_screen * +nv30_screen(struct pipe_screen *screen) +{ + return (struct nv30_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv30/nv30_shader.h b/src/gallium/drivers/nv30/nv30_shader.h new file mode 100644 index 0000000000..dd3a36f78f --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_shader.h @@ -0,0 +1,490 @@ +#ifndef __NV30_SHADER_H__ +#define __NV30_SHADER_H__ + +/* Vertex programs instruction set + * + * 128bit opcodes, split into 4 32-bit ones for ease of use. 
+ * + * Non-native instructions + * ABS - MOV + NV40_VP_INST0_DEST_ABS + * POW - EX2 + MUL + LG2 + * SUB - ADD, second source negated + * SWZ - MOV + * XPD - + * + * Register access + * - Only one INPUT can be accessed per-instruction (move extras into TEMPs) + * - Only one CONST can be accessed per-instruction (move extras into TEMPs) + * + * Relative Addressing + * According to the value returned for + * MAX_PROGRAM_NATIVE_ADDRESS_REGISTERS_ARB + * + * there are only two address registers available. The destination in the + * ARL instruction is set to TEMP <n> (The temp isn't actually written). + * + * When using vanilla ARB_v_p, the proprietary driver will squish both the + * available ADDRESS regs into the first hardware reg in the X and Y + * components. + * + * To use an address reg as an index into consts, the CONST_SRC is set to + * (const_base + offset) and INDEX_CONST is set. + * + * To access the second address reg use ADDR_REG_SELECT_1. A particular + * component of the address regs is selected with ADDR_SWZ. + * + * Only one address register can be accessed per instruction. + * + * Conditional execution (see NV_vertex_program{2,3} for details) Conditional + * execution of an instruction is enabled by setting COND_TEST_ENABLE, and + * selecting the condition which will allow the test to pass with + * COND_{FL,LT,...}. It is possible to swizzle the values in the condition + * register, which allows for testing against an individual component. + * + * Branching: + * + * The BRA/CAL instructions seem to follow a slightly different opcode + * layout. The destination instruction ID (IADDR) overlaps a source field. + * Instruction ID's seem to be numbered based on the UPLOAD_FROM_ID FIFO + * command, and is incremented automatically on each UPLOAD_INST FIFO + * command. + * + * Conditional branching is achieved by using the condition tests described + * above. 
There doesn't appear to be dedicated looping instructions, but + * this can be done using a temp reg + conditional branching. + * + * Subroutines may be uploaded before the main program itself, but the first + * executed instruction is determined by the PROGRAM_START_ID FIFO command. + * + */ + +/* DWORD 0 */ + +#define NV30_VP_INST_ADDR_REG_SELECT_1 (1 << 24) +#define NV30_VP_INST_SRC2_ABS (1 << 23) /* guess */ +#define NV30_VP_INST_SRC1_ABS (1 << 22) /* guess */ +#define NV30_VP_INST_SRC0_ABS (1 << 21) /* guess */ +#define NV30_VP_INST_VEC_RESULT (1 << 20) +#define NV30_VP_INST_DEST_TEMP_ID_SHIFT 16 +#define NV30_VP_INST_DEST_TEMP_ID_MASK (0x0F << 16) +#define NV30_VP_INST_COND_UPDATE_ENABLE (1<<15) +#define NV30_VP_INST_VEC_DEST_TEMP_MASK (0xF << 16) +#define NV30_VP_INST_COND_TEST_ENABLE (1<<14) +#define NV30_VP_INST_COND_SHIFT 11 +#define NV30_VP_INST_COND_MASK (0x07 << 11) +# define NV30_VP_INST_COND_FL 0 /* guess */ +# define NV30_VP_INST_COND_LT 1 +# define NV30_VP_INST_COND_EQ 2 +# define NV30_VP_INST_COND_LE 3 +# define NV30_VP_INST_COND_GT 4 +# define NV30_VP_INST_COND_NE 5 +# define NV30_VP_INST_COND_GE 6 +# define NV30_VP_INST_COND_TR 7 /* guess */ +#define NV30_VP_INST_COND_SWZ_X_SHIFT 9 +#define NV30_VP_INST_COND_SWZ_X_MASK (0x03 << 9) +#define NV30_VP_INST_COND_SWZ_Y_SHIFT 7 +#define NV30_VP_INST_COND_SWZ_Y_MASK (0x03 << 7) +#define NV30_VP_INST_COND_SWZ_Z_SHIFT 5 +#define NV30_VP_INST_COND_SWZ_Z_MASK (0x03 << 5) +#define NV30_VP_INST_COND_SWZ_W_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_W_MASK (0x03 << 3) +#define NV30_VP_INST_COND_SWZ_ALL_SHIFT 3 +#define NV30_VP_INST_COND_SWZ_ALL_MASK (0xFF << 3) +#define NV30_VP_INST_ADDR_SWZ_SHIFT 1 +#define NV30_VP_INST_ADDR_SWZ_MASK (0x03 << 1) +#define NV30_VP_INST_SCA_OPCODEH_SHIFT 0 +#define NV30_VP_INST_SCA_OPCODEH_MASK (0x01 << 0) + +/* DWORD 1 */ +#define NV30_VP_INST_SCA_OPCODEL_SHIFT 28 +#define NV30_VP_INST_SCA_OPCODEL_MASK (0x0F << 28) +# define NV30_VP_INST_OP_NOP 0x00 +# define NV30_VP_INST_OP_RCP 
0x02 +# define NV30_VP_INST_OP_RCC 0x03 +# define NV30_VP_INST_OP_RSQ 0x04 +# define NV30_VP_INST_OP_EXP 0x05 +# define NV30_VP_INST_OP_LOG 0x06 +# define NV30_VP_INST_OP_LIT 0x07 +# define NV30_VP_INST_OP_BRA 0x09 +# define NV30_VP_INST_OP_CAL 0x0B +# define NV30_VP_INST_OP_RET 0x0C +# define NV30_VP_INST_OP_LG2 0x0D +# define NV30_VP_INST_OP_EX2 0x0E +# define NV30_VP_INST_OP_SIN 0x0F +# define NV30_VP_INST_OP_COS 0x10 +#define NV30_VP_INST_VEC_OPCODE_SHIFT 23 +#define NV30_VP_INST_VEC_OPCODE_MASK (0x1F << 23) +# define NV30_VP_INST_OP_NOPV 0x00 +# define NV30_VP_INST_OP_MOV 0x01 +# define NV30_VP_INST_OP_MUL 0x02 +# define NV30_VP_INST_OP_ADD 0x03 +# define NV30_VP_INST_OP_MAD 0x04 +# define NV30_VP_INST_OP_DP3 0x05 +# define NV30_VP_INST_OP_DP4 0x07 +# define NV30_VP_INST_OP_DPH 0x06 +# define NV30_VP_INST_OP_DST 0x08 +# define NV30_VP_INST_OP_MIN 0x09 +# define NV30_VP_INST_OP_MAX 0x0A +# define NV30_VP_INST_OP_SLT 0x0B +# define NV30_VP_INST_OP_SGE 0x0C +# define NV30_VP_INST_OP_ARL 0x0D +# define NV30_VP_INST_OP_FRC 0x0E +# define NV30_VP_INST_OP_FLR 0x0F +# define NV30_VP_INST_OP_SEQ 0x10 +# define NV30_VP_INST_OP_SFL 0x11 +# define NV30_VP_INST_OP_SGT 0x12 +# define NV30_VP_INST_OP_SLE 0x13 +# define NV30_VP_INST_OP_SNE 0x14 +# define NV30_VP_INST_OP_STR 0x15 +# define NV30_VP_INST_OP_SSG 0x16 +# define NV30_VP_INST_OP_ARR 0x17 +# define NV30_VP_INST_OP_ARA 0x18 +#define NV30_VP_INST_CONST_SRC_SHIFT 14 +#define NV30_VP_INST_CONST_SRC_MASK (0xFF << 14) +#define NV30_VP_INST_INPUT_SRC_SHIFT 9 /*NV20*/ +#define NV30_VP_INST_INPUT_SRC_MASK (0x0F << 9) /*NV20*/ +# define NV30_VP_INST_IN_POS 0 /* These seem to match the bindings specified in */ +# define NV30_VP_INST_IN_WEIGHT 1 /* the ARB_v_p spec (2.14.3.1) */ +# define NV30_VP_INST_IN_NORMAL 2 +# define NV30_VP_INST_IN_COL0 3 /* Should probably confirm them all though */ +# define NV30_VP_INST_IN_COL1 4 +# define NV30_VP_INST_IN_FOGC 5 +# define NV30_VP_INST_IN_TC0 8 +# define NV30_VP_INST_IN_TC(n) (8+n) 
+#define NV30_VP_INST_SRC0H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC0H_MASK (0x1FF << 0) /*NV20*/ + +/* Please note: the IADDR fields overlap other fields because they are used + * only for branch instructions. See Branching: label above + * + * DWORD 2 + */ +#define NV30_VP_INST_SRC0L_SHIFT 26 /*NV20*/ +#define NV30_VP_INST_SRC0L_MASK (0x3F <<26) /* NV30_VP_SRC0_LOW_MASK << 26 */ +#define NV30_VP_INST_SRC1_SHIFT 11 /*NV20*/ +#define NV30_VP_INST_SRC1_MASK (0x7FFF<<11) /*NV20*/ +#define NV30_VP_INST_SRC2H_SHIFT 0 /*NV20*/ +#define NV30_VP_INST_SRC2H_MASK (0x7FF << 0) /* NV30_VP_SRC2_HIGH_MASK >> 4*/ +#define NV30_VP_INST_IADDR_SHIFT 2 +#define NV30_VP_INST_IADDR_MASK (0xF << 28) /* NV30_VP_SRC2_LOW_MASK << 28 */ + +/* DWORD 3 */ +#define NV30_VP_INST_SRC2L_SHIFT 28 /*NV20*/ +#define NV30_VP_INST_SRC2L_MASK (0x0F <<28) /*NV20*/ +#define NV30_VP_INST_STEMP_WRITEMASK_SHIFT 24 +#define NV30_VP_INST_STEMP_WRITEMASK_MASK (0x0F << 24) +#define NV30_VP_INST_VTEMP_WRITEMASK_SHIFT 20 +#define NV30_VP_INST_VTEMP_WRITEMASK_MASK (0x0F << 20) +#define NV30_VP_INST_SDEST_WRITEMASK_SHIFT 16 +#define NV30_VP_INST_SDEST_WRITEMASK_MASK (0x0F << 16) +#define NV30_VP_INST_VDEST_WRITEMASK_SHIFT 12 /*NV20*/ +#define NV30_VP_INST_VDEST_WRITEMASK_MASK (0x0F << 12) /*NV20*/ +#define NV30_VP_INST_DEST_SHIFT 2 +#define NV30_VP_INST_DEST_MASK (0x0F << 2) +# define NV30_VP_INST_DEST_POS 0 +# define NV30_VP_INST_DEST_BFC0 1 +# define NV30_VP_INST_DEST_BFC1 2 +# define NV30_VP_INST_DEST_COL0 3 +# define NV30_VP_INST_DEST_COL1 4 +# define NV30_VP_INST_DEST_FOGC 5 +# define NV30_VP_INST_DEST_PSZ 6 +# define NV30_VP_INST_DEST_TC(n) (8+n) + +#define NV30_VP_INST_LAST (1 << 0) + +/* Useful to split the source selection regs into their pieces */ +#define NV30_VP_SRC0_HIGH_SHIFT 6 +#define NV30_VP_SRC0_HIGH_MASK 0x00007FC0 +#define NV30_VP_SRC0_LOW_MASK 0x0000003F +#define NV30_VP_SRC2_HIGH_SHIFT 4 +#define NV30_VP_SRC2_HIGH_MASK 0x00007FF0 +#define NV30_VP_SRC2_LOW_MASK 0x0000000F + + +/* 
Source-register definition - matches NV20 exactly */ +#define NV30_VP_SRC_NEGATE (1<<14) +#define NV30_VP_SRC_SWZ_X_SHIFT 12 +#define NV30_VP_SRC_REG_SWZ_X_MASK (0x03 <<12) +#define NV30_VP_SRC_SWZ_Y_SHIFT 10 +#define NV30_VP_SRC_REG_SWZ_Y_MASK (0x03 <<10) +#define NV30_VP_SRC_SWZ_Z_SHIFT 8 +#define NV30_VP_SRC_REG_SWZ_Z_MASK (0x03 << 8) +#define NV30_VP_SRC_SWZ_W_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_W_MASK (0x03 << 6) +#define NV30_VP_SRC_REG_SWZ_ALL_SHIFT 6 +#define NV30_VP_SRC_REG_SWZ_ALL_MASK (0xFF << 6) +#define NV30_VP_SRC_TEMP_SRC_SHIFT 2 +#define NV30_VP_SRC_REG_TEMP_ID_MASK (0x0F << 0) +#define NV30_VP_SRC_REG_TYPE_SHIFT 0 +#define NV30_VP_SRC_REG_TYPE_MASK (0x03 << 0) +#define NV30_VP_SRC_REG_TYPE_TEMP 1 +#define NV30_VP_SRC_REG_TYPE_INPUT 2 +#define NV30_VP_SRC_REG_TYPE_CONST 3 /* guess */ + +/* + * Each fragment program opcode appears to be comprised of 4 32-bit values. + * + * 0 - Opcode, output reg/mask, ATTRIB source + * 1 - Source 0 + * 2 - Source 1 + * 3 - Source 2 + * + * There appears to be no special difference between result regs and temp regs. + * result.color == R0.xyzw + * result.depth == R1.z + * When the fragprog contains instructions to write depth, NV30_TCL_PRIMITIVE_3D_UNK1D78=0 + * otherwise it is set to 1. + * + * Constants are inserted directly after the instruction that uses them. + * + * It appears that it's not possible to use two input registers in one + * instruction as the input sourcing is done in the instruction dword + * and not the source selection dwords. As such instructions such as: + * + * ADD result.color, fragment.color, fragment.texcoord[0]; + * + * must be split into two MOV's and then an ADD (nvidia does this) but + * I'm not sure why it's not just one MOV and then source the second input + * in the ADD instruction.. + * + * Negation of the full source is done with NV30_FP_REG_NEGATE, arbitrary + * negation requires multiplication with a const. 
+ * + * Arbitrary swizzling is supported with the exception of SWIZZLE_ZERO/SWIZZLE_ONE + * The temp/result regs appear to be initialised to (0.0, 0.0, 0.0, 0.0) as SWIZZLE_ZERO + * is implemented simply by not writing to the relevant components of the destination. + * + * Conditional execution + * TODO + * + * Non-native instructions: + * LIT + * LRP - MAD+MAD + * SUB - ADD, negate second source + * RSQ - LG2 + EX2 + * POW - LG2 + MUL + EX2 + * SCS - COS + SIN + * XPD + */ + +//== Opcode / Destination selection == +#define NV30_FP_OP_PROGRAM_END (1 << 0) +#define NV30_FP_OP_OUT_REG_SHIFT 1 +#define NV30_FP_OP_OUT_REG_MASK (31 << 1) /* uncertain */ +/* Needs to be set when writing outputs to get expected result.. */ +#define NV30_FP_OP_OUT_REG_HALF (1 << 7) +#define NV30_FP_OP_COND_WRITE_ENABLE (1 << 8) +#define NV30_FP_OP_OUTMASK_SHIFT 9 +#define NV30_FP_OP_OUTMASK_MASK (0xF << 9) +# define NV30_FP_OP_OUT_X (1<<9) +# define NV30_FP_OP_OUT_Y (1<<10) +# define NV30_FP_OP_OUT_Z (1<<11) +# define NV30_FP_OP_OUT_W (1<<12) +/* Uncertain about these, especially the input_src values.. it's possible that + * they can be dynamically changed. 
+ */ +#define NV30_FP_OP_INPUT_SRC_SHIFT 13 +#define NV30_FP_OP_INPUT_SRC_MASK (15 << 13) +# define NV30_FP_OP_INPUT_SRC_POSITION 0x0 +# define NV30_FP_OP_INPUT_SRC_COL0 0x1 +# define NV30_FP_OP_INPUT_SRC_COL1 0x2 +# define NV30_FP_OP_INPUT_SRC_FOGC 0x3 +# define NV30_FP_OP_INPUT_SRC_TC0 0x4 +# define NV30_FP_OP_INPUT_SRC_TC(n) (0x4 + n) +#define NV30_FP_OP_TEX_UNIT_SHIFT 17 +#define NV30_FP_OP_TEX_UNIT_MASK (0xF << 17) /* guess */ +#define NV30_FP_OP_PRECISION_SHIFT 22 +#define NV30_FP_OP_PRECISION_MASK (3 << 22) +# define NV30_FP_PRECISION_FP32 0 +# define NV30_FP_PRECISION_FP16 1 +# define NV30_FP_PRECISION_FX12 2 +#define NV30_FP_OP_OPCODE_SHIFT 24 +#define NV30_FP_OP_OPCODE_MASK (0x3F << 24) +# define NV30_FP_OP_OPCODE_NOP 0x00 +# define NV30_FP_OP_OPCODE_MOV 0x01 +# define NV30_FP_OP_OPCODE_MUL 0x02 +# define NV30_FP_OP_OPCODE_ADD 0x03 +# define NV30_FP_OP_OPCODE_MAD 0x04 +# define NV30_FP_OP_OPCODE_DP3 0x05 +# define NV30_FP_OP_OPCODE_DP4 0x06 +# define NV30_FP_OP_OPCODE_DST 0x07 +# define NV30_FP_OP_OPCODE_MIN 0x08 +# define NV30_FP_OP_OPCODE_MAX 0x09 +# define NV30_FP_OP_OPCODE_SLT 0x0A +# define NV30_FP_OP_OPCODE_SGE 0x0B +# define NV30_FP_OP_OPCODE_SLE 0x0C +# define NV30_FP_OP_OPCODE_SGT 0x0D +# define NV30_FP_OP_OPCODE_SNE 0x0E +# define NV30_FP_OP_OPCODE_SEQ 0x0F +# define NV30_FP_OP_OPCODE_FRC 0x10 +# define NV30_FP_OP_OPCODE_FLR 0x11 +# define NV30_FP_OP_OPCODE_KIL 0x12 +# define NV30_FP_OP_OPCODE_PK4B 0x13 +# define NV30_FP_OP_OPCODE_UP4B 0x14 +# define NV30_FP_OP_OPCODE_DDX 0x15 /* can only write XY */ +# define NV30_FP_OP_OPCODE_DDY 0x16 /* can only write XY */ +# define NV30_FP_OP_OPCODE_TEX 0x17 +# define NV30_FP_OP_OPCODE_TXP 0x18 +# define NV30_FP_OP_OPCODE_TXD 0x19 +# define NV30_FP_OP_OPCODE_RCP 0x1A +# define NV30_FP_OP_OPCODE_RSQ 0x1B +# define NV30_FP_OP_OPCODE_EX2 0x1C +# define NV30_FP_OP_OPCODE_LG2 0x1D +# define NV30_FP_OP_OPCODE_LIT 0x1E +# define NV30_FP_OP_OPCODE_LRP 0x1F +# define NV30_FP_OP_OPCODE_STR 0x20 +# define 
NV30_FP_OP_OPCODE_SFL 0x21 +# define NV30_FP_OP_OPCODE_COS 0x22 +# define NV30_FP_OP_OPCODE_SIN 0x23 +# define NV30_FP_OP_OPCODE_PK2H 0x24 +# define NV30_FP_OP_OPCODE_UP2H 0x25 +# define NV30_FP_OP_OPCODE_POW 0x26 +# define NV30_FP_OP_OPCODE_PK4UB 0x27 +# define NV30_FP_OP_OPCODE_UP4UB 0x28 +# define NV30_FP_OP_OPCODE_PK2US 0x29 +# define NV30_FP_OP_OPCODE_UP2US 0x2A +# define NV30_FP_OP_OPCODE_DP2A 0x2E +# define NV30_FP_OP_OPCODE_TXB 0x31 +# define NV30_FP_OP_OPCODE_RFL 0x36 +# define NV30_FP_OP_OPCODE_DIV 0x3A +#define NV30_FP_OP_OUT_SAT (1 << 31) + +/* high order bits of SRC0 */ +#define NV30_FP_OP_OUT_ABS (1 << 29) +#define NV30_FP_OP_COND_SWZ_W_SHIFT 27 +#define NV30_FP_OP_COND_SWZ_W_MASK (3 << 27) +#define NV30_FP_OP_COND_SWZ_Z_SHIFT 25 +#define NV30_FP_OP_COND_SWZ_Z_MASK (3 << 25) +#define NV30_FP_OP_COND_SWZ_Y_SHIFT 23 +#define NV30_FP_OP_COND_SWZ_Y_MASK (3 << 23) +#define NV30_FP_OP_COND_SWZ_X_SHIFT 21 +#define NV30_FP_OP_COND_SWZ_X_MASK (3 << 21) +#define NV30_FP_OP_COND_SWZ_ALL_SHIFT 21 +#define NV30_FP_OP_COND_SWZ_ALL_MASK (0xFF << 21) +#define NV30_FP_OP_COND_SHIFT 18 +#define NV30_FP_OP_COND_MASK (0x07 << 18) +# define NV30_FP_OP_COND_FL 0 +# define NV30_FP_OP_COND_LT 1 +# define NV30_FP_OP_COND_EQ 2 +# define NV30_FP_OP_COND_LE 3 +# define NV30_FP_OP_COND_GT 4 +# define NV30_FP_OP_COND_NE 5 +# define NV30_FP_OP_COND_GE 6 +# define NV30_FP_OP_COND_TR 7 + +/* high order bits of SRC1 */ +#define NV30_FP_OP_DST_SCALE_SHIFT 28 +#define NV30_FP_OP_DST_SCALE_MASK (3 << 28) +#define NV30_FP_OP_DST_SCALE_1X 0 +#define NV30_FP_OP_DST_SCALE_2X 1 +#define NV30_FP_OP_DST_SCALE_4X 2 +#define NV30_FP_OP_DST_SCALE_8X 3 +#define NV30_FP_OP_DST_SCALE_INV_2X 5 +#define NV30_FP_OP_DST_SCALE_INV_4X 6 +#define NV30_FP_OP_DST_SCALE_INV_8X 7 + + +/* high order bits of SRC2 */ +#define NV30_FP_OP_INDEX_INPUT (1 << 30) + +//== Register selection == +#define NV30_FP_REG_TYPE_SHIFT 0 +#define NV30_FP_REG_TYPE_MASK (3 << 0) +# define NV30_FP_REG_TYPE_TEMP 0 +# define 
NV30_FP_REG_TYPE_INPUT 1 +# define NV30_FP_REG_TYPE_CONST 2 +#define NV30_FP_REG_SRC_SHIFT 2 /* uncertain */ +#define NV30_FP_REG_SRC_MASK (31 << 2) +#define NV30_FP_REG_SRC_HALF (1 << 8) +#define NV30_FP_REG_SWZ_ALL_SHIFT 9 +#define NV30_FP_REG_SWZ_ALL_MASK (255 << 9) +#define NV30_FP_REG_SWZ_X_SHIFT 9 +#define NV30_FP_REG_SWZ_X_MASK (3 << 9) +#define NV30_FP_REG_SWZ_Y_SHIFT 11 +#define NV30_FP_REG_SWZ_Y_MASK (3 << 11) +#define NV30_FP_REG_SWZ_Z_SHIFT 13 +#define NV30_FP_REG_SWZ_Z_MASK (3 << 13) +#define NV30_FP_REG_SWZ_W_SHIFT 15 +#define NV30_FP_REG_SWZ_W_MASK (3 << 15) +# define NV30_FP_SWIZZLE_X 0 +# define NV30_FP_SWIZZLE_Y 1 +# define NV30_FP_SWIZZLE_Z 2 +# define NV30_FP_SWIZZLE_W 3 +#define NV30_FP_REG_NEGATE (1 << 17) + +#define NV30SR_NONE 0 +#define NV30SR_OUTPUT 1 +#define NV30SR_INPUT 2 +#define NV30SR_TEMP 3 +#define NV30SR_CONST 4 + +struct nv30_sreg { + int type; + int index; + + int dst_scale; + + int negate; + int abs; + int swz[4]; + + int cc_update; + int cc_update_reg; + int cc_test; + int cc_test_reg; + int cc_swz[4]; +}; + +static INLINE struct nv30_sreg +nv30_sr(int type, int index) +{ + struct nv30_sreg temp = { + .type = type, + .index = index, + .dst_scale = DEF_SCALE, + .abs = 0, + .negate = 0, + .swz = { 0, 1, 2, 3 }, + .cc_update = 0, + .cc_update_reg = 0, + .cc_test = DEF_CTEST, + .cc_test_reg = 0, + .cc_swz = { 0, 1, 2, 3 }, + }; + return temp; +} + +static INLINE struct nv30_sreg +nv30_sr_swz(struct nv30_sreg src, int x, int y, int z, int w) +{ + struct nv30_sreg dst = src; + + dst.swz[SWZ_X] = src.swz[x]; + dst.swz[SWZ_Y] = src.swz[y]; + dst.swz[SWZ_Z] = src.swz[z]; + dst.swz[SWZ_W] = src.swz[w]; + return dst; +} + +static INLINE struct nv30_sreg +nv30_sr_neg(struct nv30_sreg src) +{ + src.negate = !src.negate; + return src; +} + +static INLINE struct nv30_sreg +nv30_sr_abs(struct nv30_sreg src) +{ + src.abs = 1; + return src; +} + +static INLINE struct nv30_sreg +nv30_sr_scale(struct nv30_sreg src, int scale) +{ + src.dst_scale = 
scale; + return src; +} + +#endif diff --git a/src/gallium/drivers/nv30/nv30_state.c b/src/gallium/drivers/nv30/nv30_state.c new file mode 100644 index 0000000000..26147565a5 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state.c @@ -0,0 +1,725 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv30_context.h" +#include "nv30_state.h" + +static void * +nv30_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nv30_blend_state *bso = CALLOC(1, sizeof(*bso)); + struct nouveau_stateobj *so = so_new(16, 0); + + if (cso->blend_enable) { + so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 3); + so_data (so, 1); + so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) | + nvgl_blend_func(cso->rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 | + nvgl_blend_func(cso->rgb_dst_factor)); + /* FIXME: Gallium assumes GL_EXT_blend_func_separate. + It is not the case for NV30 */ + so_method(so, rankine, NV34TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->rgb_func)); + } else { + so_method(so, rankine, NV34TCL_BLEND_FUNC_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, rankine, NV34TCL_COLOR_MASK, 1); + so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); + + if (cso->logicop_enable) { + so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } else { + so_method(so, rankine, NV34TCL_COLOR_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, rankine, NV34TCL_DITHER_ENABLE, 1); + so_data (so, cso->dither ? 
1 : 0); + + so_ref(so, &bso->so); + bso->pipe = *cso; + return (void *)bso; +} + +static void +nv30_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->blend = hwcso; + nv30->dirty |= NV30_NEW_BLEND; +} + +static void +nv30_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_blend_state *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV34TCL_TX_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV34TCL_TX_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV34TCL_TX_WRAP_S_CLAMP; + break; +/* case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + ret = NV34TCL_TX_WRAP_S_MIRROR_CLAMP; + break;*/ + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV34TCL_TX_WRAP_S_REPEAT; + break; + } + + return ret >> NV34TCL_TX_WRAP_S_SHIFT; +} + +static void * +nv30_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv30_sampler_state *ps; + uint32_t filter = 0; + + ps = MALLOC(sizeof(struct nv30_sampler_state)); + + ps->fmt = 0; + /* TODO: Not all RECTs formats have this bit set, bits 15-8 of format + are the tx format to use. 
We should store normalized coord flag + in sampler state structure, and set appropriate format in + nvxx_fragtex_build() + */ + /*NV34TCL_TX_FORMAT_RECT*/ + /*if (!cso->normalized_coords) { + ps->fmt |= (1<<14) ; + }*/ + + ps->wrap = ((wrap_mode(cso->wrap_s) << NV34TCL_TX_WRAP_S_SHIFT) | + (wrap_mode(cso->wrap_t) << NV34TCL_TX_WRAP_T_SHIFT) | + (wrap_mode(cso->wrap_r) << NV34TCL_TX_WRAP_R_SHIFT)); + + ps->en = 0; + + if (cso->max_anisotropy >= 8.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 4.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_4X; + } else + if (cso->max_anisotropy >= 2.0) { + ps->en |= NV34TCL_TX_ENABLE_ANISO_2X; + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MAGNIFY_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV34TCL_TX_FILTER_MAGNIFY_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV34TCL_TX_FILTER_MINIFY_NEAREST; + break; + } + break; + } + + ps->filt = filter; + + { + float limit; + + /* Encode the clamped bias, not the raw one: the value is scaled by 256 + and masked to 13 bits, so an unclamped bias outside [-16, 15] would + wrap around instead of saturating. */ + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(limit * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 14 /*NV34TCL_TX_ENABLE_MIPMAP_MAX_LOD_SHIFT*/; + + limit = 
CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit) << 26 /*NV34TCL_TX_ENABLE_MIPMAP_MIN_LOD_SHIFT*/; + } + + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NEVER; + break; + case PIPE_FUNC_GREATER: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GREATER; + break; + case PIPE_FUNC_EQUAL: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_EQUAL; + break; + case PIPE_FUNC_GEQUAL: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_GEQUAL; + break; + case PIPE_FUNC_LESS: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LESS; + break; + case PIPE_FUNC_NOTEQUAL: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_NOTEQUAL; + break; + case PIPE_FUNC_LEQUAL: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_LEQUAL; + break; + case PIPE_FUNC_ALWAYS: + ps->wrap |= NV34TCL_TX_WRAP_RCOMP_ALWAYS; + break; + default: + break; + } + } + + ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | + (float_to_ubyte(cso->border_color[0]) << 16) | + (float_to_ubyte(cso->border_color[1]) << 8) | + (float_to_ubyte(cso->border_color[2]) << 0)); + + return (void *)ps; +} + +static void +nv30_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv30_context *nv30 = nv30_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv30->tex_sampler[unit] = sampler[unit]; + nv30->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nv30->nr_samplers; unit++) { + nv30->tex_sampler[unit] = NULL; + nv30->dirty_samplers |= (1 << unit); + } + + nv30->nr_samplers = nr; + nv30->dirty |= NV30_NEW_SAMPLER; +} + +static void +nv30_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nv30_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nv30_context *nv30 = nv30_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nv30->tex_miptree[unit], miptree[unit]); 
+ nv30->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nv30->nr_textures; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nv30->tex_miptree[unit], NULL); + nv30->dirty_samplers |= (1 << unit); + } + + nv30->nr_textures = nr; + nv30->dirty |= NV30_NEW_SAMPLER; +} + +static void * +nv30_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); + struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *rankine = nv30->screen->rankine; + + /*XXX: ignored: + * light_twoside + * point_smooth -nohw + * multisample + */ + + so_method(so, rankine, NV34TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV34TCL_SHADE_MODEL_FLAT : + NV34TCL_SHADE_MODEL_SMOOTH); + + so_method(so, rankine, NV34TCL_LINE_WIDTH, 2); + so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); + so_data (so, cso->line_smooth ? 1 : 0); + so_method(so, rankine, NV34TCL_LINE_STIPPLE_ENABLE, 2); + so_data (so, cso->line_stipple_enable ? 
1 : 0); + so_data (so, (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor); + + so_method(so, rankine, NV34TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, rankine, NV34TCL_POLYGON_MODE_FRONT, 6); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CCW); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV34TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV34TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV34TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV34TCL_FRONT_FACE_CW); + } + so_data(so, cso->poly_smooth ? 1 : 0); + so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + + so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 
1 : 0); + + so_method(so, rankine, NV34TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, rankine, NV34TCL_POLYGON_OFFSET_FACTOR, 2); + so_data (so, fui(cso->offset_scale)); + so_data (so, fui(cso->offset_units * 2)); + } + + so_method(so, rankine, NV34TCL_POINT_SPRITE, 1); + if (cso->point_sprite) { + unsigned psctl = (1 << 0), i; + + for (i = 0; i < 8; i++) { + if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) + psctl |= (1 << (8 + i)); + } + + so_data(so, psctl); + } else { + so_data(so, 0); + } + + so_ref(so, &rsso->so); + rsso->pipe = *cso; + return (void *)rsso; +} + +static void +nv30_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->rasterizer = hwcso; + nv30->dirty |= NV30_NEW_RAST; + /*nv30->draw_dirty |= NV30_NEW_RAST;*/ +} + +static void +nv30_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_rasterizer_state *rsso = hwcso; + + so_ref(NULL, &rsso->so); + FREE(rsso); +} + +static void * +nv30_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); + struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *rankine = nv30->screen->rankine; + + so_method(so, rankine, NV34TCL_DEPTH_FUNC, 3); + so_data (so, 
nvgl_comparison_op(cso->depth.func)); + so_data (so, cso->depth.writemask ? 1 : 0); + so_data (so, cso->depth.enabled ? 1 : 0); + + so_method(so, rankine, NV34TCL_ALPHA_FUNC_ENABLE, 3); + so_data (so, cso->alpha.enabled ? 1 : 0); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + so_data (so, float_to_ubyte(cso->alpha.ref_value)); + + if (cso->stencil[0].enabled) { + so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 8); + so_data (so, cso->stencil[0].enabled ? 1 : 0); + so_data (so, cso->stencil[0].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_data (so, cso->stencil[0].ref_value); + so_data (so, cso->stencil[0].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + } else { + so_method(so, rankine, NV34TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 8); + so_data (so, cso->stencil[1].enabled ? 
1 : 0); + so_data (so, cso->stencil[1].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_data (so, cso->stencil[1].ref_value); + so_data (so, cso->stencil[1].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + } else { + so_method(so, rankine, NV34TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &zsaso->so); + zsaso->pipe = *cso; + return (void *)zsaso; +} + +static void +nv30_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->zsa = hwcso; + nv30->dirty |= NV30_NEW_ZSA; +} + +static void +nv30_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_zsa_state *zsaso = hwcso; + + so_ref(NULL, &zsaso->so); + FREE(zsaso); +} + +static void * +nv30_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + /*struct nv30_context *nv30 = nv30_context(pipe);*/ + struct nv30_vertex_program *vp; + + vp = CALLOC(1, sizeof(struct nv30_vertex_program)); + vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + /*vp->draw = draw_create_vertex_shader(nv30->draw, &vp->pipe);*/ + + return (void *)vp; +} + +static void +nv30_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->vertprog = hwcso; + nv30->dirty |= NV30_NEW_VERTPROG; + /*nv30->draw_dirty |= NV30_NEW_VERTPROG;*/ +} + +static void +nv30_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_vertex_program *vp = hwcso; + + /*draw_delete_vertex_shader(nv30->draw, vp->draw);*/ + nv30_vertprog_destroy(nv30, vp); + FREE((void*)vp->pipe.tokens); + FREE(vp); +} + +static void * +nv30_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct 
nv30_fragment_program *fp; + + fp = CALLOC(1, sizeof(struct nv30_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(fp->pipe.tokens, &fp->info); + + return (void *)fp; +} + +static void +nv30_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->fragprog = hwcso; + nv30->dirty |= NV30_NEW_FRAGPROG; +} + +static void +nv30_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv30_fragment_program *fp = hwcso; + + nv30_fragprog_destroy(nv30, fp); + FREE((void*)fp->pipe.tokens); + FREE(fp); +} + +static void +nv30_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->blend_colour = *bcol; + nv30->dirty |= NV30_NEW_BCOL; +} + +static void +nv30_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ +} + +static void +nv30_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->constbuf[shader] = buf->buffer; + nv30->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + + if (shader == PIPE_SHADER_VERTEX) { + nv30->dirty |= NV30_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv30->dirty |= NV30_NEW_FRAGPROG; + } +} + +static void +nv30_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->framebuffer = *fb; + nv30->dirty |= NV30_NEW_FB; +} + +static void +nv30_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + memcpy(nv30->stipple, stipple->stipple, 4 * 32); + nv30->dirty |= NV30_NEW_STIPPLE; +} + +static void +nv30_set_scissor_state(struct 
pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->scissor = *s; + nv30->dirty |= NV30_NEW_SCISSOR; +} + +static void +nv30_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->viewport = *vpt; + nv30->dirty |= NV30_NEW_VIEWPORT; + /*nv30->draw_dirty |= NV30_NEW_VIEWPORT;*/ +} + +static void +nv30_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + memcpy(nv30->vtxbuf, vb, sizeof(*vb) * count); + nv30->vtxbuf_nr = count; + + nv30->dirty |= NV30_NEW_ARRAYS; + /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ +} + +static void +nv30_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + memcpy(nv30->vtxelt, ve, sizeof(*ve) * count); + nv30->vtxelt_nr = count; + + nv30->dirty |= NV30_NEW_ARRAYS; + /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ +} + +static void +nv30_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ + struct nv30_context *nv30 = nv30_context(pipe); + + nv30->edgeflags = bitfield; + nv30->dirty |= NV30_NEW_ARRAYS; + /*nv30->draw_dirty |= NV30_NEW_ARRAYS;*/ +} + +void +nv30_init_state_functions(struct nv30_context *nv30) +{ + nv30->pipe.create_blend_state = nv30_blend_state_create; + nv30->pipe.bind_blend_state = nv30_blend_state_bind; + nv30->pipe.delete_blend_state = nv30_blend_state_delete; + + nv30->pipe.create_sampler_state = nv30_sampler_state_create; + nv30->pipe.bind_sampler_states = nv30_sampler_state_bind; + nv30->pipe.delete_sampler_state = nv30_sampler_state_delete; + nv30->pipe.set_sampler_textures = nv30_set_sampler_texture; + + nv30->pipe.create_rasterizer_state = nv30_rasterizer_state_create; + nv30->pipe.bind_rasterizer_state = nv30_rasterizer_state_bind; + 
nv30->pipe.delete_rasterizer_state = nv30_rasterizer_state_delete; + + nv30->pipe.create_depth_stencil_alpha_state = + nv30_depth_stencil_alpha_state_create; + nv30->pipe.bind_depth_stencil_alpha_state = + nv30_depth_stencil_alpha_state_bind; + nv30->pipe.delete_depth_stencil_alpha_state = + nv30_depth_stencil_alpha_state_delete; + + nv30->pipe.create_vs_state = nv30_vp_state_create; + nv30->pipe.bind_vs_state = nv30_vp_state_bind; + nv30->pipe.delete_vs_state = nv30_vp_state_delete; + + nv30->pipe.create_fs_state = nv30_fp_state_create; + nv30->pipe.bind_fs_state = nv30_fp_state_bind; + nv30->pipe.delete_fs_state = nv30_fp_state_delete; + + nv30->pipe.set_blend_color = nv30_set_blend_color; + nv30->pipe.set_clip_state = nv30_set_clip_state; + nv30->pipe.set_constant_buffer = nv30_set_constant_buffer; + nv30->pipe.set_framebuffer_state = nv30_set_framebuffer_state; + nv30->pipe.set_polygon_stipple = nv30_set_polygon_stipple; + nv30->pipe.set_scissor_state = nv30_set_scissor_state; + nv30->pipe.set_viewport_state = nv30_set_viewport_state; + + nv30->pipe.set_edgeflags = nv30_set_edgeflags; + nv30->pipe.set_vertex_buffers = nv30_set_vertex_buffers; + nv30->pipe.set_vertex_elements = nv30_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv30/nv30_state.h b/src/gallium/drivers/nv30/nv30_state.h new file mode 100644 index 0000000000..2023278e37 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state.h @@ -0,0 +1,88 @@ +#ifndef __NV30_STATE_H__ +#define __NV30_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv30_sampler_state { + uint32_t fmt; + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +struct nv30_vertex_program_exec { + uint32_t data[4]; + boolean has_branch_offset; + int const_index; +}; + +struct nv30_vertex_program_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct nv30_vertex_program { + struct pipe_shader_state pipe; + + boolean translated; + + struct 
nv30_vertex_program_exec *insns; + unsigned nr_insns; + struct nv30_vertex_program_data *consts; + unsigned nr_consts; + + struct nouveau_resource *exec; + unsigned exec_start; + struct nouveau_resource *data; + unsigned data_start; + unsigned data_start_min; + + uint32_t ir; + uint32_t or; + struct nouveau_stateobj *so; +}; + +struct nv30_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv30_fragment_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + boolean translated; + boolean on_hw; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv30_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + uint32_t fp_reg_control; + struct nouveau_stateobj *so; +}; + +struct nv30_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct pipe_texture *shadow_tex; + struct pipe_surface *shadow_surface; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv30/nv30_state_blend.c b/src/gallium/drivers/nv30/nv30_state_blend.c new file mode 100644 index 0000000000..44d43e132a --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_blend.c @@ -0,0 +1,40 @@ +#include "nv30_context.h" + +static boolean +nv30_state_blend_validate(struct nv30_context *nv30) +{ + so_ref(nv30->blend->so, &nv30->state.hw[NV30_STATE_BLEND]); + return TRUE; +} + +struct nv30_state_entry nv30_state_blend = { + .validate = nv30_state_blend_validate, + .dirty = { + .pipe = NV30_NEW_BLEND, + .hw = NV30_STATE_BLEND + } +}; + +static boolean +nv30_state_blend_colour_validate(struct nv30_context *nv30) +{ + struct nouveau_stateobj *so = so_new(2, 0); + struct pipe_blend_color *bcol = &nv30->blend_colour; + + so_method(so, nv30->screen->rankine, NV34TCL_BLEND_COLOR, 1); + so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + 
(float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); + + so_ref(so, &nv30->state.hw[NV30_STATE_BCOL]); + return TRUE; +} + +struct nv30_state_entry nv30_state_blend_colour = { + .validate = nv30_state_blend_colour_validate, + .dirty = { + .pipe = NV30_NEW_BCOL, + .hw = NV30_STATE_BCOL + } +}; diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c new file mode 100644 index 0000000000..f77b08ff69 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -0,0 +1,118 @@ +#include "nv30_context.h" +#include "nv30_state.h" + +static struct nv30_state_entry *render_states[] = { + &nv30_state_framebuffer, + &nv30_state_rasterizer, + &nv30_state_scissor, + &nv30_state_stipple, + &nv30_state_fragprog, + &nv30_state_fragtex, + &nv30_state_vertprog, + &nv30_state_blend, + &nv30_state_blend_colour, + &nv30_state_zsa, + &nv30_state_viewport, + &nv30_state_vbo, + NULL +}; + +static void +nv30_state_do_validate(struct nv30_context *nv30, + struct nv30_state_entry **states) +{ + const struct pipe_framebuffer_state *fb = &nv30->framebuffer; + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) + fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + if (fb->zsbuf) + fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + + while (*states) { + struct nv30_state_entry *e = *states; + + if (nv30->dirty & e->dirty.pipe) { + if (e->validate(nv30)) { + nv30->state.dirty |= (1ULL << e->dirty.hw); + } + } + + states++; + } + nv30->dirty = 0; +} + +void +nv30_state_emit(struct nv30_context *nv30) +{ + struct nv30_state *state = &nv30->state; + struct nv30_screen *screen = nv30->screen; + unsigned i, samplers; + uint64_t states; + + if (nv30->pctx_id != screen->cur_pctx) { + for (i = 0; i < NV30_STATE_MAX; i++) { + if (state->hw[i] && screen->state[i] != state->hw[i]) + state->dirty |= (1ULL << i); + } + + screen->cur_pctx = nv30->pctx_id; + } + + for (i = 0, states = state->dirty; states; i++) { + if (!(states & 
(1ULL << i))) + continue; + so_ref (state->hw[i], &nv30->screen->state[i]); + if (state->hw[i]) + so_emit(nv30->nvws, nv30->screen->state[i]); + states &= ~(1ULL << i); + } + + state->dirty = 0; + + so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FB]); + for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { + if (!(samplers & (1 << i))) + continue; + so_emit_reloc_markers(nv30->nvws, + state->hw[NV30_STATE_FRAGTEX0+i]); + samplers &= ~(1ULL << i); + } + so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_FRAGPROG]); + if (state->hw[NV30_STATE_VTXBUF] /*&& nv30->render_mode == HW*/) + so_emit_reloc_markers(nv30->nvws, state->hw[NV30_STATE_VTXBUF]); +} + +boolean +nv30_state_validate(struct nv30_context *nv30) +{ +#if 0 + boolean was_sw = nv30->fallback_swtnl ? TRUE : FALSE; + + if (nv30->render_mode != HW) { + /* Don't even bother trying to go back to hw if none + * of the states that caused swtnl previously have changed. + */ + if ((nv30->fallback_swtnl & nv30->dirty) + != nv30->fallback_swtnl) + return FALSE; + + /* Attempt to go to hwtnl again */ + nv30->pipe.flush(&nv30->pipe, 0, NULL); + nv30->dirty |= (NV30_NEW_VIEWPORT | + NV30_NEW_VERTPROG | + NV30_NEW_ARRAYS); + nv30->render_mode = HW; + } +#endif + nv30_state_do_validate(nv30, render_states); +#if 0 + if (nv30->fallback_swtnl || nv30->fallback_swrast) + return FALSE; + + if (was_sw) + NOUVEAU_ERR("swtnl->hw\n"); +#endif + return TRUE; +} diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c new file mode 100644 index 0000000000..77368cb205 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -0,0 +1,144 @@ +#include "nv30_context.h" +#include "nouveau/nouveau_util.h" + +static boolean +nv30_state_framebuffer_validate(struct nv30_context *nv30) +{ + struct pipe_framebuffer_state *fb = &nv30->framebuffer; + struct pipe_surface *rt[2], *zeta = NULL; + uint32_t rt_enable, rt_format; + int i, colour_format = 0, zeta_format = 0; + 
struct nouveau_stateobj *so = so_new(64, 10); + unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; + unsigned w = fb->width; + unsigned h = fb->height; + struct nv30_miptree *nv30mt; + + rt_enable = 0; + for (i = 0; i < fb->nr_cbufs; i++) { + if (colour_format) { + assert(colour_format == fb->cbufs[i]->format); + } else { + colour_format = fb->cbufs[i]->format; + rt_enable |= (NV34TCL_RT_ENABLE_COLOR0 << i); + rt[i] = fb->cbufs[i]; + } + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) + rt_enable |= NV34TCL_RT_ENABLE_MRT; + + if (fb->zsbuf) { + zeta_format = fb->zsbuf->format; + zeta = fb->zsbuf; + } + + if (!(rt[0]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); + for (i = 1; i < fb->nr_cbufs; i++) + assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); + + /* FIXME: NV34TCL_RT_FORMAT_LOG2_[WIDTH/HEIGHT] */ + rt_format = NV34TCL_RT_FORMAT_TYPE_SWIZZLED | + log2i(fb->width) << 16 /*NV34TCL_RT_FORMAT_LOG2_WIDTH_SHIFT*/ | + log2i(fb->height) << 24 /*NV34TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT*/; + } + else + rt_format = NV34TCL_RT_FORMAT_TYPE_LINEAR; + + switch (colour_format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_COLOR_A8R8G8B8; + break; + case PIPE_FORMAT_R5G6B5_UNORM: + rt_format |= NV34TCL_RT_FORMAT_COLOR_R5G6B5; + break; + default: + assert(0); + } + + switch (zeta_format) { + case PIPE_FORMAT_Z16_UNORM: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z16; + break; + case PIPE_FORMAT_Z24S8_UNORM: + case 0: + rt_format |= NV34TCL_RT_FORMAT_ZETA_Z24S8; + break; + default: + assert(0); + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR0) { + uint32_t pitch = rt[0]->stride; + if (zeta) { + pitch |= (zeta->stride << 16); + } else { + pitch |= (pitch << 16); + } + + nv30mt = (struct nv30_miptree *)rt[0]->texture; + so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR0, 1); + so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR, + 
nv30->nvws->channel->vram->handle, + nv30->nvws->channel->gart->handle); + so_method(so, nv30->screen->rankine, NV34TCL_COLOR0_PITCH, 2); + so_data (so, pitch); + so_reloc (so, nv30mt->buffer, rt[0]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + } + + if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { + nv30mt = (struct nv30_miptree *)rt[1]->texture; + so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR1, 1); + so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv30->nvws->channel->vram->handle, + nv30->nvws->channel->gart->handle); + so_method(so, nv30->screen->rankine, NV34TCL_COLOR1_OFFSET, 2); + so_reloc (so, nv30mt->buffer, rt[1]->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + so_data (so, rt[1]->stride); + } + + if (zeta_format) { + nv30mt = (struct nv30_miptree *)zeta->texture; + so_method(so, nv30->screen->rankine, NV34TCL_DMA_ZETA, 1); + so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR, + nv30->nvws->channel->vram->handle, + nv30->nvws->channel->gart->handle); + so_method(so, nv30->screen->rankine, NV34TCL_ZETA_OFFSET, 1); + so_reloc (so, nv30mt->buffer, zeta->offset, rt_flags | + NOUVEAU_BO_LOW, 0, 0); + /* TODO: allocate LMA depth buffer */ + } + + so_method(so, nv30->screen->rankine, NV34TCL_RT_ENABLE, 1); + so_data (so, rt_enable); + so_method(so, nv30->screen->rankine, NV34TCL_RT_HORIZ, 3); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_data (so, rt_format); + so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_HORIZ, 2); + so_data (so, (w << 16) | 0); + so_data (so, (h << 16) | 0); + so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_CLIP_HORIZ(0), 2); + so_data (so, ((w - 1) << 16) | 0); + so_data (so, ((h - 1) << 16) | 0); + so_method(so, nv30->screen->rankine, 0x1d88, 1); + so_data (so, (1 << 12) | h); + /* Wonder why this is needed, context should all be set to zero on init */ + so_method(so, nv30->screen->rankine, NV34TCL_VIEWPORT_TX_ORIGIN, 1); + so_data (so, 0); + + so_ref(so, 
&nv30->state.hw[NV30_STATE_FB]); + return TRUE; +} + +struct nv30_state_entry nv30_state_framebuffer = { + .validate = nv30_state_framebuffer_validate, + .dirty = { + .pipe = NV30_NEW_FB, + .hw = NV30_STATE_FB + } +}; diff --git a/src/gallium/drivers/nv30/nv30_state_rasterizer.c b/src/gallium/drivers/nv30/nv30_state_rasterizer.c new file mode 100644 index 0000000000..6d1b60e043 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_rasterizer.c @@ -0,0 +1,17 @@ +#include "nv30_context.h" + +static boolean +nv30_state_rasterizer_validate(struct nv30_context *nv30) +{ + so_ref(nv30->rasterizer->so, + &nv30->state.hw[NV30_STATE_RAST]); + return TRUE; +} + +struct nv30_state_entry nv30_state_rasterizer = { + .validate = nv30_state_rasterizer_validate, + .dirty = { + .pipe = NV30_NEW_RAST, + .hw = NV30_STATE_RAST + } +}; diff --git a/src/gallium/drivers/nv30/nv30_state_scissor.c b/src/gallium/drivers/nv30/nv30_state_scissor.c new file mode 100644 index 0000000000..1db9bc1795 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_scissor.c @@ -0,0 +1,35 @@ +#include "nv30_context.h" + +static boolean +nv30_state_scissor_validate(struct nv30_context *nv30) +{ + struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe; + struct pipe_scissor_state *s = &nv30->scissor; + struct nouveau_stateobj *so; + + if (nv30->state.hw[NV30_STATE_SCISSOR] && + (rast->scissor == 0 && nv30->state.scissor_enabled == 0)) + return FALSE; + nv30->state.scissor_enabled = rast->scissor; + + so = so_new(3, 0); + so_method(so, nv30->screen->rankine, NV34TCL_SCISSOR_HORIZ, 2); + if (nv30->state.scissor_enabled) { + so_data (so, ((s->maxx - s->minx) << 16) | s->minx); + so_data (so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data (so, 4096 << 16); + so_data (so, 4096 << 16); + } + + so_ref(so, &nv30->state.hw[NV30_STATE_SCISSOR]); + return TRUE; +} + +struct nv30_state_entry nv30_state_scissor = { + .validate = nv30_state_scissor_validate, + .dirty = { + .pipe = NV30_NEW_SCISSOR | 
NV30_NEW_RAST, + .hw = NV30_STATE_SCISSOR + } +}; diff --git a/src/gallium/drivers/nv30/nv30_state_stipple.c b/src/gallium/drivers/nv30/nv30_state_stipple.c new file mode 100644 index 0000000000..41b42813b4 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_stipple.c @@ -0,0 +1,39 @@ +#include "nv30_context.h" + +static boolean +nv30_state_stipple_validate(struct nv30_context *nv30) +{ + struct pipe_rasterizer_state *rast = &nv30->rasterizer->pipe; + struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nouveau_stateobj *so; + + if (nv30->state.hw[NV30_STATE_STIPPLE] && + (rast->poly_stipple_enable == 0 && nv30->state.stipple_enabled == 0)) + return FALSE; + + if (rast->poly_stipple_enable) { + unsigned i; + + so = so_new(35, 0); + so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nv30->stipple[i]); + } else { + so = so_new(2, 0); + so_method(so, rankine, NV34TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &nv30->state.hw[NV30_STATE_STIPPLE]); + return TRUE; +} + +struct nv30_state_entry nv30_state_stipple = { + .validate = nv30_state_stipple_validate, + .dirty = { + .pipe = NV30_NEW_STIPPLE | NV30_NEW_RAST, + .hw = NV30_STATE_STIPPLE, + } +}; diff --git a/src/gallium/drivers/nv30/nv30_state_viewport.c b/src/gallium/drivers/nv30/nv30_state_viewport.c new file mode 100644 index 0000000000..951d40ebfd --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_viewport.c @@ -0,0 +1,70 @@ +#include "nv30_context.h" + +static boolean +nv30_state_viewport_validate(struct nv30_context *nv30) +{ + struct pipe_viewport_state *vpt = &nv30->viewport; + struct nouveau_stateobj *so; + unsigned bypass; + + if (/*nv30->render_mode == HW &&*/ !nv30->rasterizer->pipe.bypass_clipping) + bypass = 0; + else + bypass = 1; + + if (nv30->state.hw[NV30_STATE_VIEWPORT] && + (bypass || !(nv30->dirty & NV30_NEW_VIEWPORT)) && 
+ nv30->state.viewport_bypass == bypass) + return FALSE; + nv30->state.viewport_bypass = bypass; + + so = so_new(11, 0); + if (!bypass) { + so_method(so, nv30->screen->rankine, + NV34TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(vpt->translate[0])); + so_data (so, fui(vpt->translate[1])); + so_data (so, fui(vpt->translate[2])); + so_data (so, fui(vpt->translate[3])); + so_data (so, fui(vpt->scale[0])); + so_data (so, fui(vpt->scale[1])); + so_data (so, fui(vpt->scale[2])); + so_data (so, fui(vpt->scale[3])); +/* so_method(so, nv30->screen->rankine, 0x1d78, 1); + so_data (so, 1); +*/ } else { + so_method(so, nv30->screen->rankine, + NV34TCL_VIEWPORT_TRANSLATE_X, 8); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + so_data (so, fui(1.0)); + so_data (so, fui(1.0)); + so_data (so, fui(0.0)); + /* Not entirely certain what this is yet. The DDX uses this + * value also as it fixes rendering when you pass + * pre-transformed vertices to the GPU. My best gusss is that + * this bypasses some culling/clipping stage. Might be worth + * noting that points/lines are uneffected by whatever this + * value fixes, only filled polygons are effected. 
+ */ +/* so_method(so, nv30->screen->rankine, 0x1d78, 1); + so_data (so, 0x110); +*/ } + /* TODO/FIXME: never saw value 0x0110 in renouveau dumps, only 0x0001 */ + so_method(so, nv30->screen->rankine, 0x1d78, 1); + so_data (so, 1); + + so_ref(so, &nv30->state.hw[NV30_STATE_VIEWPORT]); + return TRUE; +} + +struct nv30_state_entry nv30_state_viewport = { + .validate = nv30_state_viewport_validate, + .dirty = { + .pipe = NV30_NEW_VIEWPORT | NV30_NEW_RAST, + .hw = NV30_STATE_VIEWPORT + } +}; diff --git a/src/gallium/drivers/nv30/nv30_state_zsa.c b/src/gallium/drivers/nv30/nv30_state_zsa.c new file mode 100644 index 0000000000..0940b7269b --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_state_zsa.c @@ -0,0 +1,17 @@ +#include "nv30_context.h" + +static boolean +nv30_state_zsa_validate(struct nv30_context *nv30) +{ + so_ref(nv30->zsa->so, + &nv30->state.hw[NV30_STATE_ZSA]); + return TRUE; +} + +struct nv30_state_entry nv30_state_zsa = { + .validate = nv30_state_zsa_validate, + .dirty = { + .pipe = NV30_NEW_ZSA, + .hw = NV30_STATE_ZSA + } +}; diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c new file mode 100644 index 0000000000..0f8dc12045 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_surface.c @@ -0,0 +1,72 @@ + +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "nv30_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv30_surface_copy(struct pipe_context *pipe, boolean do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv04_surface_2d *eng2d = nv30->screen->eng2d; + + if (do_flip) { + desty += height; + while (height--) { + eng2d->copy(eng2d, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + return; + } + + eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); +} + +static void +nv30_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nv04_surface_2d *eng2d = nv30->screen->eng2d; + + eng2d->fill(eng2d, dest, destx, desty, width, height, value); +} + +void +nv30_init_surface_functions(struct nv30_context *nv30) +{ + nv30->pipe.surface_copy = nv30_surface_copy; + nv30->pipe.surface_fill = nv30_surface_fill; +} diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c new file mode 100644 index 0000000000..2d6d48ac16 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -0,0 +1,556 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv30_context.h" +#include "nv30_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_util.h" + +#define FORCE_SWTNL 0 + +static INLINE int +nv30_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +{ + switch (pipe) { + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case 
PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + *fmt = NV34TCL_VTXFMT_TYPE_FLOAT; + break; + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + *fmt = NV34TCL_VTXFMT_TYPE_UBYTE; + break; + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *fmt = NV34TCL_VTXFMT_TYPE_USHORT; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); + return 1; + } + + switch (pipe) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16_SSCALED: + *ncomp = 1; + break; + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16_SSCALED: + *ncomp = 2; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R16G16B16_SSCALED: + *ncomp = 3; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *ncomp = 4; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); + return 1; + } + + return 0; +} + +static boolean +nv30_vbo_set_idxbuf(struct nv30_context *nv30, struct pipe_buffer *ib, + unsigned ib_size) +{ + struct pipe_screen *pscreen = &nv30->screen->pipe; + unsigned type; + + if (!ib) { + nv30->idxbuf = NULL; + nv30->idxbuf_format = 0xdeadbeef; + return FALSE; + } + + if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) + return FALSE; + + switch (ib_size) { + case 2: + type = NV34TCL_IDXBUF_FORMAT_TYPE_U16; + break; + case 4: + type = NV34TCL_IDXBUF_FORMAT_TYPE_U32; + break; + default: + return FALSE; + } + + if (ib != nv30->idxbuf || + type != nv30->idxbuf_format) { + nv30->dirty |= NV30_NEW_ARRAYS; + nv30->idxbuf = ib; + nv30->idxbuf_format = type; + } + + return TRUE; +} + +static boolean +nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, + int 
attrib, struct pipe_vertex_element *ve, + struct pipe_vertex_buffer *vb) +{ + struct pipe_winsys *ws = nv30->pipe.winsys; + struct nouveau_grobj *rankine = nv30->screen->rankine; + unsigned type, ncomp; + void *map; + + if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) + return FALSE; + + map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map += vb->buffer_offset + ve->src_offset; + + switch (type) { + case NV34TCL_VTXFMT_TYPE_FLOAT: + { + float *v = map; + + switch (ncomp) { + case 4: + so_method(so, rankine, NV34TCL_VTX_ATTR_4F_X(attrib), 4); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + so_data (so, fui(v[3])); + break; + case 3: + so_method(so, rankine, NV34TCL_VTX_ATTR_3F_X(attrib), 3); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + break; + case 2: + so_method(so, rankine, NV34TCL_VTX_ATTR_2F_X(attrib), 2); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + break; + case 1: + so_method(so, rankine, NV34TCL_VTX_ATTR_1F(attrib), 1); + so_data (so, fui(v[0])); + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + } + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + + ws->buffer_unmap(ws, vb->buffer); + + return TRUE; +} + +boolean +nv30_draw_arrays(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_channel *chan = nv30->nvws->channel; + unsigned restart = 0; + + nv30_vbo_set_idxbuf(nv30, NULL, 0); + if (FORCE_SWTNL || !nv30_state_validate(nv30)) { + /*return nv30_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count);*/ + return FALSE; + } + + while (count) { + unsigned vc, nr; + + nv30_state_emit(nv30); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); + continue; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING 
/*
 * Draw `count` vertices using 8-bit indices read directly from the mapped
 * index buffer `ib`, pushing the index values inline into the command stream.
 *
 * The work is split into chunks sized by nouveau_vbuf_split(); each chunk is
 * wrapped in its own VERTEX_BEGIN_END pair.
 */
static INLINE void
nv30_draw_elements_u08(struct nv30_context *nv30, void *ib,
		       unsigned mode, unsigned start, unsigned count)
{
	struct nouveau_channel *chan = nv30->nvws->channel;

	while (count) {
		uint8_t *elts = (uint8_t *)ib + start;
		unsigned vc, push, restart = 0;

		/* State is re-emitted before every chunk. */
		nv30_state_emit(nv30);

		vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2,
					mode, start, count, &restart);
		if (vc == 0) {
			/* Nothing fits: fire the ring and retry this chunk. */
			FIRE_RING(NULL);
			continue;
		}
		count -= vc;

		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
		OUT_RING  (nvgl_primitive(mode));

		/* Odd count: send one index on its own so the rest can be
		 * packed two per word below. */
		if (vc & 1) {
			BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1);
			OUT_RING  (elts[0]);
			elts++; vc--;
		}

		while (vc) {
			unsigned i;

			/* At most 2047 words per packet, two indices each. */
			push = MIN2(vc, 2047 * 2);

			BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1);
			for (i = 0; i < push; i+=2)
				OUT_RING((elts[i+1] << 16) | elts[i]);

			vc -= push;
			elts += push;
		}

		/* Close the primitive. */
		BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1);
		OUT_RING  (0);

		/* Resume where nouveau_vbuf_split() said to restart. */
		start = restart;
	}
}
FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + if (vc & 1) { + BEGIN_RING(rankine, NV34TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +static INLINE void +nv30_draw_elements_u32(struct nv30_context *nv30, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv30->nvws->channel; + + while (count) { + uint32_t *elts = (uint32_t *)ib + start; + unsigned vc, push, restart = 0; + + nv30_state_emit(nv30); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + while (vc) { + push = MIN2(vc, 2047); + + BEGIN_RING_NI(rankine, NV34TCL_VB_ELEMENT_U32, push); + OUT_RINGp (elts, push); + + vc -= push; + elts += push; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +static boolean +nv30_draw_elements_inline(struct pipe_context *pipe, + struct pipe_buffer *ib, unsigned ib_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + void *map; + + map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); + if (!ib) { + NOUVEAU_ERR("failed mapping ib\n"); + return FALSE; + } + + switch (ib_size) { + case 1: + nv30_draw_elements_u08(nv30, map, mode, start, count); + break; + case 2: + nv30_draw_elements_u16(nv30, map, mode, start, count); + break; + case 4: + 
nv30_draw_elements_u32(nv30, map, mode, start, count); + break; + default: + NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); + break; + } + + ws->buffer_unmap(ws, ib); + return TRUE; +} + +static boolean +nv30_draw_elements_vbo(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nv30_context *nv30 = nv30_context(pipe); + struct nouveau_channel *chan = nv30->nvws->channel; + unsigned restart = 0; + + while (count) { + unsigned nr, vc; + + nv30_state_emit(nv30); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); + continue; + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(rankine, NV34TCL_VB_INDEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(rankine, NV34TCL_VB_INDEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(rankine, NV34TCL_VERTEX_BEGIN_END, 1); + OUT_RING (0); + + count -= vc; + start = restart; + } + + return TRUE; +} + +boolean +nv30_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct nv30_context *nv30 = nv30_context(pipe); + boolean idxbuf; + + idxbuf = nv30_vbo_set_idxbuf(nv30, indexBuffer, indexSize); + if (FORCE_SWTNL || !nv30_state_validate(nv30)) { + /*return nv30_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count);*/ + return FALSE; + } + + if (idxbuf) { + nv30_draw_elements_vbo(pipe, mode, start, count); + } else { + nv30_draw_elements_inline(pipe, indexBuffer, indexSize, + mode, start, count); + } + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +static boolean +nv30_vbo_validate(struct nv30_context *nv30) +{ + struct nouveau_stateobj 
*vtxbuf, *vtxfmt, *sattr = NULL; + struct nouveau_grobj *rankine = nv30->screen->rankine; + struct pipe_buffer *ib = nv30->idxbuf; + unsigned ib_format = nv30->idxbuf_format; + unsigned vb_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + int hw; + + if (nv30->edgeflags) { + /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/ + return FALSE; + } + + vtxbuf = so_new(20, 18); + so_method(vtxbuf, rankine, NV34TCL_VTXBUF_ADDRESS(0), nv30->vtxelt_nr); + vtxfmt = so_new(17, 0); + so_method(vtxfmt, rankine, NV34TCL_VTXFMT(0), nv30->vtxelt_nr); + + for (hw = 0; hw < nv30->vtxelt_nr; hw++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + unsigned type, ncomp; + + ve = &nv30->vtxelt[hw]; + vb = &nv30->vtxbuf[ve->vertex_buffer_index]; + + if (!vb->stride) { + if (!sattr) + sattr = so_new(16 * 5, 0); + + if (nv30_vbo_static_attrib(nv30, sattr, hw, ve, vb)) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV34TCL_VTXFMT_TYPE_FLOAT); + continue; + } + } + + if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { + /*nv30->fallback_swtnl |= NV30_NEW_ARRAYS;*/ + so_ref(NULL, &vtxbuf); + so_ref(NULL, &vtxfmt); + return FALSE; + } + + so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV34TCL_VTXBUF_ADDRESS_DMA1); + so_data (vtxfmt, ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | + (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type)); + } + + if (ib) { + so_method(vtxbuf, rankine, NV34TCL_IDXBUF_ADDRESS, 2); + so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, + 0, NV34TCL_IDXBUF_FORMAT_DMA1); + } + + so_method(vtxbuf, rankine, 0x1710, 1); + so_data (vtxbuf, 0); + + so_ref(vtxbuf, &nv30->state.hw[NV30_STATE_VTXBUF]); + nv30->state.dirty |= (1ULL << NV30_STATE_VTXBUF); + so_ref(vtxfmt, &nv30->state.hw[NV30_STATE_VTXFMT]); + nv30->state.dirty |= (1ULL << NV30_STATE_VTXFMT); + so_ref(sattr, &nv30->state.hw[NV30_STATE_VTXATTR]); + 
nv30->state.dirty |= (1ULL << NV30_STATE_VTXATTR); + return FALSE; +} + +struct nv30_state_entry nv30_state_vbo = { + .validate = nv30_vbo_validate, + .dirty = { + .pipe = NV30_NEW_ARRAYS, + .hw = 0, + } +}; diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c new file mode 100644 index 0000000000..d262725057 --- /dev/null +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -0,0 +1,838 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" + +#include "nv30_context.h" +#include "nv30_state.h" + +/* TODO (at least...): + * 1. Indexed consts + ARL + * 2. Arb. swz/negation + * 3. NV_vp11, NV_vp2, NV_vp3 features + * - extra arith opcodes + * - branching + * - texture sampling + * - indexed attribs + * - indexed results + * 4. bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv30_shader.h" + +#define swz(s,x,y,z,w) nv30_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv30_sr_neg((s)) +#define abs(s) nv30_sr_abs((s)) + +struct nv30_vpc { + struct nv30_vertex_program *vp; + + struct nv30_vertex_program_exec *vpi; + + unsigned output_map[PIPE_MAX_SHADER_OUTPUTS]; + + int high_temp; + int temp_temp_count; + + struct nv30_sreg *imm; + unsigned nr_imm; +}; + +static struct nv30_sreg +temp(struct nv30_vpc *vpc) +{ + int idx; + + idx = vpc->temp_temp_count++; + idx += vpc->high_temp + 1; + return nv30_sr(NV30SR_TEMP, idx); +} + +static struct nv30_sreg +constant(struct nv30_vpc *vpc, int pipe, float x, float y, float z, float w) +{ + struct nv30_vertex_program *vp = vpc->vp; + struct nv30_vertex_program_data *vpd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < vp->nr_consts; idx++) { + 
if (vp->consts[idx].index == pipe) + return nv30_sr(NV30SR_CONST, idx); + } + } + + idx = vp->nr_consts++; + vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); + vpd = &vp->consts[idx]; + + vpd->index = pipe; + vpd->value[0] = x; + vpd->value[1] = y; + vpd->value[2] = z; + vpd->value[3] = w; + return nv30_sr(NV30SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv30_vp_arith((cc), (s), NV30_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv30_vpc *vpc, uint32_t *hw, int pos, struct nv30_sreg src) +{ + struct nv30_vertex_program *vp = vpc->vp; + uint32_t sr = 0; + + switch (src.type) { + case NV30SR_TEMP: + sr |= (NV30_VP_SRC_REG_TYPE_TEMP << NV30_VP_SRC_REG_TYPE_SHIFT); + sr |= (src.index << NV30_VP_SRC_TEMP_SRC_SHIFT); + break; + case NV30SR_INPUT: + sr |= (NV30_VP_SRC_REG_TYPE_INPUT << + NV30_VP_SRC_REG_TYPE_SHIFT); + vp->ir |= (1 << src.index); + hw[1] |= (src.index << NV30_VP_INST_INPUT_SRC_SHIFT); + break; + case NV30SR_CONST: + sr |= (NV30_VP_SRC_REG_TYPE_CONST << + NV30_VP_SRC_REG_TYPE_SHIFT); + assert(vpc->vpi->const_index == -1 || + vpc->vpi->const_index == src.index); + vpc->vpi->const_index = src.index; + break; + case NV30SR_NONE: + sr |= (NV30_VP_SRC_REG_TYPE_INPUT << + NV30_VP_SRC_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV30_VP_SRC_NEGATE; + + if (src.abs) + hw[0] |= (1 << (21 + pos)); + + sr |= ((src.swz[0] << NV30_VP_SRC_SWZ_X_SHIFT) | + (src.swz[1] << NV30_VP_SRC_SWZ_Y_SHIFT) | + (src.swz[2] << NV30_VP_SRC_SWZ_Z_SHIFT) | + (src.swz[3] << NV30_VP_SRC_SWZ_W_SHIFT)); + +/* + * |VVV| + * d�.�b + * \u/ + * + */ + + switch (pos) { + case 0: + hw[1] |= ((sr & NV30_VP_SRC0_HIGH_MASK) >> + NV30_VP_SRC0_HIGH_SHIFT) << NV30_VP_INST_SRC0H_SHIFT; + hw[2] |= (sr & NV30_VP_SRC0_LOW_MASK) << + NV30_VP_INST_SRC0L_SHIFT; + break; + case 1: + hw[2] |= sr << NV30_VP_INST_SRC1_SHIFT; + break; + case 2: + hw[2] |= ((sr & NV30_VP_SRC2_HIGH_MASK) >> + NV30_VP_SRC2_HIGH_SHIFT) << 
NV30_VP_INST_SRC2H_SHIFT; + hw[3] |= (sr & NV30_VP_SRC2_LOW_MASK) << + NV30_VP_INST_SRC2L_SHIFT; + break; + default: + assert(0); + } +} + +static void +emit_dst(struct nv30_vpc *vpc, uint32_t *hw, int slot, struct nv30_sreg dst) +{ + struct nv30_vertex_program *vp = vpc->vp; + + switch (dst.type) { + case NV30SR_TEMP: + hw[0] |= (dst.index << NV30_VP_INST_DEST_TEMP_ID_SHIFT); + break; + case NV30SR_OUTPUT: + switch (dst.index) { + case NV30_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV30_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV30_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV30_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV30_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; + case NV30_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV30_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV30_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV30_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV30_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV30_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV30_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV30_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV30_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + default: + break; + } + + hw[3] |= (dst.index << NV30_VP_INST_DEST_SHIFT); + hw[0] |= NV30_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + + /*XXX: no way this is entirely correct, someone needs to + * figure out what exactly it is. 
+ */ + hw[3] |= 0x800; + break; + default: + assert(0); + } +} + +static void +nv30_vp_arith(struct nv30_vpc *vpc, int slot, int op, + struct nv30_sreg dst, int mask, + struct nv30_sreg s0, struct nv30_sreg s1, + struct nv30_sreg s2) +{ + struct nv30_vertex_program *vp = vpc->vp; + uint32_t *hw; + + vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); + vpc->vpi = &vp->insns[vp->nr_insns - 1]; + memset(vpc->vpi, 0, sizeof(*vpc->vpi)); + vpc->vpi->const_index = -1; + + hw = vpc->vpi->data; + + hw[0] |= (NV30_VP_INST_COND_TR << NV30_VP_INST_COND_SHIFT); + hw[0] |= ((0 << NV30_VP_INST_COND_SWZ_X_SHIFT) | + (1 << NV30_VP_INST_COND_SWZ_Y_SHIFT) | + (2 << NV30_VP_INST_COND_SWZ_Z_SHIFT) | + (3 << NV30_VP_INST_COND_SWZ_W_SHIFT)); + + hw[1] |= (op << NV30_VP_INST_VEC_OPCODE_SHIFT); +// hw[3] |= NV30_VP_INST_SCA_DEST_TEMP_MASK; +// hw[3] |= (mask << NV30_VP_INST_VEC_WRITEMASK_SHIFT); + + if (dst.type == NV30SR_OUTPUT) { + if (slot) + hw[3] |= (mask << NV30_VP_INST_SDEST_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VDEST_WRITEMASK_SHIFT); + } else { + if (slot) + hw[3] |= (mask << NV30_VP_INST_STEMP_WRITEMASK_SHIFT); + else + hw[3] |= (mask << NV30_VP_INST_VTEMP_WRITEMASK_SHIFT); + } + + emit_dst(vpc, hw, slot, dst); + emit_src(vpc, hw, 0, s0); + emit_src(vpc, hw, 1, s1); + emit_src(vpc, hw, 2, s2); +} + +static INLINE struct nv30_sreg +tgsi_src(struct nv30_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nv30_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv30_sr(NV30SR_INPUT, fsrc->SrcRegister.Index); + break; + case TGSI_FILE_CONSTANT: + src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + break; + case TGSI_FILE_IMMEDIATE: + src = vpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + if (vpc->high_temp < fsrc->SrcRegister.Index) + vpc->high_temp = fsrc->SrcRegister.Index; + src = nv30_sr(NV30SR_TEMP, fsrc->SrcRegister.Index); + break; + default: + NOUVEAU_ERR("bad src 
file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv30_sreg +tgsi_dst(struct nv30_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nv30_sreg dst; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + dst = nv30_sr(NV30SR_OUTPUT, + vpc->output_map[fdst->DstRegister.Index]); + + break; + case TGSI_FILE_TEMPORARY: + dst = nv30_sr(NV30SR_TEMP, fdst->DstRegister.Index); + if (vpc->high_temp < dst.index) + vpc->high_temp = dst.index; + break; + default: + NOUVEAU_ERR("bad dst file\n"); + break; + } + + return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +nv30_vertprog_parse_instruction(struct nv30_vpc *vpc, + const struct tgsi_full_instruction *finst) +{ + struct nv30_sreg src[3], dst, tmp; + struct nv30_sreg none = nv30_sr(NV30SR_NONE, 0); + int mask; + int ai = -1, ci = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + vpc->temp_temp_count = 0; + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(vpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } 
else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + /*XXX: index comparison is broken now that consts come from + * two different register files. + */ + case TGSI_FILE_CONSTANT: + case TGSI_FILE_IMMEDIATE: + if (ci == -1 || ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + break; + case TGSI_OPCODE_ARL: + arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DST: + arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_EXP: + arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_FLR: + arith(vpc, 0, OP_FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_LG2: + arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LIT: + arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + break; + case 
TGSI_OPCODE_LOG: + arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_MAD: + arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(vpc); + arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + swz(src[0], X, X, X, X)); + arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(vpc, 1, OP_EX2, dst, mask, none, none, + swz(tmp, X, X, X, X)); + break; + case TGSI_OPCODE_RCP: + arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_RET: + break; + case TGSI_OPCODE_RSQ: + arith(vpc, 1, OP_RSQ, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_SGE: + arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGT: + arith(vpc, 0, OP_SGT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + break; + case TGSI_OPCODE_XPD: + tmp = temp(vpc); + arith(vpc, 0, OP_MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + return TRUE; +} + +static boolean +nv30_vertprog_parse_decl_output(struct nv30_vpc *vpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = 
NV30_VP_INST_DEST_POS; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV30_VP_INST_DEST_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV30_VP_INST_DEST_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_BCOLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV30_VP_INST_DEST_BFC0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV30_VP_INST_DEST_BFC1; + } else { + NOUVEAU_ERR("bad bcolour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV30_VP_INST_DEST_FOGC; + break; + case TGSI_SEMANTIC_PSIZE: + hw = NV30_VP_INST_DEST_PSZ; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV30_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + vpc->output_map[fdec->DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv30_vertprog_prepare(struct nv30_vpc *vpc) +{ + struct tgsi_parse_context p; + int nr_imm = 0; + + tgsi_parse_init(&p, vpc->vp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + nr_imm++; + break; + default: + break; + } + } + tgsi_parse_free(&p); + + if (nr_imm) { + vpc->imm = CALLOC(nr_imm, sizeof(struct nv30_sreg)); + assert(vpc->imm); + } + + return TRUE; +} + +static void +nv30_vertprog_translate(struct nv30_context *nv30, + struct nv30_vertex_program *vp) +{ + struct tgsi_parse_context parse; + struct nv30_vpc *vpc = NULL; + + tgsi_dump(vp->pipe.tokens,0); + + vpc = CALLOC(1, sizeof(struct nv30_vpc)); + if (!vpc) + return; + vpc->vp = vp; + vpc->high_temp = -1; + + if (!nv30_vertprog_prepare(vpc)) { + FREE(vpc); + return; + } + + 
tgsi_parse_init(&parse, vp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &parse.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_OUTPUT: + if (!nv30_vertprog_parse_decl_output(vpc, fdec)) + goto out_err; + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(imm->Immediate.NrTokens == 4 + 1); + vpc->imm[vpc->nr_imm++] = + constant(vpc, -1, + imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + finst = &parse.FullToken.FullInstruction; + if (!nv30_vertprog_parse_instruction(vpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + vp->insns[vp->nr_insns - 1].data[3] |= NV30_VP_INST_LAST; + vp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + FREE(vpc); +} + +static boolean +nv30_vertprog_validate(struct nv30_context *nv30) +{ + struct nouveau_winsys *nvws = nv30->nvws; + struct pipe_winsys *ws = nv30->pipe.winsys; + struct nouveau_grobj *rankine = nv30->screen->rankine; + struct nv30_vertex_program *vp; + struct pipe_buffer *constbuf; + boolean upload_code = FALSE, upload_data = FALSE; + int i; + + vp = nv30->vertprog; + constbuf = nv30->constbuf[PIPE_SHADER_VERTEX]; + + /* Translate TGSI shader into hw bytecode */ + if (!vp->translated) { + nv30_vertprog_translate(nv30, vp); + if (!vp->translated) + return FALSE; + } + + /* Allocate hw vtxprog exec slots */ + if (!vp->exec) { + struct nouveau_resource *heap = nv30->screen->vp_exec_heap; + struct nouveau_stateobj *so; + uint 
vplen = vp->nr_insns; + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { + while (heap->next && heap->size < vplen) { + struct nv30_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->exec); + } + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) + assert(0); + } + + so = so_new(2, 0); + so_method(so, rankine, NV34TCL_VP_START_FROM_ID, 1); + so_data (so, vp->exec->start); + so_ref(so, &vp->so); + + upload_code = TRUE; + } + + /* Allocate hw vtxprog const slots */ + if (vp->nr_consts && !vp->data) { + struct nouveau_resource *heap = nv30->screen->vp_data_heap; + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { + while (heap->next && heap->size < vp->nr_consts) { + struct nv30_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) + assert(0); + } + + /*XXX: handle this some day */ + assert(vp->data->start >= vp->data_start_min); + + upload_data = TRUE; + if (vp->data_start != vp->data->start) + upload_code = TRUE; + } + + /* If exec or data segments moved we need to patch the program to + * fixup offsets and register IDs. 
+ */ + if (vp->exec_start != vp->exec->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv30_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->has_branch_offset) { + assert(0); + } + } + + vp->exec_start = vp->exec->start; + } + + if (vp->nr_consts && vp->data_start != vp->data->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv30_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->const_index >= 0) { + vpi->data[1] &= ~NV30_VP_INST_CONST_SRC_MASK; + vpi->data[1] |= + (vpi->const_index + vp->data->start) << + NV30_VP_INST_CONST_SRC_SHIFT; + + } + } + + vp->data_start = vp->data->start; + } + + /* Update + Upload constant values */ + if (vp->nr_consts) { + float *map = NULL; + + if (constbuf) { + map = ws->buffer_map(ws, constbuf, + PIPE_BUFFER_USAGE_CPU_READ); + } + + for (i = 0; i < vp->nr_consts; i++) { + struct nv30_vertex_program_data *vpd = &vp->consts[i]; + + if (vpd->index >= 0) { + if (!upload_data && + !memcmp(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float))) + continue; + memcpy(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float)); + } + + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (i + vp->data->start); + OUT_RINGp ((uint32_t *)vpd->value, 4); + } + + if (constbuf) { + ws->buffer_unmap(ws, constbuf); + } + } + + /* Upload vtxprog */ + if (upload_code) { +#if 0 + for (i = 0; i < vp->nr_insns; i++) { + NOUVEAU_MSG("VP inst %d: 0x%08x 0x%08x 0x%08x 0x%08x\n", + i, vp->insns[i].data[0], vp->insns[i].data[1], + vp->insns[i].data[2], vp->insns[i].data[3]); + } +#endif + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (vp->exec->start); + for (i = 0; i < vp->nr_insns; i++) { + BEGIN_RING(rankine, NV34TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (vp->insns[i].data, 4); + } + } + + if (vp->so != nv30->state.hw[NV30_STATE_VERTPROG]) { + so_ref(vp->so, &nv30->state.hw[NV30_STATE_VERTPROG]); + return TRUE; + } + + return FALSE; +} + +void +nv30_vertprog_destroy(struct nv30_context *nv30, struct 
nv30_vertex_program *vp) +{ + struct nouveau_winsys *nvws = nv30->screen->nvws; + + vp->translated = FALSE; + + if (vp->nr_insns) { + FREE(vp->insns); + vp->insns = NULL; + vp->nr_insns = 0; + } + + if (vp->nr_consts) { + FREE(vp->consts); + vp->consts = NULL; + vp->nr_consts = 0; + } + + nvws->res_free(&vp->exec); + vp->exec_start = 0; + nvws->res_free(&vp->data); + vp->data_start = 0; + vp->data_start_min = 0; + + vp->ir = vp->or = 0; + so_ref(NULL, &vp->so); +} + +struct nv30_state_entry nv30_state_vertprog = { + .validate = nv30_vertprog_validate, + .dirty = { + .pipe = NV30_NEW_VERTPROG /*| NV30_NEW_UCP*/, + .hw = NV30_STATE_VERTPROG, + } +}; diff --git a/src/gallium/drivers/nv40/Makefile b/src/gallium/drivers/nv40/Makefile new file mode 100644 index 0000000000..8c738aefa6 --- /dev/null +++ b/src/gallium/drivers/nv40/Makefile @@ -0,0 +1,28 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = nv40 + +C_SOURCES = \ + nv40_clear.c \ + nv40_context.c \ + nv40_draw.c \ + nv40_fragprog.c \ + nv40_fragtex.c \ + nv40_miptree.c \ + nv40_query.c \ + nv40_screen.c \ + nv40_state.c \ + nv40_state_blend.c \ + nv40_state_emit.c \ + nv40_state_fb.c \ + nv40_state_rasterizer.c \ + nv40_state_scissor.c \ + nv40_state_stipple.c \ + nv40_state_viewport.c \ + nv40_state_zsa.c \ + nv40_surface.c \ + nv40_vbo.c \ + nv40_vertprog.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/nv40/nv40_clear.c b/src/gallium/drivers/nv40/nv40_clear.c new file mode 100644 index 0000000000..59efd620e3 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_clear.c @@ -0,0 +1,13 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv40_context.h" + +void +nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, clearValue); + ps->status = PIPE_SURFACE_STATUS_CLEAR; +} diff --git a/src/gallium/drivers/nv40/nv40_context.c 
b/src/gallium/drivers/nv40/nv40_context.c new file mode 100644 index 0000000000..5d325f5067 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -0,0 +1,72 @@ +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "nv40_context.h" +#include "nv40_screen.h" + +static void +nv40_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + BEGIN_RING(curie, 0x1fd8, 1); + OUT_RING (2); + BEGIN_RING(curie, 0x1fd8, 1); + OUT_RING (1); + } + + FIRE_RING(fence); +} + +static void +nv40_destroy(struct pipe_context *pipe) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + if (nv40->draw) + draw_destroy(nv40->draw); + FREE(nv40); +} + +struct pipe_context * +nv40_create(struct pipe_screen *pscreen, unsigned pctx_id) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + struct pipe_winsys *ws = pscreen->winsys; + struct nv40_context *nv40; + struct nouveau_winsys *nvws = screen->nvws; + + nv40 = CALLOC(1, sizeof(struct nv40_context)); + if (!nv40) + return NULL; + nv40->screen = screen; + nv40->pctx_id = pctx_id; + + nv40->nvws = nvws; + + nv40->pipe.winsys = ws; + nv40->pipe.screen = pscreen; + nv40->pipe.destroy = nv40_destroy; + nv40->pipe.draw_arrays = nv40_draw_arrays; + nv40->pipe.draw_elements = nv40_draw_elements; + nv40->pipe.clear = nv40_clear; + nv40->pipe.flush = nv40_flush; + + nv40_init_query_functions(nv40); + nv40_init_surface_functions(nv40); + nv40_init_state_functions(nv40); + + /* Create, configure, and install fallback swtnl path */ + nv40->draw = draw_create(); + draw_wide_point_threshold(nv40->draw, 9999999.0); + draw_wide_line_threshold(nv40->draw, 9999999.0); + draw_enable_line_stipple(nv40->draw, FALSE); + draw_enable_point_sprites(nv40->draw, FALSE); + draw_set_rasterize_stage(nv40->draw, nv40_draw_render_stage(nv40)); + + return &nv40->pipe; +} + 
diff --git a/src/gallium/drivers/nv40/nv40_context.h b/src/gallium/drivers/nv40/nv40_context.h new file mode 100644 index 0000000000..adcfbdd85a --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_context.h @@ -0,0 +1,233 @@ +#ifndef __NV40_CONTEXT_H__ +#define __NV40_CONTEXT_H__ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_compiler.h" + +#include "util/u_memory.h" +#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" + +#define NOUVEAU_PUSH_CONTEXT(ctx) \ + struct nv40_screen *ctx = nv40->screen +#include "nouveau/nouveau_push.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nv40_state.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +enum nv40_state_index { + NV40_STATE_FB = 0, + NV40_STATE_VIEWPORT = 1, + NV40_STATE_BLEND = 2, + NV40_STATE_RAST = 3, + NV40_STATE_ZSA = 4, + NV40_STATE_BCOL = 5, + NV40_STATE_CLIP = 6, + NV40_STATE_SCISSOR = 7, + NV40_STATE_STIPPLE = 8, + NV40_STATE_FRAGPROG = 9, + NV40_STATE_VERTPROG = 10, + NV40_STATE_FRAGTEX0 = 11, + NV40_STATE_FRAGTEX1 = 12, + NV40_STATE_FRAGTEX2 = 13, + NV40_STATE_FRAGTEX3 = 14, + NV40_STATE_FRAGTEX4 = 15, + NV40_STATE_FRAGTEX5 = 16, + NV40_STATE_FRAGTEX6 = 17, + NV40_STATE_FRAGTEX7 = 18, + NV40_STATE_FRAGTEX8 = 19, + NV40_STATE_FRAGTEX9 = 20, + NV40_STATE_FRAGTEX10 = 21, + NV40_STATE_FRAGTEX11 = 22, + NV40_STATE_FRAGTEX12 = 23, + NV40_STATE_FRAGTEX13 = 24, + NV40_STATE_FRAGTEX14 = 25, + NV40_STATE_FRAGTEX15 = 26, + NV40_STATE_VERTTEX0 = 27, + NV40_STATE_VERTTEX1 = 28, + NV40_STATE_VERTTEX2 = 29, + NV40_STATE_VERTTEX3 = 30, + NV40_STATE_VTXBUF = 31, + NV40_STATE_VTXFMT = 32, + NV40_STATE_VTXATTR = 33, + NV40_STATE_MAX = 34 +}; + +#include "nv40_screen.h" + +#define NV40_NEW_BLEND (1 << 0) +#define NV40_NEW_RAST (1 << 1) +#define 
NV40_NEW_ZSA (1 << 2) +#define NV40_NEW_SAMPLER (1 << 3) +#define NV40_NEW_FB (1 << 4) +#define NV40_NEW_STIPPLE (1 << 5) +#define NV40_NEW_SCISSOR (1 << 6) +#define NV40_NEW_VIEWPORT (1 << 7) +#define NV40_NEW_BCOL (1 << 8) +#define NV40_NEW_VERTPROG (1 << 9) +#define NV40_NEW_FRAGPROG (1 << 10) +#define NV40_NEW_ARRAYS (1 << 11) +#define NV40_NEW_UCP (1 << 12) + +struct nv40_rasterizer_state { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv40_zsa_state { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv40_blend_state { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + + +struct nv40_state { + unsigned scissor_enabled; + unsigned stipple_enabled; + unsigned viewport_bypass; + unsigned fp_samplers; + + uint64_t dirty; + struct nouveau_stateobj *hw[NV40_STATE_MAX]; +}; + +struct nv40_context { + struct pipe_context pipe; + + struct nouveau_winsys *nvws; + struct nv40_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + /* HW state derived from pipe states */ + struct nv40_state state; + struct { + struct nv40_vertex_program *vertprog; + + unsigned nr_attribs; + unsigned hw[PIPE_MAX_SHADER_INPUTS]; + unsigned draw[PIPE_MAX_SHADER_INPUTS]; + unsigned emit[PIPE_MAX_SHADER_INPUTS]; + } swtnl; + + enum { + HW, SWTNL, SWRAST + } render_mode; + unsigned fallback_swtnl; + unsigned fallback_swrast; + + /* Context state */ + unsigned dirty, draw_dirty; + struct pipe_scissor_state scissor; + unsigned stipple[32]; + struct pipe_clip_state clip; + struct nv40_vertex_program *vertprog; + struct nv40_fragment_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + unsigned constbuf_nr[PIPE_SHADER_TYPES]; + struct nv40_rasterizer_state *rasterizer; + struct nv40_zsa_state *zsa; + struct nv40_blend_state *blend; + struct pipe_blend_color blend_colour; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct 
pipe_buffer *idxbuf; + unsigned idxbuf_format; + struct nv40_sampler_state *tex_sampler[PIPE_MAX_SAMPLERS]; + struct nv40_miptree *tex_miptree[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + unsigned nr_textures; + unsigned dirty_samplers; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned vtxbuf_nr; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; + unsigned vtxelt_nr; + const unsigned *edgeflags; +}; + +static INLINE struct nv40_context * +nv40_context(struct pipe_context *pipe) +{ + return (struct nv40_context *)pipe; +} + +struct nv40_state_entry { + boolean (*validate)(struct nv40_context *nv40); + struct { + unsigned pipe; + unsigned hw; + } dirty; +}; + +extern void nv40_init_state_functions(struct nv40_context *nv40); +extern void nv40_init_surface_functions(struct nv40_context *nv40); +extern void nv40_init_query_functions(struct nv40_context *nv40); + +extern void nv40_screen_init_miptree_functions(struct pipe_screen *pscreen); + +/* nv40_draw.c */ +extern struct draw_stage *nv40_draw_render_stage(struct nv40_context *nv40); +extern boolean nv40_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, + unsigned ib_size, unsigned mode, + unsigned start, unsigned count); + +/* nv40_vertprog.c */ +extern void nv40_vertprog_destroy(struct nv40_context *, + struct nv40_vertex_program *); + +/* nv40_fragprog.c */ +extern void nv40_fragprog_destroy(struct nv40_context *, + struct nv40_fragment_program *); + +/* nv40_fragtex.c */ +extern void nv40_fragtex_bind(struct nv40_context *); + +/* nv40_state.c and friends */ +extern boolean nv40_state_validate(struct nv40_context *nv40); +extern boolean nv40_state_validate_swtnl(struct nv40_context *nv40); +extern void nv40_state_emit(struct nv40_context *nv40); +extern struct nv40_state_entry nv40_state_rasterizer; +extern struct nv40_state_entry nv40_state_scissor; +extern struct nv40_state_entry nv40_state_stipple; +extern struct nv40_state_entry nv40_state_fragprog; +extern 
struct nv40_state_entry nv40_state_vertprog; +extern struct nv40_state_entry nv40_state_blend; +extern struct nv40_state_entry nv40_state_blend_colour; +extern struct nv40_state_entry nv40_state_zsa; +extern struct nv40_state_entry nv40_state_viewport; +extern struct nv40_state_entry nv40_state_framebuffer; +extern struct nv40_state_entry nv40_state_fragtex; +extern struct nv40_state_entry nv40_state_vbo; +extern struct nv40_state_entry nv40_state_vtxfmt; + +/* nv40_vbo.c */ +extern boolean nv40_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv40_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); + +/* nv40_clear.c */ +extern void nv40_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +#endif diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c new file mode 100644 index 0000000000..c83ff91d7e --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -0,0 +1,349 @@ +#include "pipe/p_shader_tokens.h" + +#include "util/u_pack_color.h" + +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pipe.h" + +#include "nv40_context.h" +#define NV40_SHADER_NO_FUCKEDNESS +#include "nv40_shader.h" + +/* Simple, but crappy, swtnl path, hopefully we wont need to hit this very + * often at all. Uses "quadro style" vertex submission + a fixed vertex + * layout to avoid the need to generate a vertex program or vtxfmt. 
+ */ + +struct nv40_render_stage { + struct draw_stage stage; + struct nv40_context *nv40; + unsigned prim; +}; + +static INLINE struct nv40_render_stage * +nv40_render_stage(struct draw_stage *stage) +{ + return (struct nv40_render_stage *)stage; +} + +static INLINE void +nv40_render_vertex(struct nv40_context *nv40, const struct vertex_header *v) +{ + unsigned i; + + for (i = 0; i < nv40->swtnl.nr_attribs; i++) { + unsigned idx = nv40->swtnl.draw[i]; + unsigned hw = nv40->swtnl.hw[i]; + + switch (nv40->swtnl.emit[i]) { + case EMIT_OMIT: + break; + case EMIT_1F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_1F(hw), 1); + OUT_RING (fui(v->data[idx][0])); + break; + case EMIT_2F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_2F_X(hw), 2); + OUT_RING (fui(v->data[idx][0])); + OUT_RING (fui(v->data[idx][1])); + break; + case EMIT_3F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_3F_X(hw), 3); + OUT_RING (fui(v->data[idx][0])); + OUT_RING (fui(v->data[idx][1])); + OUT_RING (fui(v->data[idx][2])); + break; + case EMIT_4F: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_4F_X(hw), 4); + OUT_RING (fui(v->data[idx][0])); + OUT_RING (fui(v->data[idx][1])); + OUT_RING (fui(v->data[idx][2])); + OUT_RING (fui(v->data[idx][3])); + break; + case EMIT_4UB: + BEGIN_RING(curie, NV40TCL_VTX_ATTR_4UB(hw), 1); + OUT_RING (pack_ub4(float_to_ubyte(v->data[idx][0]), + float_to_ubyte(v->data[idx][1]), + float_to_ubyte(v->data[idx][2]), + float_to_ubyte(v->data[idx][3]))); + break; + default: + assert(0); + break; + } + } +} + +static INLINE void +nv40_render_prim(struct draw_stage *stage, struct prim_header *prim, + unsigned mode, unsigned count) +{ + struct nv40_render_stage *rs = nv40_render_stage(stage); + struct nv40_context *nv40 = rs->nv40; + struct nouveau_pushbuf *pb = nv40->nvws->channel->pushbuf; + unsigned i; + + /* Ensure there's room for 4xfloat32 + potentially 3 begin/end */ + if (pb->remaining < ((count * 20) + 6)) { + if (rs->prim != NV40TCL_BEGIN_END_STOP) { + NOUVEAU_ERR("AIII, missed flush\n"); + assert(0); 
+ } + FIRE_RING(NULL); + nv40_state_emit(nv40); + } + + /* Switch primitive modes if necessary */ + if (rs->prim != mode) { + if (rs->prim != NV40TCL_BEGIN_END_STOP) { + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (NV40TCL_BEGIN_END_STOP); + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (mode); + rs->prim = mode; + } + + /* Emit vertex data */ + for (i = 0; i < count; i++) + nv40_render_vertex(nv40, prim->v[i]); + + /* If it's likely we'll need to empty the push buffer soon, finish + * off the primitive now. + */ + if (pb->remaining < ((count * 20) + 6)) { + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (NV40TCL_BEGIN_END_STOP); + rs->prim = NV40TCL_BEGIN_END_STOP; + } +} + +static void +nv40_render_point(struct draw_stage *draw, struct prim_header *prim) +{ + nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_POINTS, 1); +} + +static void +nv40_render_line(struct draw_stage *draw, struct prim_header *prim) +{ + nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_LINES, 2); +} + +static void +nv40_render_tri(struct draw_stage *draw, struct prim_header *prim) +{ + nv40_render_prim(draw, prim, NV40TCL_BEGIN_END_TRIANGLES, 3); +} + +static void +nv40_render_flush(struct draw_stage *draw, unsigned flags) +{ + struct nv40_render_stage *rs = nv40_render_stage(draw); + struct nv40_context *nv40 = rs->nv40; + + if (rs->prim != NV40TCL_BEGIN_END_STOP) { + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (NV40TCL_BEGIN_END_STOP); + rs->prim = NV40TCL_BEGIN_END_STOP; + } +} + +static void +nv40_render_reset_stipple_counter(struct draw_stage *draw) +{ +} + +static void +nv40_render_destroy(struct draw_stage *draw) +{ + FREE(draw); +} + +static INLINE void +emit_mov(struct nv40_vertex_program *vp, + unsigned dst, unsigned src, unsigned vor, unsigned mask) +{ + struct nv40_vertex_program_exec *inst; + + vp->insns = realloc(vp->insns, + sizeof(struct nv40_vertex_program_exec) * + ++vp->nr_insns); + inst = &vp->insns[vp->nr_insns - 1]; + + inst->data[0] = 
0x401f9c6c; + inst->data[1] = 0x0040000d | (src << 8); + inst->data[2] = 0x8106c083; + inst->data[3] = 0x6041ff80 | (dst << 2) | (mask << 13); + inst->const_index = -1; + inst->has_branch_offset = FALSE; + + vp->ir |= (1 << src); + if (vor != ~0) + vp->or |= (1 << vor); +} + +static struct nv40_vertex_program * +create_drawvp(struct nv40_context *nv40) +{ + struct nv40_vertex_program *vp = CALLOC_STRUCT(nv40_vertex_program); + unsigned i; + + emit_mov(vp, NV40_VP_INST_DEST_POS, 0, ~0, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_COL0, 3, 0, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_COL1, 4, 1, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_BFC0, 3, 2, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_BFC1, 4, 3, 0xf); + emit_mov(vp, NV40_VP_INST_DEST_FOGC, 5, 4, 0x8); + for (i = 0; i < 8; i++) + emit_mov(vp, NV40_VP_INST_DEST_TC(i), 8 + i, 14 + i, 0xf); + + vp->insns[vp->nr_insns - 1].data[3] |= 1; + vp->translated = TRUE; + return vp; +} + +struct draw_stage * +nv40_draw_render_stage(struct nv40_context *nv40) +{ + struct nv40_render_stage *render = CALLOC_STRUCT(nv40_render_stage); + + if (!nv40->swtnl.vertprog) + nv40->swtnl.vertprog = create_drawvp(nv40); + + render->nv40 = nv40; + render->stage.draw = nv40->draw; + render->stage.point = nv40_render_point; + render->stage.line = nv40_render_line; + render->stage.tri = nv40_render_tri; + render->stage.flush = nv40_render_flush; + render->stage.reset_stipple_counter = nv40_render_reset_stipple_counter; + render->stage.destroy = nv40_render_destroy; + + return &render->stage; +} + +boolean +nv40_draw_elements_swtnl(struct pipe_context *pipe, + struct pipe_buffer *idxbuf, unsigned idxbuf_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + unsigned i; + void *map; + + if (!nv40_state_validate_swtnl(nv40)) + return FALSE; + nv40->state.dirty &= ~(1ULL << NV40_STATE_VTXBUF); + nv40_state_emit(nv40); + + for (i = 0; i < nv40->vtxbuf_nr; i++) { + map 
= ws->buffer_map(ws, nv40->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(nv40->draw, i, map); + } + + if (idxbuf) { + map = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map); + } else { + draw_set_mapped_element_buffer(nv40->draw, 0, NULL); + } + + if (nv40->constbuf[PIPE_SHADER_VERTEX]) { + const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX]; + + map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX], + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_constant_buffer(nv40->draw, map, nr); + } + + draw_arrays(nv40->draw, mode, start, count); + + for (i = 0; i < nv40->vtxbuf_nr; i++) + ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer); + + if (idxbuf) + ws->buffer_unmap(ws, idxbuf); + + if (nv40->constbuf[PIPE_SHADER_VERTEX]) + ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]); + + draw_flush(nv40->draw); + pipe->flush(pipe, 0, NULL); + + return TRUE; +} + +static INLINE void +emit_attrib(struct nv40_context *nv40, unsigned hw, unsigned emit, + unsigned semantic, unsigned index) +{ + unsigned draw_out = draw_find_vs_output(nv40->draw, semantic, index); + unsigned a = nv40->swtnl.nr_attribs++; + + nv40->swtnl.hw[a] = hw; + nv40->swtnl.emit[a] = emit; + nv40->swtnl.draw[a] = draw_out; +} + +static boolean +nv40_state_vtxfmt_validate(struct nv40_context *nv40) +{ + struct nv40_fragment_program *fp = nv40->fragprog; + unsigned colour = 0, texcoords = 0, fog = 0, i; + + /* Determine needed fragprog inputs */ + for (i = 0; i < fp->info.num_inputs; i++) { + switch (fp->info.input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + break; + case TGSI_SEMANTIC_COLOR: + colour |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_GENERIC: + texcoords |= (1 << fp->info.input_semantic_index[i]); + break; + case TGSI_SEMANTIC_FOG: + fog = 1; + break; + default: + assert(0); + } + } + + nv40->swtnl.nr_attribs = 0; + + /* Map draw vtxprog output 
to hw attribute IDs */ + for (i = 0; i < 2; i++) { + if (!(colour & (1 << i))) + continue; + emit_attrib(nv40, 3 + i, EMIT_4UB, TGSI_SEMANTIC_COLOR, i); + } + + for (i = 0; i < 8; i++) { + if (!(texcoords & (1 << i))) + continue; + emit_attrib(nv40, 8 + i, EMIT_4F, TGSI_SEMANTIC_GENERIC, i); + } + + if (fog) { + emit_attrib(nv40, 5, EMIT_1F, TGSI_SEMANTIC_FOG, 0); + } + + emit_attrib(nv40, 0, EMIT_3F, TGSI_SEMANTIC_POSITION, 0); + + return FALSE; +} + +struct nv40_state_entry nv40_state_vtxfmt = { + .validate = nv40_state_vtxfmt_validate, + .dirty = { + .pipe = NV40_NEW_ARRAYS | NV40_NEW_FRAGPROG, + .hw = 0 + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c new file mode 100644 index 0000000000..91dcbebda0 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -0,0 +1,991 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv40_context.h" + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 1 +#define MASK_Y 2 +#define MASK_Z 4 +#define MASK_W 8 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE NV40_FP_OP_DST_SCALE_1X +#define DEF_CTEST NV40_FP_OP_COND_TR +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) +#define scale(s,v) nv40_sr_scale((s), NV40_FP_OP_DST_SCALE_##v) + +#define MAX_CONSTS 128 +#define MAX_IMM 32 +struct nv40_fpc { + struct nv40_fragment_program *fp; + + uint attrib_map[PIPE_MAX_SHADER_INPUTS]; + + unsigned r_temps; + unsigned r_temps_discard; + struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nv40_sreg *r_temp; + + int num_regs; + + unsigned inst_offset; + unsigned have_const; + + struct { + int pipe; + float vals[4]; + } consts[MAX_CONSTS]; + int nr_consts; + + 
struct nv40_sreg imm[MAX_IMM]; + unsigned nr_imm; +}; + +static INLINE struct nv40_sreg +temp(struct nv40_fpc *fpc) +{ + int idx = ffs(~fpc->r_temps) - 1; + + if (idx < 0) { + NOUVEAU_ERR("out of temps!!\n"); + assert(0); + return nv40_sr(NV40SR_TEMP, 0); + } + + fpc->r_temps |= (1 << idx); + fpc->r_temps_discard |= (1 << idx); + return nv40_sr(NV40SR_TEMP, idx); +} + +static INLINE void +release_temps(struct nv40_fpc *fpc) +{ + fpc->r_temps &= ~fpc->r_temps_discard; + fpc->r_temps_discard = 0; +} + +static INLINE struct nv40_sreg +constant(struct nv40_fpc *fpc, int pipe, float vals[4]) +{ + int idx; + + if (fpc->nr_consts == MAX_CONSTS) + assert(0); + idx = fpc->nr_consts++; + + fpc->consts[idx].pipe = pipe; + if (pipe == -1) + memcpy(fpc->consts[idx].vals, vals, 4 * sizeof(float)); + return nv40_sr(NV40SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv40_fp_arith((cc), (s), NV40_FP_OP_OPCODE_##o, \ + (d), (m), (s0), (s1), (s2)) +#define tex(cc,s,o,u,d,m,s0,s1,s2) \ + nv40_fp_tex((cc), (s), NV40_FP_OP_OPCODE_##o, (u), \ + (d), (m), (s0), none, none) + +static void +grow_insns(struct nv40_fpc *fpc, int size) +{ + struct nv40_fragment_program *fp = fpc->fp; + + fp->insn_len += size; + fp->insn = realloc(fp->insn, sizeof(uint32_t) * fp->insn_len); +} + +static void +emit_src(struct nv40_fpc *fpc, int pos, struct nv40_sreg src) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + uint32_t sr = 0; + + switch (src.type) { + case NV40SR_INPUT: + sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); + hw[0] |= (src.index << NV40_FP_OP_INPUT_SRC_SHIFT); + break; + case NV40SR_OUTPUT: + sr |= NV40_FP_REG_SRC_HALF; + /* fall-through */ + case NV40SR_TEMP: + sr |= (NV40_FP_REG_TYPE_TEMP << NV40_FP_REG_TYPE_SHIFT); + sr |= (src.index << NV40_FP_REG_SRC_SHIFT); + break; + case NV40SR_CONST: + if (!fpc->have_const) { + grow_insns(fpc, 4); + fpc->have_const = 1; + } + + hw = &fp->insn[fpc->inst_offset]; + if 
(fpc->consts[src.index].pipe >= 0) { + struct nv40_fragment_program_data *fpd; + + fp->consts = realloc(fp->consts, ++fp->nr_consts * + sizeof(*fpd)); + fpd = &fp->consts[fp->nr_consts - 1]; + fpd->offset = fpc->inst_offset + 4; + fpd->index = fpc->consts[src.index].pipe; + memset(&fp->insn[fpd->offset], 0, sizeof(uint32_t) * 4); + } else { + memcpy(&fp->insn[fpc->inst_offset + 4], + fpc->consts[src.index].vals, + sizeof(uint32_t) * 4); + } + + sr |= (NV40_FP_REG_TYPE_CONST << NV40_FP_REG_TYPE_SHIFT); + break; + case NV40SR_NONE: + sr |= (NV40_FP_REG_TYPE_INPUT << NV40_FP_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV40_FP_REG_NEGATE; + + if (src.abs) + hw[1] |= (1 << (29 + pos)); + + sr |= ((src.swz[0] << NV40_FP_REG_SWZ_X_SHIFT) | + (src.swz[1] << NV40_FP_REG_SWZ_Y_SHIFT) | + (src.swz[2] << NV40_FP_REG_SWZ_Z_SHIFT) | + (src.swz[3] << NV40_FP_REG_SWZ_W_SHIFT)); + + hw[pos + 1] |= sr; +} + +static void +emit_dst(struct nv40_fpc *fpc, struct nv40_sreg dst) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw = &fp->insn[fpc->inst_offset]; + + switch (dst.type) { + case NV40SR_TEMP: + if (fpc->num_regs < (dst.index + 1)) + fpc->num_regs = dst.index + 1; + break; + case NV40SR_OUTPUT: + if (dst.index == 1) { + fp->fp_control |= 0xe; + } else { + hw[0] |= NV40_FP_OP_OUT_REG_HALF; + } + break; + case NV40SR_NONE: + hw[0] |= (1 << 30); + break; + default: + assert(0); + } + + hw[0] |= (dst.index << NV40_FP_OP_OUT_REG_SHIFT); +} + +static void +nv40_fp_arith(struct nv40_fpc *fpc, int sat, int op, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ + struct nv40_fragment_program *fp = fpc->fp; + uint32_t *hw; + + fpc->inst_offset = fp->insn_len; + fpc->have_const = 0; + grow_insns(fpc, 4); + hw = &fp->insn[fpc->inst_offset]; + memset(hw, 0, sizeof(uint32_t) * 4); + + if (op == NV40_FP_OP_OPCODE_KIL) + fp->fp_control |= NV40TCL_FP_CONTROL_KIL; + hw[0] |= (op << 
NV40_FP_OP_OPCODE_SHIFT); + hw[0] |= (mask << NV40_FP_OP_OUTMASK_SHIFT); + hw[2] |= (dst.dst_scale << NV40_FP_OP_DST_SCALE_SHIFT); + + if (sat) + hw[0] |= NV40_FP_OP_OUT_SAT; + + if (dst.cc_update) + hw[0] |= NV40_FP_OP_COND_WRITE_ENABLE; + hw[1] |= (dst.cc_test << NV40_FP_OP_COND_SHIFT); + hw[1] |= ((dst.cc_swz[0] << NV40_FP_OP_COND_SWZ_X_SHIFT) | + (dst.cc_swz[1] << NV40_FP_OP_COND_SWZ_Y_SHIFT) | + (dst.cc_swz[2] << NV40_FP_OP_COND_SWZ_Z_SHIFT) | + (dst.cc_swz[3] << NV40_FP_OP_COND_SWZ_W_SHIFT)); + + emit_dst(fpc, dst); + emit_src(fpc, 0, s0); + emit_src(fpc, 1, s1); + emit_src(fpc, 2, s2); +} + +static void +nv40_fp_tex(struct nv40_fpc *fpc, int sat, int op, int unit, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, struct nv40_sreg s2) +{ + struct nv40_fragment_program *fp = fpc->fp; + + nv40_fp_arith(fpc, sat, op, dst, mask, s0, s1, s2); + + fp->insn[fpc->inst_offset] |= (unit << NV40_FP_OP_TEX_UNIT_SHIFT); + fp->samplers |= (1 << unit); +} + +static INLINE struct nv40_sreg +tgsi_src(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc) +{ + struct nv40_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv40_sr(NV40SR_INPUT, + fpc->attrib_map[fsrc->SrcRegister.Index]); + break; + case TGSI_FILE_CONSTANT: + src = constant(fpc, fsrc->SrcRegister.Index, NULL); + break; + case TGSI_FILE_IMMEDIATE: + assert(fsrc->SrcRegister.Index < fpc->nr_imm); + src = fpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + src = fpc->r_temp[fsrc->SrcRegister.Index]; + break; + /* NV40 fragprog result regs are just temps, so this is simple */ + case TGSI_FILE_OUTPUT: + src = fpc->r_result[fsrc->SrcRegister.Index]; + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + 
src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_fpc *fpc, const struct tgsi_full_dst_register *fdst) { + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + return fpc->r_result[fdst->DstRegister.Index]; + case TGSI_FILE_TEMPORARY: + return fpc->r_temp[fdst->DstRegister.Index]; + case TGSI_FILE_NULL: + return nv40_sr(NV40SR_NONE, 0); + default: + NOUVEAU_ERR("bad dst file %d\n", fdst->DstRegister.File); + return nv40_sr(NV40SR_NONE, 0); + } +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +src_native_swz(struct nv40_fpc *fpc, const struct tgsi_full_src_register *fsrc, + struct nv40_sreg *src) +{ + const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nv40_sreg tgsi = tgsi_src(fpc, fsrc); + uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; + uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, + fsrc->SrcRegisterExtSwz.NegateY, + fsrc->SrcRegisterExtSwz.NegateZ, + fsrc->SrcRegisterExtSwz.NegateW }; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + mask |= (1 << c); + break; + case TGSI_EXTSWIZZLE_ZERO: + zero_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + case TGSI_EXTSWIZZLE_ONE: + one_mask |= (1 << c); + tgsi.swz[c] = SWZ_X; + break; + default: + assert(0); + } + + if (!tgsi.negate && neg[c]) + neg_mask |= (1 << c); + } + + if (mask == MASK_ALL && !neg_mask) + return TRUE; + + *src = temp(fpc); + + if (mask) + arith(fpc, 0, MOV, *src, mask, tgsi, none, none); + + if (zero_mask) + arith(fpc, 0, SFL, *src, zero_mask, *src, none, none); + + if (one_mask) + arith(fpc, 0, STR, 
*src, one_mask, *src, none, none); + + if (neg_mask) { + struct nv40_sreg one = temp(fpc); + arith(fpc, 0, STR, one, neg_mask, one, none, none); + arith(fpc, 0, MUL, *src, neg_mask, *src, neg(one), none); + } + + return FALSE; +} + +static boolean +nv40_fragprog_parse_instruction(struct nv40_fpc *fpc, + const struct tgsi_full_instruction *finst) +{ + const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nv40_sreg src[3], dst, tmp; + int mask, sat, unit; + int ai = -1, ci = -1, ii = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(fpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(fpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + if ((ci == -1 && ii == -1) || + ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), none, none); + } + break; + case TGSI_FILE_IMMEDIATE: + if ((ci == -1 && ii == -1) || + ii == fsrc->SrcRegister.Index) { + ii = fsrc->SrcRegister.Index; + src[i] = tgsi_src(fpc, fsrc); + } else { + src[i] = temp(fpc); + arith(fpc, 0, MOV, src[i], MASK_ALL, + tgsi_src(fpc, fsrc), 
none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + case TGSI_FILE_SAMPLER: + unit = fsrc->SrcRegister.Index; + break; + case TGSI_FILE_OUTPUT: + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(fpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + sat = (finst->Instruction.Saturate == TGSI_SAT_ZERO_ONE); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(fpc, sat, MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(fpc, sat, ADD, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_CMP: + tmp = temp(fpc); + arith(fpc, sat, MOV, dst, mask, src[2], none, none); + tmp.cc_update = 1; + arith(fpc, 0, MOV, tmp, 0xf, src[0], none, none); + dst.cc_test = NV40_VP_INST_COND_LT; + arith(fpc, sat, MOV, dst, mask, src[1], none, none); + break; + case TGSI_OPCODE_COS: + arith(fpc, sat, COS, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DDX: + if (mask & (MASK_Z | MASK_W)) { + tmp = temp(fpc); + arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, + swz(src[0], Z, W, Z, W), none, none); + arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + swz(tmp, X, Y, X, Y), none, none); + arith(fpc, sat, DDX, tmp, MASK_X | MASK_Y, src[0], + none, none); + arith(fpc, 0, MOV, dst, mask, tmp, none, none); + } else { + arith(fpc, sat, DDX, dst, mask, src[0], none, none); + } + break; + case TGSI_OPCODE_DDY: + if (mask & (MASK_Z | MASK_W)) { + tmp = temp(fpc); + arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, + swz(src[0], Z, W, Z, W), none, none); + arith(fpc, 0, MOV, tmp, MASK_Z | MASK_W, + swz(tmp, X, Y, X, Y), none, none); + arith(fpc, sat, DDY, tmp, MASK_X | MASK_Y, src[0], + none, none); + arith(fpc, 0, MOV, dst, mask, tmp, none, none); + } else { + arith(fpc, sat, DDY, dst, mask, src[0], none, none); + } + break; + case TGSI_OPCODE_DP3: + arith(fpc, sat, DP3, dst, mask, src[0], src[1], none); + break; + case 
TGSI_OPCODE_DP4: + arith(fpc, sat, DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[1], none); + arith(fpc, sat, ADD, dst, mask, swz(tmp, X, X, X, X), + swz(src[1], W, W, W, W), none); + break; + case TGSI_OPCODE_DST: + arith(fpc, sat, DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(fpc, sat, EX2, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FLR: + arith(fpc, sat, FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(fpc, sat, FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_KILP: + arith(fpc, 0, KIL, none, 0, none, none, none); + break; + case TGSI_OPCODE_KIL: + dst = nv40_sr(NV40SR_NONE, 0); + dst.cc_update = 1; + arith(fpc, 0, MOV, dst, MASK_ALL, src[0], none, none); + dst.cc_update = 0; dst.cc_test = NV40_FP_OP_COND_LT; + arith(fpc, 0, KIL, dst, 0, none, none, none); + break; + case TGSI_OPCODE_LG2: + arith(fpc, sat, LG2, dst, mask, src[0], none, none); + break; +// case TGSI_OPCODE_LIT: + case TGSI_OPCODE_LRP: + tmp = temp(fpc); + arith(fpc, 0, MAD, tmp, mask, neg(src[0]), src[2], src[2]); + arith(fpc, sat, MAD, dst, mask, src[0], src[1], tmp); + break; + case TGSI_OPCODE_MAD: + arith(fpc, sat, MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(fpc, sat, MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(fpc, sat, MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(fpc, sat, MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(fpc, sat, MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_NOISE1: + case TGSI_OPCODE_NOISE2: + case TGSI_OPCODE_NOISE3: + case TGSI_OPCODE_NOISE4: + arith(fpc, sat, SFL, dst, mask, none, none, none); + break; + case TGSI_OPCODE_POW: + tmp = temp(fpc); + arith(fpc, 0, LG2, tmp, MASK_X, + swz(src[0], X, X, X, X), none, none); + arith(fpc, 
0, MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(fpc, sat, EX2, dst, mask, + swz(tmp, X, X, X, X), none, none); + break; + case TGSI_OPCODE_RCP: + arith(fpc, sat, RCP, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_RET: + assert(0); + break; + case TGSI_OPCODE_RFL: + tmp = temp(fpc); + arith(fpc, 0, DP3, tmp, MASK_X, src[0], src[0], none); + arith(fpc, 0, DP3, tmp, MASK_Y, src[0], src[1], none); + arith(fpc, 0, DIV, scale(tmp, 2X), MASK_Z, + swz(tmp, Y, Y, Y, Y), swz(tmp, X, X, X, X), none); + arith(fpc, sat, MAD, dst, mask, + swz(tmp, Z, Z, Z, Z), src[0], neg(src[1])); + break; + case TGSI_OPCODE_RSQ: + tmp = temp(fpc); + arith(fpc, 0, LG2, scale(tmp, INV_2X), MASK_X, + abs(swz(src[0], X, X, X, X)), none, none); + arith(fpc, sat, EX2, dst, mask, + neg(swz(tmp, X, X, X, X)), none, none); + break; + case TGSI_OPCODE_SCS: + if (mask & MASK_X) { + arith(fpc, sat, COS, dst, MASK_X, + swz(src[0], X, X, X, X), none, none); + } + if (mask & MASK_Y) { + arith(fpc, sat, SIN, dst, MASK_Y, + swz(src[0], X, X, X, X), none, none); + } + break; + case TGSI_OPCODE_SEQ: + arith(fpc, sat, SEQ, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SFL: + arith(fpc, sat, SFL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGE: + arith(fpc, sat, SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SGT: + arith(fpc, sat, SGT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SIN: + arith(fpc, sat, SIN, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_SLE: + arith(fpc, sat, SLE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(fpc, sat, SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SNE: + arith(fpc, sat, SNE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_STR: + arith(fpc, sat, STR, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(fpc, sat, ADD, dst, mask, src[0], neg(src[1]), none); + break; 
+ case TGSI_OPCODE_TEX: + tex(fpc, sat, TEX, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXB: + tex(fpc, sat, TXB, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_TXP: + tex(fpc, sat, TXP, unit, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_XPD: + tmp = temp(fpc); + arith(fpc, 0, MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(fpc, sat, MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return FALSE; + } + + release_temps(fpc); + return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_attrib(struct nv40_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV40_FP_OP_INPUT_SRC_POSITION; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_FP_OP_INPUT_SRC_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_FP_OP_INPUT_SRC_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV40_FP_OP_INPUT_SRC_FOGC; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV40_FP_OP_INPUT_SRC_TC(fdec->Semantic. 
+ SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad input semantic\n"); + return FALSE; + } + + fpc->attrib_map[fdec->DeclarationRange.First] = hw; + return TRUE; +} + +static boolean +nv40_fragprog_parse_decl_output(struct nv40_fpc *fpc, + const struct tgsi_full_declaration *fdec) +{ + unsigned idx = fdec->DeclarationRange.First; + unsigned hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = 1; + break; + case TGSI_SEMANTIC_COLOR: + switch (fdec->Semantic.SemanticIndex) { + case 0: hw = 0; break; + case 1: hw = 2; break; + case 2: hw = 3; break; + case 3: hw = 4; break; + default: + NOUVEAU_ERR("bad rcol index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + fpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); + fpc->r_temps |= (1 << hw); + return TRUE; +} + +static boolean +nv40_fragprog_prepare(struct nv40_fpc *fpc) +{ + struct tgsi_parse_context p; + int high_temp = -1, i; + + tgsi_parse_init(&p, fpc->fp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_INPUT: + if (!nv40_fragprog_parse_decl_attrib(fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_OUTPUT: + if (!nv40_fragprog_parse_decl_output(fpc, fdec)) + goto out_err; + break; + case TGSI_FILE_TEMPORARY: + if (fdec->DeclarationRange.Last > high_temp) { + high_temp = + fdec->DeclarationRange.Last; + } + break; + default: + break; + } + } + break; + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *imm; + float vals[4]; + + imm = &p.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(fpc->nr_imm < 
MAX_IMM); + + vals[0] = imm->u.ImmediateFloat32[0].Float; + vals[1] = imm->u.ImmediateFloat32[1].Float; + vals[2] = imm->u.ImmediateFloat32[2].Float; + vals[3] = imm->u.ImmediateFloat32[3].Float; + fpc->imm[fpc->nr_imm++] = constant(fpc, -1, vals); + } + break; + default: + break; + } + } + tgsi_parse_free(&p); + + if (++high_temp) { + fpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + for (i = 0; i < high_temp; i++) + fpc->r_temp[i] = temp(fpc); + fpc->r_temps_discard = 0; + } + + return TRUE; + +out_err: + if (fpc->r_temp) + FREE(fpc->r_temp); + tgsi_parse_free(&p); + return FALSE; +} + +static void +nv40_fragprog_translate(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + struct tgsi_parse_context parse; + struct nv40_fpc *fpc = NULL; + + fpc = CALLOC(1, sizeof(struct nv40_fpc)); + if (!fpc) + return; + fpc->fp = fp; + fpc->num_regs = 2; + + if (!nv40_fragprog_prepare(fpc)) { + FREE(fpc); + return; + } + + tgsi_parse_init(&parse, fp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + + finst = &parse.FullToken.FullInstruction; + if (!nv40_fragprog_parse_instruction(fpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + fp->fp_control |= fpc->num_regs << NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT; + + /* Terminate final instruction */ + fp->insn[fpc->inst_offset] |= 0x00000001; + + /* Append NOP + END instruction, may or may not be necessary. 
*/ + fpc->inst_offset = fp->insn_len; + grow_insns(fpc, 4); + fp->insn[fpc->inst_offset + 0] = 0x00000001; + fp->insn[fpc->inst_offset + 1] = 0x00000000; + fp->insn[fpc->inst_offset + 2] = 0x00000000; + fp->insn[fpc->inst_offset + 3] = 0x00000000; + + fp->translated = TRUE; +out_err: + tgsi_parse_free(&parse); + if (fpc->r_temp) + FREE(fpc->r_temp); + FREE(fpc); +} + +static void +nv40_fragprog_upload(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + struct pipe_winsys *ws = nv40->pipe.winsys; + const uint32_t le = 1; + uint32_t *map; + int i; + + map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + +#if 0 + for (i = 0; i < fp->insn_len; i++) { + fflush(stdout); fflush(stderr); + NOUVEAU_ERR("%d 0x%08x\n", i, fp->insn[i]); + fflush(stdout); fflush(stderr); + } +#endif + + if ((*(const uint8_t *)&le)) { + for (i = 0; i < fp->insn_len; i++) { + map[i] = fp->insn[i]; + } + } else { + /* Weird swapping for big-endian chips */ + for (i = 0; i < fp->insn_len; i++) { + map[i] = ((fp->insn[i] & 0xffff) << 16) | + ((fp->insn[i] >> 16) & 0xffff); + } + } + + ws->buffer_unmap(ws, fp->buffer); +} + +static boolean +nv40_fragprog_validate(struct nv40_context *nv40) +{ + struct nv40_fragment_program *fp = nv40->fragprog; + struct pipe_buffer *constbuf = + nv40->constbuf[PIPE_SHADER_FRAGMENT]; + struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_stateobj *so; + boolean new_consts = FALSE; + int i; + + if (fp->translated) + goto update_constants; + + nv40->fallback_swrast &= ~NV40_NEW_FRAGPROG; + nv40_fragprog_translate(nv40, fp); + if (!fp->translated) { + nv40->fallback_swrast |= NV40_NEW_FRAGPROG; + return FALSE; + } + + fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); + nv40_fragprog_upload(nv40, fp); + + so = so_new(4, 1); + so_method(so, nv40->screen->curie, NV40TCL_FP_ADDRESS, 1); + so_reloc (so, fp->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 
NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1); + so_method(so, nv40->screen->curie, NV40TCL_FP_CONTROL, 1); + so_data (so, fp->fp_control); + so_ref(so, &fp->so); + +update_constants: + if (fp->nr_consts) { + float *map; + + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); + for (i = 0; i < fp->nr_consts; i++) { + struct nv40_fragment_program_data *fpd = &fp->consts[i]; + uint32_t *p = &fp->insn[fpd->offset]; + uint32_t *cb = (uint32_t *)&map[fpd->index * 4]; + + if (!memcmp(p, cb, 4 * sizeof(float))) + continue; + memcpy(p, cb, 4 * sizeof(float)); + new_consts = TRUE; + } + ws->buffer_unmap(ws, constbuf); + + if (new_consts) + nv40_fragprog_upload(nv40, fp); + } + + if (new_consts || fp->so != nv40->state.hw[NV40_STATE_FRAGPROG]) { + so_ref(fp->so, &nv40->state.hw[NV40_STATE_FRAGPROG]); + return TRUE; + } + + return FALSE; +} + +void +nv40_fragprog_destroy(struct nv40_context *nv40, + struct nv40_fragment_program *fp) +{ + if (fp->insn_len) + FREE(fp->insn); +} + +struct nv40_state_entry nv40_state_fragprog = { + .validate = nv40_fragprog_validate, + .dirty = { + .pipe = NV40_NEW_FRAGPROG, + .hw = NV40_STATE_FRAGPROG + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c new file mode 100644 index 0000000000..0227d22620 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_fragtex.c @@ -0,0 +1,168 @@ +#include "nv40_context.h" + +#define _(m,tf,ts0x,ts0y,ts0z,ts0w,ts1x,ts1y,ts1z,ts1w,sx,sy,sz,sw) \ +{ \ + TRUE, \ + PIPE_FORMAT_##m, \ + NV40TCL_TEX_FORMAT_FORMAT_##tf, \ + (NV40TCL_TEX_SWIZZLE_S0_X_##ts0x | NV40TCL_TEX_SWIZZLE_S0_Y_##ts0y | \ + NV40TCL_TEX_SWIZZLE_S0_Z_##ts0z | NV40TCL_TEX_SWIZZLE_S0_W_##ts0w | \ + NV40TCL_TEX_SWIZZLE_S1_X_##ts1x | NV40TCL_TEX_SWIZZLE_S1_Y_##ts1y | \ + NV40TCL_TEX_SWIZZLE_S1_Z_##ts1z | NV40TCL_TEX_SWIZZLE_S1_W_##ts1w), \ + ((NV40TCL_TEX_FILTER_SIGNED_RED*sx) | (NV40TCL_TEX_FILTER_SIGNED_GREEN*sy) | \ + (NV40TCL_TEX_FILTER_SIGNED_BLUE*sz) | 
(NV40TCL_TEX_FILTER_SIGNED_ALPHA*sw)) \ +} + +struct nv40_texture_format { + boolean defined; + uint pipe; + int format; + int swizzle; + int sign; +}; + +static struct nv40_texture_format +nv40_texture_formats[] = { + _(A8R8G8B8_UNORM, A8R8G8B8, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(A1R5G5B5_UNORM, A1R5G5B5, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(A4R4G4B4_UNORM, A4R4G4B4, S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(R5G6B5_UNORM , R5G6B5 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), + _(L8_UNORM , L8 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(A8_UNORM , L8 , ZERO, ZERO, ZERO, S1, X, X, X, X, 0, 0, 0, 0), + _(R16_SNORM , A16 , ZERO, ZERO, S1, ONE, X, X, X, Y, 1, 1, 1, 1), + _(I8_UNORM , L8 , S1, S1, S1, S1, X, X, X, X, 0, 0, 0, 0), + _(A8L8_UNORM , A8L8 , S1, S1, S1, S1, X, X, X, Y, 0, 0, 0, 0), + _(Z16_UNORM , Z16 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(Z24S8_UNORM , Z24 , S1, S1, S1, ONE, X, X, X, X, 0, 0, 0, 0), + _(DXT1_RGB , DXT1 , S1, S1, S1, ONE, X, Y, Z, W, 0, 0, 0, 0), + _(DXT1_RGBA , DXT1 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(DXT3_RGBA , DXT3 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + _(DXT5_RGBA , DXT5 , S1, S1, S1, S1, X, Y, Z, W, 0, 0, 0, 0), + {}, +}; + +static struct nv40_texture_format * +nv40_fragtex_format(uint pipe_format) +{ + struct nv40_texture_format *tf = nv40_texture_formats; + + while (tf->defined) { + if (tf->pipe == pipe_format) + return tf; + tf++; + } + + NOUVEAU_ERR("unknown texture format %s\n", pf_name(pipe_format)); + return NULL; +} + + +static struct nouveau_stateobj * +nv40_fragtex_build(struct nv40_context *nv40, int unit) +{ + struct nv40_sampler_state *ps = nv40->tex_sampler[unit]; + struct nv40_miptree *nv40mt = nv40->tex_miptree[unit]; + struct pipe_texture *pt = &nv40mt->base; + struct nv40_texture_format *tf; + struct nouveau_stateobj *so; + uint32_t txf, txs, txp; + unsigned tex_flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + + tf = nv40_fragtex_format(pt->format); + if 
(!tf) + assert(0); + + txf = ps->fmt; + txf |= tf->format | 0x8000; + txf |= ((pt->last_level + 1) << NV40TCL_TEX_FORMAT_MIPMAP_COUNT_SHIFT); + + if (1) /* XXX */ + txf |= NV40TCL_TEX_FORMAT_NO_BORDER; + + switch (pt->target) { + case PIPE_TEXTURE_CUBE: + txf |= NV40TCL_TEX_FORMAT_CUBIC; + /* fall-through */ + case PIPE_TEXTURE_2D: + txf |= NV40TCL_TEX_FORMAT_DIMS_2D; + break; + case PIPE_TEXTURE_3D: + txf |= NV40TCL_TEX_FORMAT_DIMS_3D; + break; + case PIPE_TEXTURE_1D: + txf |= NV40TCL_TEX_FORMAT_DIMS_1D; + break; + default: + NOUVEAU_ERR("Unknown target %d\n", pt->target); + return NULL; + } + + if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + txp = 0; + } else { + txp = nv40mt->level[0].pitch; + txf |= NV40TCL_TEX_FORMAT_LINEAR; + } + + txs = tf->swizzle; + + so = so_new(16, 2); + so_method(so, nv40->screen->curie, NV40TCL_TEX_OFFSET(unit), 8); + so_reloc (so, nv40mt->buffer, 0, tex_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (so, nv40mt->buffer, txf, tex_flags | NOUVEAU_BO_OR, + NV40TCL_TEX_FORMAT_DMA0, NV40TCL_TEX_FORMAT_DMA1); + so_data (so, ps->wrap); + so_data (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en); + so_data (so, txs); + so_data (so, ps->filt | tf->sign | 0x2000 /*voodoo*/); + so_data (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) | + pt->height[0]); + so_data (so, ps->bcol); + so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1); + so_data (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp); + + return so; +} + +static boolean +nv40_fragtex_validate(struct nv40_context *nv40) +{ + struct nv40_fragment_program *fp = nv40->fragprog; + struct nv40_state *state = &nv40->state; + struct nouveau_stateobj *so; + unsigned samplers, unit; + + samplers = state->fp_samplers & ~fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so = so_new(2, 0); + so_method(so, nv40->screen->curie, NV40TCL_TEX_ENABLE(unit), 1); + so_data (so, 0); + so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); + 
state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); + } + + samplers = nv40->dirty_samplers & fp->samplers; + while (samplers) { + unit = ffs(samplers) - 1; + samplers &= ~(1 << unit); + + so = nv40_fragtex_build(nv40, unit); + so_ref(so, &nv40->state.hw[NV40_STATE_FRAGTEX0 + unit]); + state->dirty |= (1ULL << (NV40_STATE_FRAGTEX0 + unit)); + } + + nv40->state.fp_samplers = fp->samplers; + return FALSE; +} + +struct nv40_state_entry nv40_state_fragtex = { + .validate = nv40_fragtex_validate, + .dirty = { + .pipe = NV40_NEW_SAMPLER | NV40_NEW_FRAGPROG, + .hw = 0 + } +}; + diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c new file mode 100644 index 0000000000..e38b1e7f5c --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -0,0 +1,238 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "nv40_context.h" + +static void +nv40_miptree_layout(struct nv40_miptree *mt) +{ + struct pipe_texture *pt = &mt->base; + uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0]; + uint offset = 0; + int nr_faces, l, f; + uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER | + PIPE_TEXTURE_USAGE_DEPTH_STENCIL | + PIPE_TEXTURE_USAGE_RENDER_TARGET | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_PRIMARY); + + if (pt->target == PIPE_TEXTURE_CUBE) { + nr_faces = 6; + } else + if (pt->target == PIPE_TEXTURE_3D) { + nr_faces = pt->depth[0]; + } else { + nr_faces = 1; + } + + for (l = 0; l <= pt->last_level; l++) { + pt->width[l] = width; + pt->height[l] = height; + pt->depth[l] = depth; + pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height); + + if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) + mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64); + else + mt->level[l].pitch = pt->width[l] * pt->block.size; + + mt->level[l].image_offset = + CALLOC(nr_faces, 
sizeof(unsigned)); + + width = MAX2(1, width >> 1); + height = MAX2(1, height >> 1); + depth = MAX2(1, depth >> 1); + } + + for (f = 0; f < nr_faces; f++) { + for (l = 0; l < pt->last_level; l++) { + mt->level[l].image_offset[f] = offset; + + if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) && + pt->width[l + 1] > 1 && pt->height[l + 1] > 1) + offset += align(mt->level[l].pitch * pt->height[l], 64); + else + offset += mt->level[l].pitch * pt->height[l]; + } + + mt->level[l].image_offset[f] = offset; + offset += mt->level[l].pitch * pt->height[l]; + } + + mt->total_size = offset; +} + +static struct pipe_texture * +nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) +{ + struct pipe_winsys *ws = pscreen->winsys; + struct nv40_miptree *mt; + unsigned buf_usage = PIPE_BUFFER_USAGE_PIXEL | + NOUVEAU_BUFFER_USAGE_TEXTURE; + + mt = MALLOC(sizeof(struct nv40_miptree)); + if (!mt) + return NULL; + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + mt->shadow_tex = NULL; + mt->shadow_surface = NULL; + + /* Swizzled textures must be POT */ + if (pt->width[0] & (pt->width[0] - 1) || + pt->height[0] & (pt->height[0] - 1)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY | + PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + PIPE_TEXTURE_USAGE_DEPTH_STENCIL)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + else { + switch (pt->format) { + /* TODO: Figure out which formats can be swizzled */ + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_R16_SNORM: + { + if (debug_get_bool_option("NOUVEAU_NO_SWIZZLE", FALSE)) + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + break; + } + default: + mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR; + } + } + + if (pt->tex_usage & PIPE_TEXTURE_USAGE_DYNAMIC) + buf_usage |= 
PIPE_BUFFER_USAGE_CPU_READ_WRITE; + + nv40_miptree_layout(mt); + + mt->buffer = ws->buffer_create(ws, 256, buf_usage, mt->total_size); + if (!mt->buffer) { + FREE(mt); + return NULL; + } + + return &mt->base; +} + +static struct pipe_texture * +nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt, + const unsigned *stride, struct pipe_buffer *pb) +{ + struct nv40_miptree *mt; + + /* Only supports 2D, non-mipmapped textures for the moment */ + if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 || + pt->depth[0] != 1) + return NULL; + + mt = CALLOC_STRUCT(nv40_miptree); + if (!mt) + return NULL; + + mt->base = *pt; + mt->base.refcount = 1; + mt->base.screen = pscreen; + mt->level[0].pitch = stride[0]; + mt->level[0].image_offset = CALLOC(1, sizeof(unsigned)); + + pipe_buffer_reference(pscreen, &mt->buffer, pb); + return &mt->base; +} + +static void +nv40_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt) +{ + struct pipe_texture *pt = *ppt; + struct nv40_miptree *mt = (struct nv40_miptree *)pt; + int l; + + *ppt = NULL; + if (--pt->refcount) + return; + + pipe_buffer_reference(pscreen, &mt->buffer, NULL); + for (l = 0; l <= pt->last_level; l++) { + if (mt->level[l].image_offset) + FREE(mt->level[l].image_offset); + } + + if (mt->shadow_tex) { + if (mt->shadow_surface) + pscreen->tex_surface_release(pscreen, &mt->shadow_surface); + nv40_miptree_release(pscreen, &mt->shadow_tex); + } + + FREE(mt); +} + +static struct pipe_surface * +nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + unsigned flags) +{ + struct nv40_miptree *mt = (struct nv40_miptree *)pt; + struct pipe_surface *ps; + + ps = CALLOC_STRUCT(pipe_surface); + if (!ps) + return NULL; + pipe_texture_reference(&ps->texture, pt); + ps->format = pt->format; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->block = pt->block; + ps->nblocksx = 
pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = mt->level[level].pitch; + ps->usage = flags; + ps->status = PIPE_SURFACE_STATUS_DEFINED; + ps->refcount = 1; + ps->face = face; + ps->level = level; + ps->zslice = zslice; + + if (pt->target == PIPE_TEXTURE_CUBE) { + ps->offset = mt->level[level].image_offset[face]; + } else + if (pt->target == PIPE_TEXTURE_3D) { + ps->offset = mt->level[level].image_offset[zslice]; + } else { + ps->offset = mt->level[level].image_offset[0]; + } + + return ps; +} + +static void +nv40_miptree_surface_del(struct pipe_screen *pscreen, + struct pipe_surface **psurface) +{ + struct pipe_surface *ps = *psurface; + + *psurface = NULL; + if (--ps->refcount > 0) + return; + + pipe_texture_reference(&ps->texture, NULL); + FREE(ps); +} + +void +nv40_screen_init_miptree_functions(struct pipe_screen *pscreen) +{ + pscreen->texture_create = nv40_miptree_create; + pscreen->texture_blanket = nv40_miptree_blanket; + pscreen->texture_release = nv40_miptree_release; + pscreen->get_tex_surface = nv40_miptree_surface_new; + pscreen->tex_surface_release = nv40_miptree_surface_del; +} + diff --git a/src/gallium/drivers/nv40/nv40_query.c b/src/gallium/drivers/nv40/nv40_query.c new file mode 100644 index 0000000000..9b9a43f49d --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_query.c @@ -0,0 +1,122 @@ +#include "pipe/p_context.h" + +#include "nv40_context.h" + +struct nv40_query { + struct nouveau_resource *object; + unsigned type; + boolean ready; + uint64_t result; +}; + +static INLINE struct nv40_query * +nv40_query(struct pipe_query *pipe) +{ + return (struct nv40_query *)pipe; +} + +static struct pipe_query * +nv40_query_create(struct pipe_context *pipe, unsigned query_type) +{ + struct nv40_query *q; + + q = CALLOC(1, sizeof(struct nv40_query)); + q->type = query_type; + + return (struct pipe_query *)q; +} + +static void +nv40_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv40_context *nv40 = 
nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + + if (q->object) + nv40->nvws->res_free(&q->object); + FREE(q); +} + +static void +nv40_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + + assert(q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + /* Happens when end_query() is called, then another begin_query() + * without querying the result in-between. For now we'll wait for + * the existing query to notify completion, but it could be better. + */ + if (q->object) { + uint64_t tmp; + pipe->get_query_result(pipe, pq, 1, &tmp); + } + + if (nv40->nvws->res_alloc(nv40->screen->query_heap, 1, NULL, &q->object)) + assert(0); + nv40->nvws->notifier_reset(nv40->screen->query, q->object->start); + + BEGIN_RING(curie, NV40TCL_QUERY_RESET, 1); + OUT_RING (1); + BEGIN_RING(curie, NV40TCL_QUERY_UNK17CC, 1); + OUT_RING (1); + + q->ready = FALSE; +} + +static void +nv40_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + + BEGIN_RING(curie, NV40TCL_QUERY_GET, 1); + OUT_RING ((0x01 << NV40TCL_QUERY_GET_UNK24_SHIFT) | + ((q->object->start * 32) << NV40TCL_QUERY_GET_OFFSET_SHIFT)); + FIRE_RING(NULL); +} + +static boolean +nv40_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, uint64_t *result) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_query *q = nv40_query(pq); + struct nouveau_winsys *nvws = nv40->nvws; + + assert(q->object && q->type == PIPE_QUERY_OCCLUSION_COUNTER); + + if (!q->ready) { + unsigned status; + + status = nvws->notifier_status(nv40->screen->query, + q->object->start); + if (status != NV_NOTIFY_STATE_STATUS_COMPLETED) { + if (wait == FALSE) + return FALSE; + nvws->notifier_wait(nv40->screen->query, q->object->start, + NV_NOTIFY_STATE_STATUS_COMPLETED, + 0); + } + + q->result = 
nvws->notifier_retval(nv40->screen->query, + q->object->start); + q->ready = TRUE; + nvws->res_free(&q->object); + } + + *result = q->result; + return TRUE; +} + +void +nv40_init_query_functions(struct nv40_context *nv40) +{ + nv40->pipe.create_query = nv40_query_create; + nv40->pipe.destroy_query = nv40_query_destroy; + nv40->pipe.begin_query = nv40_query_begin; + nv40->pipe.end_query = nv40_query_end; + nv40->pipe.get_query_result = nv40_query_result; +} diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c new file mode 100644 index 0000000000..2372bc8441 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -0,0 +1,383 @@ +#include "pipe/p_screen.h" +#include "util/u_simple_screen.h" + +#include "nv40_context.h" +#include "nv40_screen.h" + +#define NV4X_GRCLASS4097_CHIPSETS 0x00000baf +#define NV4X_GRCLASS4497_CHIPSETS 0x00005450 +#define NV6X_GRCLASS4497_CHIPSETS 0x00000088 + +static const char * +nv40_screen_get_name(struct pipe_screen *pscreen) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + struct nouveau_device *dev = screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv40_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "nouveau"; +} + +static int +nv40_screen_get_param(struct pipe_screen *pscreen, int param) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 16; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 1; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 1; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 4; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case 
PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; /* We have 4 - but unsupported currently */ + case NOUVEAU_CAP_HW_VTXBUF: + return 1; + case NOUVEAU_CAP_HW_IDXBUF: + if (screen->curie->grclass == NV40TCL) + return 1; + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv40_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static boolean +nv40_screen_surface_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_R16_SNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static struct pipe_buffer * 
+nv40_surface_buffer(struct pipe_surface *surf) +{ + struct nv40_miptree *mt = (struct nv40_miptree *)surf->texture; + + return mt->buffer; +} + +static void * +nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, + unsigned flags ) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface *surface_to_map; + void *map; + + if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; + + if (!mt->shadow_tex) { + unsigned old_tex_usage = surface->texture->tex_usage; + surface->texture->tex_usage = NOUVEAU_TEXTURE_USAGE_LINEAR | + PIPE_TEXTURE_USAGE_DYNAMIC; + mt->shadow_tex = screen->texture_create(screen, surface->texture); + surface->texture->tex_usage = old_tex_usage; + + assert(mt->shadow_tex->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR); + } + + mt->shadow_surface = screen->get_tex_surface + ( + screen, mt->shadow_tex, + surface->face, surface->level, surface->zslice, + surface->usage + ); + + surface_to_map = mt->shadow_surface; + } + else + surface_to_map = surface; + + assert(surface_to_map); + map = ws->buffer_map(ws, nv40_surface_buffer(surface_to_map), flags); + if (!map) + return NULL; + + return map + surface_to_map->offset; +} + +static void +nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) +{ + struct pipe_winsys *ws = screen->winsys; + struct pipe_surface *surface_to_unmap; + + /* TODO: Copy from shadow just before push buffer is flushed instead. + There are probably some programs that map/unmap excessively + before rendering. 
*/ + if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { + struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; + + assert(mt->shadow_tex); + + surface_to_unmap = mt->shadow_surface; + } + else + surface_to_unmap = surface; + + assert(surface_to_unmap); + + ws->buffer_unmap(ws, nv40_surface_buffer(surface_to_unmap)); + + if (surface_to_unmap != surface) { + struct nv40_screen *nvscreen = nv40_screen(screen); + + nvscreen->eng2d->copy(nvscreen->eng2d, surface, 0, 0, + surface_to_unmap, 0, 0, + surface->width, surface->height); + + screen->tex_surface_release(screen, &surface_to_unmap); + } +} + +static void +nv40_screen_destroy(struct pipe_screen *pscreen) +{ + struct nv40_screen *screen = nv40_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + + nvws->res_free(&screen->vp_exec_heap); + nvws->res_free(&screen->vp_data_heap); + nvws->res_free(&screen->query_heap); + nvws->notifier_free(&screen->query); + nvws->notifier_free(&screen->sync); + nvws->grobj_free(&screen->curie); + + FREE(pscreen); +} + +struct pipe_screen * +nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv40_screen *screen = CALLOC_STRUCT(nv40_screen); + struct nouveau_stateobj *so; + unsigned curie_class; + unsigned chipset = nvws->channel->device->chipset; + int ret; + + if (!screen) + return NULL; + screen->nvws = nvws; + + /* 2D engine setup */ + screen->eng2d = nv04_surface_2d_init(nvws); + screen->eng2d->buf = nv40_surface_buffer; + + /* 3D object */ + switch (chipset & 0xf0) { + case 0x40: + if (NV4X_GRCLASS4097_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV40TCL; + else + if (NV4X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV44TCL; + break; + case 0x60: + if (NV6X_GRCLASS4497_CHIPSETS & (1 << (chipset & 0x0f))) + curie_class = NV44TCL; + break; + default: + break; + } + + if (!curie_class) { + NOUVEAU_ERR("Unknown nv4x chipset: nv%02x\n", chipset); + return NULL; + } + + ret = 
nvws->grobj_alloc(nvws, curie_class, &screen->curie); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + return FALSE; + } + + /* Notifier for sync purposes */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Query objects */ + ret = nvws->notifier_alloc(nvws, 32, &screen->query); + if (ret) { + NOUVEAU_ERR("Error initialising query objects: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + ret = nvws->res_init(&screen->query_heap, 0, 32); + if (ret) { + NOUVEAU_ERR("Error initialising query object heap: %d\n", ret); + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Vtxprog resources */ + if (nvws->res_init(&screen->vp_exec_heap, 0, 512) || + nvws->res_init(&screen->vp_data_heap, 0, 256)) { + nv40_screen_destroy(&screen->pipe); + return NULL; + } + + /* Static curie initialisation */ + so = so_new(128, 0); + so_method(so, screen->curie, NV40TCL_DMA_NOTIFY, 1); + so_data (so, screen->sync->handle); + so_method(so, screen->curie, NV40TCL_DMA_TEXTURE0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR1, 1); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_VTXBUF0, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->gart->handle); + so_method(so, screen->curie, NV40TCL_DMA_FENCE, 2); + so_data (so, 0); + so_data (so, screen->query->handle); + so_method(so, screen->curie, NV40TCL_DMA_UNK01AC, 2); + so_data (so, nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + so_method(so, screen->curie, NV40TCL_DMA_COLOR2, 2); + so_data (so, 
nvws->channel->vram->handle); + so_data (so, nvws->channel->vram->handle); + + so_method(so, screen->curie, 0x1ea4, 3); + so_data (so, 0x00000010); + so_data (so, 0x01000100); + so_data (so, 0xff800006); + + /* vtxprog output routing */ + so_method(so, screen->curie, 0x1fc4, 1); + so_data (so, 0x06144321); + so_method(so, screen->curie, 0x1fc8, 2); + so_data (so, 0xedcba987); + so_data (so, 0x00000021); + so_method(so, screen->curie, 0x1fd0, 1); + so_data (so, 0x00171615); + so_method(so, screen->curie, 0x1fd4, 1); + so_data (so, 0x001b1a19); + + so_method(so, screen->curie, 0x1ef8, 1); + so_data (so, 0x0020ffff); + so_method(so, screen->curie, 0x1d64, 1); + so_data (so, 0x00d30000); + so_method(so, screen->curie, 0x1e94, 1); + so_data (so, 0x00000001); + + so_emit(nvws, so); + so_ref(NULL, &so); + nvws->push_flush(nvws, 0, NULL); + + screen->pipe.winsys = ws; + screen->pipe.destroy = nv40_screen_destroy; + + screen->pipe.get_name = nv40_screen_get_name; + screen->pipe.get_vendor = nv40_screen_get_vendor; + screen->pipe.get_param = nv40_screen_get_param; + screen->pipe.get_paramf = nv40_screen_get_paramf; + + screen->pipe.is_format_supported = nv40_screen_surface_format_supported; + + screen->pipe.surface_map = nv40_surface_map; + screen->pipe.surface_unmap = nv40_surface_unmap; + + nv40_screen_init_miptree_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv40/nv40_screen.h b/src/gallium/drivers/nv40/nv40_screen.h new file mode 100644 index 0000000000..4500aa0e5c --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_screen.h @@ -0,0 +1,37 @@ +#ifndef __NV40_SCREEN_H__ +#define __NV40_SCREEN_H__ + +#include "pipe/p_screen.h" +#include "nv04/nv04_surface_2d.h" + +struct nv40_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + + unsigned cur_pctx; + + /* HW graphics objects */ + struct nv04_surface_2d *eng2d; + struct nouveau_grobj *curie; + struct nouveau_notifier *sync; + 
+ /* Query object resources */ + struct nouveau_notifier *query; + struct nouveau_resource *query_heap; + + /* Vtxprog resources */ + struct nouveau_resource *vp_exec_heap; + struct nouveau_resource *vp_data_heap; + + /* Current 3D state of channel */ + struct nouveau_stateobj *state[NV40_STATE_MAX]; +}; + +static INLINE struct nv40_screen * +nv40_screen(struct pipe_screen *screen) +{ + return (struct nv40_screen *)screen; +} + +#endif diff --git a/src/gallium/drivers/nv40/nv40_shader.h b/src/gallium/drivers/nv40/nv40_shader.h new file mode 100644 index 0000000000..854dccf548 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_shader.h @@ -0,0 +1,556 @@ +#ifndef __NV40_SHADER_H__ +#define __NV40_SHADER_H__ + +/* Vertex programs instruction set + * + * The NV40 instruction set is very similar to NV30. Most fields are in + * a slightly different position in the instruction however. + * + * Merged instructions + * In some cases it is possible to put two instructions into one opcode + * slot. The rules for when this is OK is not entirely clear to me yet. + * + * There are separate writemasks and dest temp register fields for each + * grouping of instructions. There is however only one field with the + * ID of a result register. Writing to temp/result regs is selected by + * setting VEC_RESULT/SCA_RESULT. + * + * Temporary registers + * The source/dest temp register fields have been extended by 1 bit, to + * give a total of 32 temporary registers. + * + * Relative Addressing + * NV40 can use an address register to index into vertex attribute regs. + * This is done by putting the offset value into INPUT_SRC and setting + * the INDEX_INPUT flag. + * + * Conditional execution (see NV_vertex_program{2,3} for details) + * There is a second condition code register on NV40, it's use is enabled + * by setting the COND_REG_SELECT_1 flag. 
/*
 * Bit-field layout of one NV40 vertex program instruction.
 *
 * An instruction is four 32-bit words; each section below covers one word
 * (data DWORD 0..3).  *_SHIFT gives the bit position of a field and *_MASK
 * the in-place mask; indented "# define" entries are the values that go
 * into the field named just above them.
 *
 * Masks that reach bit 31 use unsigned literals so the shift never
 * overflows a signed int, and function-like macros parenthesise their
 * argument so that expressions can be passed safely.
 */

/* ---- OPCODE BITS 127:96 / data DWORD 0 --- */
#define NV40_VP_INST_VEC_RESULT			(1 << 30)
/* uncertain.. */
#define NV40_VP_INST_COND_UPDATE_ENABLE		((1 << 14) | (1 << 29))
/* use address reg as index into attribs */
#define NV40_VP_INST_INDEX_INPUT		(1 << 27)
#define NV40_VP_INST_COND_REG_SELECT_1		(1 << 25)
#define NV40_VP_INST_ADDR_REG_SELECT_1		(1 << 24)
#define NV40_VP_INST_SRC2_ABS			(1 << 23)
#define NV40_VP_INST_SRC1_ABS			(1 << 22)
#define NV40_VP_INST_SRC0_ABS			(1 << 21)
#define NV40_VP_INST_VEC_DEST_TEMP_SHIFT	15
#define NV40_VP_INST_VEC_DEST_TEMP_MASK		(0x1F << 15)
#define NV40_VP_INST_COND_TEST_ENABLE		(1 << 13)
#define NV40_VP_INST_COND_SHIFT			10
#define NV40_VP_INST_COND_MASK			(0x7 << 10)
#    define NV40_VP_INST_COND_FL		0
#    define NV40_VP_INST_COND_LT		1
#    define NV40_VP_INST_COND_EQ		2
#    define NV40_VP_INST_COND_LE		3
#    define NV40_VP_INST_COND_GT		4
#    define NV40_VP_INST_COND_NE		5
#    define NV40_VP_INST_COND_GE		6
#    define NV40_VP_INST_COND_TR		7
#define NV40_VP_INST_COND_SWZ_X_SHIFT		8
#define NV40_VP_INST_COND_SWZ_X_MASK		(3 << 8)
#define NV40_VP_INST_COND_SWZ_Y_SHIFT		6
#define NV40_VP_INST_COND_SWZ_Y_MASK		(3 << 6)
#define NV40_VP_INST_COND_SWZ_Z_SHIFT		4
#define NV40_VP_INST_COND_SWZ_Z_MASK		(3 << 4)
#define NV40_VP_INST_COND_SWZ_W_SHIFT		2
#define NV40_VP_INST_COND_SWZ_W_MASK		(3 << 2)
#define NV40_VP_INST_COND_SWZ_ALL_SHIFT		2
#define NV40_VP_INST_COND_SWZ_ALL_MASK		(0xFF << 2)
#define NV40_VP_INST_ADDR_SWZ_SHIFT		0
#define NV40_VP_INST_ADDR_SWZ_MASK		(0x03 << 0)
/* Union of every DWORD 0 field mask listed above (flags VEC_RESULT and
 * COND_UPDATE_ENABLE are deliberately left as in the original list). */
#define NV40_VP_INST0_KNOWN ( \
		NV40_VP_INST_INDEX_INPUT | \
		NV40_VP_INST_COND_REG_SELECT_1 | \
		NV40_VP_INST_ADDR_REG_SELECT_1 | \
		NV40_VP_INST_SRC2_ABS | \
		NV40_VP_INST_SRC1_ABS | \
		NV40_VP_INST_SRC0_ABS | \
		NV40_VP_INST_VEC_DEST_TEMP_MASK | \
		NV40_VP_INST_COND_TEST_ENABLE | \
		NV40_VP_INST_COND_MASK | \
		NV40_VP_INST_COND_SWZ_ALL_MASK | \
		NV40_VP_INST_ADDR_SWZ_MASK)

/* ---- OPCODE BITS 95:64 / data DWORD 1 --- */
#define NV40_VP_INST_VEC_OPCODE_SHIFT		22
#define NV40_VP_INST_VEC_OPCODE_MASK		(0x1F << 22)
#    define NV40_VP_INST_OP_NOP			0x00
#    define NV40_VP_INST_OP_MOV			0x01
#    define NV40_VP_INST_OP_MUL			0x02
#    define NV40_VP_INST_OP_ADD			0x03
#    define NV40_VP_INST_OP_MAD			0x04
#    define NV40_VP_INST_OP_DP3			0x05
#    define NV40_VP_INST_OP_DPH			0x06
#    define NV40_VP_INST_OP_DP4			0x07
#    define NV40_VP_INST_OP_DST			0x08
#    define NV40_VP_INST_OP_MIN			0x09
#    define NV40_VP_INST_OP_MAX			0x0A
#    define NV40_VP_INST_OP_SLT			0x0B
#    define NV40_VP_INST_OP_SGE			0x0C
#    define NV40_VP_INST_OP_ARL			0x0D
#    define NV40_VP_INST_OP_FRC			0x0E
#    define NV40_VP_INST_OP_FLR			0x0F
#    define NV40_VP_INST_OP_SEQ			0x10
#    define NV40_VP_INST_OP_SFL			0x11
#    define NV40_VP_INST_OP_SGT			0x12
#    define NV40_VP_INST_OP_SLE			0x13
#    define NV40_VP_INST_OP_SNE			0x14
#    define NV40_VP_INST_OP_STR			0x15
#    define NV40_VP_INST_OP_SSG			0x16
#    define NV40_VP_INST_OP_ARR			0x17
#    define NV40_VP_INST_OP_ARA			0x18
#    define NV40_VP_INST_OP_TXL			0x19
#define NV40_VP_INST_SCA_OPCODE_SHIFT		27
/* unsigned: the field occupies bits 27..31 */
#define NV40_VP_INST_SCA_OPCODE_MASK		(0x1Fu << 27)
/* Scalar opcodes.  NOP (0x00) and MOV (0x01) use the same names and
 * values as the vector opcodes defined above, so they are not repeated. */
#    define NV40_VP_INST_OP_RCP			0x02
#    define NV40_VP_INST_OP_RCC			0x03
#    define NV40_VP_INST_OP_RSQ			0x04
#    define NV40_VP_INST_OP_EXP			0x05
#    define NV40_VP_INST_OP_LOG			0x06
#    define NV40_VP_INST_OP_LIT			0x07
#    define NV40_VP_INST_OP_BRA			0x09
#    define NV40_VP_INST_OP_CAL			0x0B
#    define NV40_VP_INST_OP_RET			0x0C
#    define NV40_VP_INST_OP_LG2			0x0D
#    define NV40_VP_INST_OP_EX2			0x0E
#    define NV40_VP_INST_OP_SIN			0x0F
#    define NV40_VP_INST_OP_COS			0x10
#    define NV40_VP_INST_OP_PUSHA		0x13
#    define NV40_VP_INST_OP_POPA		0x14
#define NV40_VP_INST_CONST_SRC_SHIFT		12
#define NV40_VP_INST_CONST_SRC_MASK		(0xFF << 12)
#define NV40_VP_INST_INPUT_SRC_SHIFT		8
#define NV40_VP_INST_INPUT_SRC_MASK		(0x0F << 8)
#    define NV40_VP_INST_IN_POS			0
#    define NV40_VP_INST_IN_WEIGHT		1
#    define NV40_VP_INST_IN_NORMAL		2
#    define NV40_VP_INST_IN_COL0		3
#    define NV40_VP_INST_IN_COL1		4
#    define NV40_VP_INST_IN_FOGC		5
#    define NV40_VP_INST_IN_TC0			8
/* n-th texcoord input; argument parenthesised so e.g. TC(a << b) works */
#    define NV40_VP_INST_IN_TC(n)		(8 + (n))
#define NV40_VP_INST_SRC0H_SHIFT		0
#define NV40_VP_INST_SRC0H_MASK			(0xFF << 0)
#define NV40_VP_INST1_KNOWN ( \
		NV40_VP_INST_VEC_OPCODE_MASK | \
		NV40_VP_INST_SCA_OPCODE_MASK | \
		NV40_VP_INST_CONST_SRC_MASK  | \
		NV40_VP_INST_INPUT_SRC_MASK  | \
		NV40_VP_INST_SRC0H_MASK \
		)

/* ---- OPCODE BITS 63:32 / data DWORD 2 --- */
#define NV40_VP_INST_SRC0L_SHIFT		23
/* unsigned: the field occupies bits 23..31 */
#define NV40_VP_INST_SRC0L_MASK			(0x1FFu << 23)
#define NV40_VP_INST_SRC1_SHIFT			6
#define NV40_VP_INST_SRC1_MASK			(0x1FFFF << 6)
#define NV40_VP_INST_SRC2H_SHIFT		0
#define NV40_VP_INST_SRC2H_MASK			(0x3F << 0)
#define NV40_VP_INST_IADDRH_SHIFT		0
#define NV40_VP_INST_IADDRH_MASK		(0x1F << 0)

/* ---- OPCODE BITS 31:0 / data DWORD 3 --- */
#define NV40_VP_INST_IADDRL_SHIFT		29
/* unsigned: the field occupies bits 29..31 */
#define NV40_VP_INST_IADDRL_MASK		(7u << 29)
#define NV40_VP_INST_SRC2L_SHIFT		21
/* unsigned: the field occupies bits 21..31 */
#define NV40_VP_INST_SRC2L_MASK			(0x7FFu << 21)
#define NV40_VP_INST_SCA_WRITEMASK_SHIFT	17
#define NV40_VP_INST_SCA_WRITEMASK_MASK		(0xF << 17)
#    define NV40_VP_INST_SCA_WRITEMASK_X	(1 << 20)
#    define NV40_VP_INST_SCA_WRITEMASK_Y	(1 << 19)
#    define NV40_VP_INST_SCA_WRITEMASK_Z	(1 << 18)
#    define NV40_VP_INST_SCA_WRITEMASK_W	(1 << 17)
#define NV40_VP_INST_VEC_WRITEMASK_SHIFT	13
#define NV40_VP_INST_VEC_WRITEMASK_MASK		(0xF << 13)
#    define NV40_VP_INST_VEC_WRITEMASK_X	(1 << 16)
#    define NV40_VP_INST_VEC_WRITEMASK_Y	(1 << 15)
#    define NV40_VP_INST_VEC_WRITEMASK_Z	(1 << 14)
#    define NV40_VP_INST_VEC_WRITEMASK_W	(1 << 13)
#define NV40_VP_INST_SCA_RESULT			(1 << 12)
#define NV40_VP_INST_SCA_DEST_TEMP_SHIFT	7
#define NV40_VP_INST_SCA_DEST_TEMP_MASK		(0x1F << 7)
#define NV40_VP_INST_DEST_SHIFT			2
#define NV40_VP_INST_DEST_MASK			(31 << 2)
#    define NV40_VP_INST_DEST_POS		0
#    define NV40_VP_INST_DEST_COL0		1
#    define NV40_VP_INST_DEST_COL1		2
#    define NV40_VP_INST_DEST_BFC0		3
#    define NV40_VP_INST_DEST_BFC1		4
#    define NV40_VP_INST_DEST_FOGC		5
#    define NV40_VP_INST_DEST_PSZ		6
#    define NV40_VP_INST_DEST_TC0		7
/* n-th texcoord output; argument parenthesised for the same reason */
#    define NV40_VP_INST_DEST_TC(n)		(7 + (n))
#    define NV40_VP_INST_DEST_TEMP		0x1F
#define NV40_VP_INST_INDEX_CONST		(1 << 1)
#define NV40_VP_INST_LAST			(1 << 0)
#define NV40_VP_INST3_KNOWN ( \
		NV40_VP_INST_SRC2L_MASK | \
		NV40_VP_INST_SCA_WRITEMASK_MASK | \
		NV40_VP_INST_VEC_WRITEMASK_MASK | \
		NV40_VP_INST_SCA_DEST_TEMP_MASK | \
		NV40_VP_INST_DEST_MASK | \
		NV40_VP_INST_INDEX_CONST)

/* Useful to split the source selection regs into their pieces */
#define NV40_VP_SRC0_HIGH_SHIFT			9
#define NV40_VP_SRC0_HIGH_MASK			0x0001FE00
#define NV40_VP_SRC0_LOW_MASK			0x000001FF
#define NV40_VP_SRC2_HIGH_SHIFT			11
#define NV40_VP_SRC2_HIGH_MASK			0x0001F800
#define NV40_VP_SRC2_LOW_MASK			0x000007FF

/* Source selection - these are the bits you fill NV40_VP_INST_SRCn with */
#define NV40_VP_SRC_NEGATE			(1 << 16)
#define NV40_VP_SRC_SWZ_X_SHIFT			14
#define NV40_VP_SRC_SWZ_X_MASK			(3 << 14)
#define NV40_VP_SRC_SWZ_Y_SHIFT			12
#define NV40_VP_SRC_SWZ_Y_MASK			(3 << 12)
#define NV40_VP_SRC_SWZ_Z_SHIFT			10
#define NV40_VP_SRC_SWZ_Z_MASK			(3 << 10)
#define NV40_VP_SRC_SWZ_W_SHIFT			8
#define NV40_VP_SRC_SWZ_W_MASK			(3 << 8)
#define NV40_VP_SRC_SWZ_ALL_SHIFT		8
#define NV40_VP_SRC_SWZ_ALL_MASK		(0xFF << 8)
#define NV40_VP_SRC_TEMP_SRC_SHIFT		2
#define NV40_VP_SRC_TEMP_SRC_MASK		(0x1F << 2)
#define NV40_VP_SRC_REG_TYPE_SHIFT		0
#define NV40_VP_SRC_REG_TYPE_MASK		(3 << 0)
#    define NV40_VP_SRC_REG_TYPE_UNK0		0
#    define NV40_VP_SRC_REG_TYPE_TEMP		1
#    define NV40_VP_SRC_REG_TYPE_INPUT		2
#    define NV40_VP_SRC_REG_TYPE_CONST		3
/*
 * Bit-field layout of one NV40 fragment program instruction.
 *
 * *_SHIFT is the bit position of a field, *_MASK its in-place mask, and
 * the indented "# define" entries are the values stored in that field.
 * Flags that live in bit 31 use an unsigned literal so the shift does not
 * overflow a signed int; function-like macros parenthesise their argument.
 */

//== Opcode / Destination selection ==
#define NV40_FP_OP_PROGRAM_END			(1 << 0)
#define NV40_FP_OP_OUT_REG_SHIFT		1
#define NV40_FP_OP_OUT_REG_MASK			(63 << 1)
/* Needs to be set when writing outputs to get expected result.. */
#define NV40_FP_OP_OUT_REG_HALF			(1 << 7)
#define NV40_FP_OP_COND_WRITE_ENABLE		(1 << 8)
#define NV40_FP_OP_OUTMASK_SHIFT		9
#define NV40_FP_OP_OUTMASK_MASK			(0xF << 9)
#	define NV40_FP_OP_OUT_X			(1 << 9)
#	define NV40_FP_OP_OUT_Y			(1 << 10)
#	define NV40_FP_OP_OUT_Z			(1 << 11)
#	define NV40_FP_OP_OUT_W			(1 << 12)
/* Uncertain about these, especially the input_src values.. it's possible that
 * they can be dynamically changed.
 */
#define NV40_FP_OP_INPUT_SRC_SHIFT		13
#define NV40_FP_OP_INPUT_SRC_MASK		(15 << 13)
#	define NV40_FP_OP_INPUT_SRC_POSITION	0x0
#	define NV40_FP_OP_INPUT_SRC_COL0	0x1
#	define NV40_FP_OP_INPUT_SRC_COL1	0x2
#	define NV40_FP_OP_INPUT_SRC_FOGC	0x3
#	define NV40_FP_OP_INPUT_SRC_TC0		0x4
/* n-th texcoord input; argument parenthesised so expressions are safe */
#	define NV40_FP_OP_INPUT_SRC_TC(n)	(0x4 + (n))
#	define NV40_FP_OP_INPUT_SRC_FACING	0xE
#define NV40_FP_OP_TEX_UNIT_SHIFT		17
#define NV40_FP_OP_TEX_UNIT_MASK		(0xF << 17)
#define NV40_FP_OP_PRECISION_SHIFT		22
#define NV40_FP_OP_PRECISION_MASK		(3 << 22)
#	define NV40_FP_PRECISION_FP32		0
#	define NV40_FP_PRECISION_FP16		1
#	define NV40_FP_PRECISION_FX12		2
#define NV40_FP_OP_OPCODE_SHIFT			24
#define NV40_FP_OP_OPCODE_MASK			(0x3F << 24)
#	define NV40_FP_OP_OPCODE_NOP		0x00
#	define NV40_FP_OP_OPCODE_MOV		0x01
#	define NV40_FP_OP_OPCODE_MUL		0x02
#	define NV40_FP_OP_OPCODE_ADD		0x03
#	define NV40_FP_OP_OPCODE_MAD		0x04
#	define NV40_FP_OP_OPCODE_DP3		0x05
#	define NV40_FP_OP_OPCODE_DP4		0x06
#	define NV40_FP_OP_OPCODE_DST		0x07
#	define NV40_FP_OP_OPCODE_MIN		0x08
#	define NV40_FP_OP_OPCODE_MAX		0x09
#	define NV40_FP_OP_OPCODE_SLT		0x0A
#	define NV40_FP_OP_OPCODE_SGE		0x0B
#	define NV40_FP_OP_OPCODE_SLE		0x0C
#	define NV40_FP_OP_OPCODE_SGT		0x0D
#	define NV40_FP_OP_OPCODE_SNE		0x0E
#	define NV40_FP_OP_OPCODE_SEQ		0x0F
#	define NV40_FP_OP_OPCODE_FRC		0x10
#	define NV40_FP_OP_OPCODE_FLR		0x11
#	define NV40_FP_OP_OPCODE_KIL		0x12
#	define NV40_FP_OP_OPCODE_PK4B		0x13
#	define NV40_FP_OP_OPCODE_UP4B		0x14
/* DDX/DDY can only write to XY */
#	define NV40_FP_OP_OPCODE_DDX		0x15
#	define NV40_FP_OP_OPCODE_DDY		0x16
#	define NV40_FP_OP_OPCODE_TEX		0x17
#	define NV40_FP_OP_OPCODE_TXP		0x18
#	define NV40_FP_OP_OPCODE_TXD		0x19
#	define NV40_FP_OP_OPCODE_RCP		0x1A
#	define NV40_FP_OP_OPCODE_EX2		0x1C
#	define NV40_FP_OP_OPCODE_LG2		0x1D
#	define NV40_FP_OP_OPCODE_STR		0x20
#	define NV40_FP_OP_OPCODE_SFL		0x21
#	define NV40_FP_OP_OPCODE_COS		0x22
#	define NV40_FP_OP_OPCODE_SIN		0x23
#	define NV40_FP_OP_OPCODE_PK2H		0x24
#	define NV40_FP_OP_OPCODE_UP2H		0x25
#	define NV40_FP_OP_OPCODE_PK4UB		0x27
#	define NV40_FP_OP_OPCODE_UP4UB		0x28
#	define NV40_FP_OP_OPCODE_PK2US		0x29
#	define NV40_FP_OP_OPCODE_UP2US		0x2A
#	define NV40_FP_OP_OPCODE_DP2A		0x2E
#	define NV40_FP_OP_OPCODE_TXL		0x2F
#	define NV40_FP_OP_OPCODE_TXB		0x31
#	define NV40_FP_OP_OPCODE_DIV		0x3A
#	define NV40_FP_OP_OPCODE_UNK_LIT	0x3C
/* The use of these instructions appears to be indicated by bit 31 of DWORD 2.*/
#	define NV40_FP_OP_BRA_OPCODE_BRK	0x0
#	define NV40_FP_OP_BRA_OPCODE_CAL	0x1
#	define NV40_FP_OP_BRA_OPCODE_IF		0x2
#	define NV40_FP_OP_BRA_OPCODE_LOOP	0x3
#	define NV40_FP_OP_BRA_OPCODE_REP	0x4
#	define NV40_FP_OP_BRA_OPCODE_RET	0x5
/* unsigned: bit 31 */
#define NV40_FP_OP_OUT_SAT			(1u << 31)

/* high order bits of SRC0 */
#define NV40_FP_OP_OUT_ABS			(1 << 29)
#define NV40_FP_OP_COND_SWZ_W_SHIFT		27
#define NV40_FP_OP_COND_SWZ_W_MASK		(3 << 27)
#define NV40_FP_OP_COND_SWZ_Z_SHIFT		25
#define NV40_FP_OP_COND_SWZ_Z_MASK		(3 << 25)
#define NV40_FP_OP_COND_SWZ_Y_SHIFT		23
#define NV40_FP_OP_COND_SWZ_Y_MASK		(3 << 23)
#define NV40_FP_OP_COND_SWZ_X_SHIFT		21
#define NV40_FP_OP_COND_SWZ_X_MASK		(3 << 21)
#define NV40_FP_OP_COND_SWZ_ALL_SHIFT		21
#define NV40_FP_OP_COND_SWZ_ALL_MASK		(0xFF << 21)
#define NV40_FP_OP_COND_SHIFT			18
#define NV40_FP_OP_COND_MASK			(0x07 << 18)
#	define NV40_FP_OP_COND_FL		0
#	define NV40_FP_OP_COND_LT		1
#	define NV40_FP_OP_COND_EQ		2
#	define NV40_FP_OP_COND_LE		3
#	define NV40_FP_OP_COND_GT		4
#	define NV40_FP_OP_COND_NE		5
#	define NV40_FP_OP_COND_GE		6
#	define NV40_FP_OP_COND_TR		7

/* high order bits of SRC1 */
/* unsigned: bit 31 */
#define NV40_FP_OP_OPCODE_IS_BRANCH		(1u << 31)
#define NV40_FP_OP_DST_SCALE_SHIFT		28
/* NOTE(review): the INV_* values below (5..7) need three bits while this
 * mask covers two - confirm the field width against the hardware. */
#define NV40_FP_OP_DST_SCALE_MASK		(3 << 28)
#define NV40_FP_OP_DST_SCALE_1X			0
#define NV40_FP_OP_DST_SCALE_2X			1
#define NV40_FP_OP_DST_SCALE_4X			2
#define NV40_FP_OP_DST_SCALE_8X			3
#define NV40_FP_OP_DST_SCALE_INV_2X		5
#define NV40_FP_OP_DST_SCALE_INV_4X		6
#define NV40_FP_OP_DST_SCALE_INV_8X		7

/* SRC1 LOOP */
#define NV40_FP_OP_LOOP_INCR_SHIFT		19
#define NV40_FP_OP_LOOP_INCR_MASK		(0xFF << 19)
#define NV40_FP_OP_LOOP_INDEX_SHIFT		10
#define NV40_FP_OP_LOOP_INDEX_MASK		(0xFF << 10)
#define NV40_FP_OP_LOOP_COUNT_SHIFT		2
#define NV40_FP_OP_LOOP_COUNT_MASK		(0xFF << 2)

/* SRC1 IF */
#define NV40_FP_OP_ELSE_ID_SHIFT		2
#define NV40_FP_OP_ELSE_ID_MASK			(0xFF << 2)

/* SRC1 CAL */
#define NV40_FP_OP_IADDR_SHIFT			2
#define NV40_FP_OP_IADDR_MASK			(0xFF << 2)

/* SRC1 REP
 *   I have no idea why there are 3 count values here..  but they
 *   have always been filled with the same value in my tests so
 *   far..
 */
#define NV40_FP_OP_REP_COUNT1_SHIFT		2
#define NV40_FP_OP_REP_COUNT1_MASK		(0xFF << 2)
#define NV40_FP_OP_REP_COUNT2_SHIFT		10
#define NV40_FP_OP_REP_COUNT2_MASK		(0xFF << 10)
#define NV40_FP_OP_REP_COUNT3_SHIFT		19
#define NV40_FP_OP_REP_COUNT3_MASK		(0xFF << 19)

/* SRC2 REP/IF */
#define NV40_FP_OP_END_ID_SHIFT			2
#define NV40_FP_OP_END_ID_MASK			(0xFF << 2)

// SRC2 high-order
#define NV40_FP_OP_INDEX_INPUT			(1 << 30)
#define NV40_FP_OP_ADDR_INDEX_SHIFT		19
#define NV40_FP_OP_ADDR_INDEX_MASK		(0xF << 19)

//== Register selection ==
#define NV40_FP_REG_TYPE_SHIFT			0
#define NV40_FP_REG_TYPE_MASK			(3 << 0)
#	define NV40_FP_REG_TYPE_TEMP		0
#	define NV40_FP_REG_TYPE_INPUT		1
#	define NV40_FP_REG_TYPE_CONST		2
#define NV40_FP_REG_SRC_SHIFT			2
#define NV40_FP_REG_SRC_MASK			(63 << 2)
#define NV40_FP_REG_SRC_HALF			(1 << 8)
#define NV40_FP_REG_SWZ_ALL_SHIFT		9
#define NV40_FP_REG_SWZ_ALL_MASK		(255 << 9)
#define NV40_FP_REG_SWZ_X_SHIFT			9
#define NV40_FP_REG_SWZ_X_MASK			(3 << 9)
#define NV40_FP_REG_SWZ_Y_SHIFT			11
#define NV40_FP_REG_SWZ_Y_MASK			(3 << 11)
#define NV40_FP_REG_SWZ_Z_SHIFT			13
#define NV40_FP_REG_SWZ_Z_MASK			(3 << 13)
#define NV40_FP_REG_SWZ_W_SHIFT			15
#define NV40_FP_REG_SWZ_W_MASK			(3 << 15)
#	define NV40_FP_SWIZZLE_X		0
#	define NV40_FP_SWIZZLE_Y		1
#	define NV40_FP_SWIZZLE_Z		2
#	define NV40_FP_SWIZZLE_W		3
#define NV40_FP_REG_NEGATE			(1 << 17)
.cc_test = DEF_CTEST, + .cc_test_reg = 0, + .cc_swz = { 0, 1, 2, 3 }, + }; + return temp; +} + +static INLINE struct nv40_sreg +nv40_sr_swz(struct nv40_sreg src, int x, int y, int z, int w) +{ + struct nv40_sreg dst = src; + + dst.swz[SWZ_X] = src.swz[x]; + dst.swz[SWZ_Y] = src.swz[y]; + dst.swz[SWZ_Z] = src.swz[z]; + dst.swz[SWZ_W] = src.swz[w]; + return dst; +} + +static INLINE struct nv40_sreg +nv40_sr_neg(struct nv40_sreg src) +{ + src.negate = !src.negate; + return src; +} + +static INLINE struct nv40_sreg +nv40_sr_abs(struct nv40_sreg src) +{ + src.abs = 1; + return src; +} + +static INLINE struct nv40_sreg +nv40_sr_scale(struct nv40_sreg src, int scale) +{ + src.dst_scale = scale; + return src; +} +#endif + +#endif diff --git a/src/gallium/drivers/nv40/nv40_state.c b/src/gallium/drivers/nv40/nv40_state.c new file mode 100644 index 0000000000..2eff25aa83 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state.c @@ -0,0 +1,740 @@ +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "draw/draw_context.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +static void * +nv40_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_grobj *curie = nv40->screen->curie; + struct nv40_blend_state *bso = CALLOC(1, sizeof(*bso)); + struct nouveau_stateobj *so = so_new(16, 0); + + if (cso->blend_enable) { + so_method(so, curie, NV40TCL_BLEND_ENABLE, 3); + so_data (so, 1); + so_data (so, (nvgl_blend_func(cso->alpha_src_factor) << 16) | + nvgl_blend_func(cso->rgb_src_factor)); + so_data (so, nvgl_blend_func(cso->alpha_dst_factor) << 16 | + nvgl_blend_func(cso->rgb_dst_factor)); + so_method(so, curie, NV40TCL_BLEND_EQUATION, 1); + so_data (so, nvgl_blend_eqn(cso->alpha_func) << 16 | + nvgl_blend_eqn(cso->rgb_func)); + } else { + so_method(so, curie, NV40TCL_BLEND_ENABLE, 1); + so_data (so, 0); + } 
+ + so_method(so, curie, NV40TCL_COLOR_MASK, 1); + so_data (so, (((cso->colormask & PIPE_MASK_A) ? (0x01 << 24) : 0) | + ((cso->colormask & PIPE_MASK_R) ? (0x01 << 16) : 0) | + ((cso->colormask & PIPE_MASK_G) ? (0x01 << 8) : 0) | + ((cso->colormask & PIPE_MASK_B) ? (0x01 << 0) : 0))); + + if (cso->logicop_enable) { + so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } else { + so_method(so, curie, NV40TCL_COLOR_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, curie, NV40TCL_DITHER_ENABLE, 1); + so_data (so, cso->dither ? 1 : 0); + + so_ref(so, &bso->so); + bso->pipe = *cso; + return (void *)bso; +} + +static void +nv40_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->blend = hwcso; + nv40->dirty |= NV40_NEW_BLEND; +} + +static void +nv40_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_blend_state *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); +} + + +static INLINE unsigned +wrap_mode(unsigned wrap) { + unsigned ret; + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + ret = NV40TCL_TEX_WRAP_S_REPEAT; + break; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + ret = NV40TCL_TEX_WRAP_S_MIRRORED_REPEAT; + break; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + ret = NV40TCL_TEX_WRAP_S_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_CLAMP: + ret = NV40TCL_TEX_WRAP_S_CLAMP; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_EDGE; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP_TO_BORDER; + break; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + ret = NV40TCL_TEX_WRAP_S_MIRROR_CLAMP; + break; + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + ret = NV40TCL_TEX_WRAP_S_REPEAT; + break; + } + + return ret >> 
NV40TCL_TEX_WRAP_S_SHIFT; +} + +static void * +nv40_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + struct nv40_sampler_state *ps; + uint32_t filter = 0; + + ps = MALLOC(sizeof(struct nv40_sampler_state)); + + ps->fmt = 0; + if (!cso->normalized_coords) + ps->fmt |= NV40TCL_TEX_FORMAT_RECT; + + ps->wrap = ((wrap_mode(cso->wrap_s) << NV40TCL_TEX_WRAP_S_SHIFT) | + (wrap_mode(cso->wrap_t) << NV40TCL_TEX_WRAP_T_SHIFT) | + (wrap_mode(cso->wrap_r) << NV40TCL_TEX_WRAP_R_SHIFT)); + + ps->en = 0; + if (cso->max_anisotropy >= 2.0) { + /* no idea, binary driver sets it, works without it.. meh.. */ + ps->wrap |= (1 << 5); + + if (cso->max_anisotropy >= 16.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_16X; + } else + if (cso->max_anisotropy >= 12.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_12X; + } else + if (cso->max_anisotropy >= 10.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_10X; + } else + if (cso->max_anisotropy >= 8.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_8X; + } else + if (cso->max_anisotropy >= 6.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_6X; + } else + if (cso->max_anisotropy >= 4.0) { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_4X; + } else { + ps->en |= NV40TCL_TEX_ENABLE_ANISO_2X; + } + } + + switch (cso->mag_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + filter |= NV40TCL_TEX_FILTER_MAG_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + filter |= NV40TCL_TEX_FILTER_MAG_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV40TCL_TEX_FILTER_MIN_LINEAR_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV40TCL_TEX_FILTER_MIN_LINEAR; + break; + } + break; + case PIPE_TEX_FILTER_NEAREST: + default: + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_NEAREST: + filter |= 
NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_NEAREST; + break; + case PIPE_TEX_MIPFILTER_LINEAR: + filter |= NV40TCL_TEX_FILTER_MIN_NEAREST_MIPMAP_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + filter |= NV40TCL_TEX_FILTER_MIN_NEAREST; + break; + } + break; + } + + ps->filt = filter; + + { + float limit; + + limit = CLAMP(cso->lod_bias, -16.0, 15.0); + ps->filt |= (int)(cso->lod_bias * 256.0) & 0x1fff; + + limit = CLAMP(cso->max_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 7; + + limit = CLAMP(cso->min_lod, 0.0, 15.0); + ps->en |= (int)(limit * 256.0) << 19; + } + + + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + switch (cso->compare_func) { + case PIPE_FUNC_NEVER: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NEVER; + break; + case PIPE_FUNC_GREATER: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GREATER; + break; + case PIPE_FUNC_EQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_EQUAL; + break; + case PIPE_FUNC_GEQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_GEQUAL; + break; + case PIPE_FUNC_LESS: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LESS; + break; + case PIPE_FUNC_NOTEQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_NOTEQUAL; + break; + case PIPE_FUNC_LEQUAL: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_LEQUAL; + break; + case PIPE_FUNC_ALWAYS: + ps->wrap |= NV40TCL_TEX_WRAP_RCOMP_ALWAYS; + break; + default: + break; + } + } + + ps->bcol = ((float_to_ubyte(cso->border_color[3]) << 24) | + (float_to_ubyte(cso->border_color[0]) << 16) | + (float_to_ubyte(cso->border_color[1]) << 8) | + (float_to_ubyte(cso->border_color[2]) << 0)); + + return (void *)ps; +} + +static void +nv40_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv40_context *nv40 = nv40_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + nv40->tex_sampler[unit] = sampler[unit]; + nv40->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nv40->nr_samplers; unit++) { + nv40->tex_sampler[unit] = NULL; + nv40->dirty_samplers |= (1 << unit); + } + 
+ nv40->nr_samplers = nr; + nv40->dirty |= NV40_NEW_SAMPLER; +} + +static void +nv40_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nv40_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **miptree) +{ + struct nv40_context *nv40 = nv40_context(pipe); + unsigned unit; + + for (unit = 0; unit < nr; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nv40->tex_miptree[unit], miptree[unit]); + nv40->dirty_samplers |= (1 << unit); + } + + for (unit = nr; unit < nv40->nr_textures; unit++) { + pipe_texture_reference((struct pipe_texture **) + &nv40->tex_miptree[unit], NULL); + nv40->dirty_samplers |= (1 << unit); + } + + nv40->nr_textures = nr; + nv40->dirty |= NV40_NEW_SAMPLER; +} + +static void * +nv40_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_rasterizer_state *rsso = CALLOC(1, sizeof(*rsso)); + struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *curie = nv40->screen->curie; + + /*XXX: ignored: + * light_twoside + * point_smooth -nohw + * multisample + */ + + so_method(so, curie, NV40TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV40TCL_SHADE_MODEL_FLAT : + NV40TCL_SHADE_MODEL_SMOOTH); + + so_method(so, curie, NV40TCL_LINE_WIDTH, 2); + so_data (so, (unsigned char)(cso->line_width * 8.0) & 0xff); + so_data (so, cso->line_smooth ? 1 : 0); + so_method(so, curie, NV40TCL_LINE_STIPPLE_ENABLE, 2); + so_data (so, cso->line_stipple_enable ? 
1 : 0); + so_data (so, (cso->line_stipple_pattern << 16) | + cso->line_stipple_factor); + + so_method(so, curie, NV40TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, curie, NV40TCL_POLYGON_MODE_FRONT, 6); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV40TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV40TCL_FRONT_FACE_CCW); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV40TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV40TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV40TCL_CULL_FACE_BACK); + break; + } + so_data(so, NV40TCL_FRONT_FACE_CW); + } + so_data(so, cso->poly_smooth ? 1 : 0); + so_data(so, (cso->cull_mode != PIPE_WINDING_NONE) ? 1 : 0); + + so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 
1 : 0); + + so_method(so, curie, NV40TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, curie, NV40TCL_POLYGON_OFFSET_FACTOR, 2); + so_data (so, fui(cso->offset_scale)); + so_data (so, fui(cso->offset_units * 2)); + } + + so_method(so, curie, NV40TCL_POINT_SPRITE, 1); + if (cso->point_sprite) { + unsigned psctl = (1 << 0), i; + + for (i = 0; i < 8; i++) { + if (cso->sprite_coord_mode[i] != PIPE_SPRITE_COORD_NONE) + psctl |= (1 << (8 + i)); + } + + so_data(so, psctl); + } else { + so_data(so, 0); + } + + so_ref(so, &rsso->so); + rsso->pipe = *cso; + return (void *)rsso; +} + +static void +nv40_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->rasterizer = hwcso; + nv40->dirty |= NV40_NEW_RAST; + nv40->draw_dirty |= NV40_NEW_RAST; +} + +static void +nv40_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_rasterizer_state *rsso = hwcso; + + so_ref(NULL, &rsso->so); + FREE(rsso); +} + +static void * +nv40_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_zsa_state *zsaso = CALLOC(1, sizeof(*zsaso)); + struct nouveau_stateobj *so = so_new(32, 0); + struct nouveau_grobj *curie = nv40->screen->curie; + + so_method(so, curie, NV40TCL_DEPTH_FUNC, 3); + so_data (so, 
nvgl_comparison_op(cso->depth.func)); + so_data (so, cso->depth.writemask ? 1 : 0); + so_data (so, cso->depth.enabled ? 1 : 0); + + so_method(so, curie, NV40TCL_ALPHA_TEST_ENABLE, 3); + so_data (so, cso->alpha.enabled ? 1 : 0); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + so_data (so, float_to_ubyte(cso->alpha.ref_value)); + + if (cso->stencil[0].enabled) { + so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 8); + so_data (so, cso->stencil[0].enabled ? 1 : 0); + so_data (so, cso->stencil[0].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_data (so, cso->stencil[0].ref_value); + so_data (so, cso->stencil[0].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + } else { + so_method(so, curie, NV40TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 8); + so_data (so, cso->stencil[1].enabled ? 
1 : 0); + so_data (so, cso->stencil[1].writemask); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_data (so, cso->stencil[1].ref_value); + so_data (so, cso->stencil[1].valuemask); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + } else { + so_method(so, curie, NV40TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + so_ref(so, &zsaso->so); + zsaso->pipe = *cso; + return (void *)zsaso; +} + +static void +nv40_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->zsa = hwcso; + nv40->dirty |= NV40_NEW_ZSA; +} + +static void +nv40_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_zsa_state *zsaso = hwcso; + + so_ref(NULL, &zsaso->so); + FREE(zsaso); +} + +static void * +nv40_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_vertex_program *vp; + + vp = CALLOC(1, sizeof(struct nv40_vertex_program)); + vp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + vp->draw = draw_create_vertex_shader(nv40->draw, &vp->pipe); + + return (void *)vp; +} + +static void +nv40_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->vertprog = hwcso; + nv40->dirty |= NV40_NEW_VERTPROG; + nv40->draw_dirty |= NV40_NEW_VERTPROG; +} + +static void +nv40_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_vertex_program *vp = hwcso; + + draw_delete_vertex_shader(nv40->draw, vp->draw); + nv40_vertprog_destroy(nv40, vp); + FREE((void*)vp->pipe.tokens); + FREE(vp); +} + +static void * +nv40_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv40_fragment_program *fp; 
+ + fp = CALLOC(1, sizeof(struct nv40_fragment_program)); + fp->pipe.tokens = tgsi_dup_tokens(cso->tokens); + + tgsi_scan_shader(fp->pipe.tokens, &fp->info); + + return (void *)fp; +} + +static void +nv40_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->fragprog = hwcso; + nv40->dirty |= NV40_NEW_FRAGPROG; +} + +static void +nv40_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv40_fragment_program *fp = hwcso; + + nv40_fragprog_destroy(nv40, fp); + FREE((void*)fp->pipe.tokens); + FREE(fp); +} + +static void +nv40_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->blend_colour = *bcol; + nv40->dirty |= NV40_NEW_BCOL; +} + +static void +nv40_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->clip = *clip; + nv40->dirty |= NV40_NEW_UCP; + nv40->draw_dirty |= NV40_NEW_UCP; +} + +static void +nv40_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->constbuf[shader] = buf->buffer; + nv40->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); + + if (shader == PIPE_SHADER_VERTEX) { + nv40->dirty |= NV40_NEW_VERTPROG; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv40->dirty |= NV40_NEW_FRAGPROG; + } +} + +static void +nv40_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->framebuffer = *fb; + nv40->dirty |= NV40_NEW_FB; +} + +static void +nv40_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + memcpy(nv40->stipple, 
stipple->stipple, 4 * 32); + nv40->dirty |= NV40_NEW_STIPPLE; +} + +static void +nv40_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->scissor = *s; + nv40->dirty |= NV40_NEW_SCISSOR; +} + +static void +nv40_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->viewport = *vpt; + nv40->dirty |= NV40_NEW_VIEWPORT; + nv40->draw_dirty |= NV40_NEW_VIEWPORT; +} + +static void +nv40_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + memcpy(nv40->vtxbuf, vb, sizeof(*vb) * count); + nv40->vtxbuf_nr = count; + + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +static void +nv40_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + memcpy(nv40->vtxelt, ve, sizeof(*ve) * count); + nv40->vtxelt_nr = count; + + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +static void +nv40_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield) +{ + struct nv40_context *nv40 = nv40_context(pipe); + + nv40->edgeflags = bitfield; + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->draw_dirty |= NV40_NEW_ARRAYS; +} + +void +nv40_init_state_functions(struct nv40_context *nv40) +{ + nv40->pipe.create_blend_state = nv40_blend_state_create; + nv40->pipe.bind_blend_state = nv40_blend_state_bind; + nv40->pipe.delete_blend_state = nv40_blend_state_delete; + + nv40->pipe.create_sampler_state = nv40_sampler_state_create; + nv40->pipe.bind_sampler_states = nv40_sampler_state_bind; + nv40->pipe.delete_sampler_state = nv40_sampler_state_delete; + nv40->pipe.set_sampler_textures = nv40_set_sampler_texture; + + nv40->pipe.create_rasterizer_state = 
nv40_rasterizer_state_create; + nv40->pipe.bind_rasterizer_state = nv40_rasterizer_state_bind; + nv40->pipe.delete_rasterizer_state = nv40_rasterizer_state_delete; + + nv40->pipe.create_depth_stencil_alpha_state = + nv40_depth_stencil_alpha_state_create; + nv40->pipe.bind_depth_stencil_alpha_state = + nv40_depth_stencil_alpha_state_bind; + nv40->pipe.delete_depth_stencil_alpha_state = + nv40_depth_stencil_alpha_state_delete; + + nv40->pipe.create_vs_state = nv40_vp_state_create; + nv40->pipe.bind_vs_state = nv40_vp_state_bind; + nv40->pipe.delete_vs_state = nv40_vp_state_delete; + + nv40->pipe.create_fs_state = nv40_fp_state_create; + nv40->pipe.bind_fs_state = nv40_fp_state_bind; + nv40->pipe.delete_fs_state = nv40_fp_state_delete; + + nv40->pipe.set_blend_color = nv40_set_blend_color; + nv40->pipe.set_clip_state = nv40_set_clip_state; + nv40->pipe.set_constant_buffer = nv40_set_constant_buffer; + nv40->pipe.set_framebuffer_state = nv40_set_framebuffer_state; + nv40->pipe.set_polygon_stipple = nv40_set_polygon_stipple; + nv40->pipe.set_scissor_state = nv40_set_scissor_state; + nv40->pipe.set_viewport_state = nv40_set_viewport_state; + + nv40->pipe.set_edgeflags = nv40_set_edgeflags; + nv40->pipe.set_vertex_buffers = nv40_set_vertex_buffers; + nv40->pipe.set_vertex_elements = nv40_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv40/nv40_state.h b/src/gallium/drivers/nv40/nv40_state.h new file mode 100644 index 0000000000..9c55903ae3 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state.h @@ -0,0 +1,91 @@ +#ifndef __NV40_STATE_H__ +#define __NV40_STATE_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + +struct nv40_sampler_state { + uint32_t fmt; + uint32_t wrap; + uint32_t en; + uint32_t filt; + uint32_t bcol; +}; + +struct nv40_vertex_program_exec { + uint32_t data[4]; + boolean has_branch_offset; + int const_index; +}; + +struct nv40_vertex_program_data { + int index; /* immediates == -1 */ + float value[4]; +}; + +struct 
nv40_vertex_program { + struct pipe_shader_state pipe; + + struct draw_vertex_shader *draw; + + boolean translated; + + struct pipe_clip_state ucp; + + struct nv40_vertex_program_exec *insns; + unsigned nr_insns; + struct nv40_vertex_program_data *consts; + unsigned nr_consts; + + struct nouveau_resource *exec; + unsigned exec_start; + struct nouveau_resource *data; + unsigned data_start; + unsigned data_start_min; + + uint32_t ir; + uint32_t or; + uint32_t clip_ctrl; + struct nouveau_stateobj *so; +}; + +struct nv40_fragment_program_data { + unsigned offset; + unsigned index; +}; + +struct nv40_fragment_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + + boolean translated; + unsigned samplers; + + uint32_t *insn; + int insn_len; + + struct nv40_fragment_program_data *consts; + unsigned nr_consts; + + struct pipe_buffer *buffer; + + uint32_t fp_control; + struct nouveau_stateobj *so; +}; + +struct nv40_miptree { + struct pipe_texture base; + + struct pipe_buffer *buffer; + uint total_size; + + struct pipe_texture *shadow_tex; + struct pipe_surface *shadow_surface; + + struct { + uint pitch; + uint *image_offset; + } level[PIPE_MAX_TEXTURE_LEVELS]; +}; + +#endif diff --git a/src/gallium/drivers/nv40/nv40_state_blend.c b/src/gallium/drivers/nv40/nv40_state_blend.c new file mode 100644 index 0000000000..95e6d7394f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_blend.c @@ -0,0 +1,40 @@ +#include "nv40_context.h" + +static boolean +nv40_state_blend_validate(struct nv40_context *nv40) +{ + so_ref(nv40->blend->so, &nv40->state.hw[NV40_STATE_BLEND]); + return TRUE; +} + +struct nv40_state_entry nv40_state_blend = { + .validate = nv40_state_blend_validate, + .dirty = { + .pipe = NV40_NEW_BLEND, + .hw = NV40_STATE_BLEND + } +}; + +static boolean +nv40_state_blend_colour_validate(struct nv40_context *nv40) +{ + struct nouveau_stateobj *so = so_new(2, 0); + struct pipe_blend_color *bcol = &nv40->blend_colour; + + so_method(so, 
nv40->screen->curie, NV40TCL_BLEND_COLOR, 1); + so_data (so, ((float_to_ubyte(bcol->color[3]) << 24) | + (float_to_ubyte(bcol->color[0]) << 16) | + (float_to_ubyte(bcol->color[1]) << 8) | + (float_to_ubyte(bcol->color[2]) << 0))); + + so_ref(so, &nv40->state.hw[NV40_STATE_BCOL]); + return TRUE; +} + +struct nv40_state_entry nv40_state_blend_colour = { + .validate = nv40_state_blend_colour_validate, + .dirty = { + .pipe = NV40_NEW_BCOL, + .hw = NV40_STATE_BCOL + } +}; diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c new file mode 100644 index 0000000000..ce859def10 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -0,0 +1,184 @@ +#include "nv40_context.h" +#include "nv40_state.h" +#include "draw/draw_context.h" + +static struct nv40_state_entry *render_states[] = { + &nv40_state_framebuffer, + &nv40_state_rasterizer, + &nv40_state_scissor, + &nv40_state_stipple, + &nv40_state_fragprog, + &nv40_state_fragtex, + &nv40_state_vertprog, + &nv40_state_blend, + &nv40_state_blend_colour, + &nv40_state_zsa, + &nv40_state_viewport, + &nv40_state_vbo, + NULL +}; + +static struct nv40_state_entry *swtnl_states[] = { + &nv40_state_framebuffer, + &nv40_state_rasterizer, + &nv40_state_scissor, + &nv40_state_stipple, + &nv40_state_fragprog, + &nv40_state_fragtex, + &nv40_state_vertprog, + &nv40_state_blend, + &nv40_state_blend_colour, + &nv40_state_zsa, + &nv40_state_viewport, + &nv40_state_vtxfmt, + NULL +}; + +static void +nv40_state_do_validate(struct nv40_context *nv40, + struct nv40_state_entry **states) +{ + const struct pipe_framebuffer_state *fb = &nv40->framebuffer; + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) + fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + if (fb->zsbuf) + fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + + while (*states) { + struct nv40_state_entry *e = *states; + + if (nv40->dirty & e->dirty.pipe) { + if (e->validate(nv40)) + nv40->state.dirty |= (1ULL << e->dirty.hw); 
+ } + + states++; + } + nv40->dirty = 0; +} + +void +nv40_state_emit(struct nv40_context *nv40) +{ + struct nv40_state *state = &nv40->state; + struct nv40_screen *screen = nv40->screen; + unsigned i, samplers; + uint64_t states; + + if (nv40->pctx_id != screen->cur_pctx) { + for (i = 0; i < NV40_STATE_MAX; i++) { + if (state->hw[i] && screen->state[i] != state->hw[i]) + state->dirty |= (1ULL << i); + } + + screen->cur_pctx = nv40->pctx_id; + } + + for (i = 0, states = state->dirty; states; i++) { + if (!(states & (1ULL << i))) + continue; + so_ref (state->hw[i], &nv40->screen->state[i]); + if (state->hw[i]) + so_emit(nv40->nvws, nv40->screen->state[i]); + states &= ~(1ULL << i); + } + + if (state->dirty & ((1ULL << NV40_STATE_FRAGPROG) | + (1ULL << NV40_STATE_FRAGTEX0))) { + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (2); + BEGIN_RING(curie, NV40TCL_TEX_CACHE_CTL, 1); + OUT_RING (1); + } + + state->dirty = 0; + + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FB]); + for (i = 0, samplers = state->fp_samplers; i < 16 && samplers; i++) { + if (!(samplers & (1 << i))) + continue; + so_emit_reloc_markers(nv40->nvws, + state->hw[NV40_STATE_FRAGTEX0+i]); + samplers &= ~(1ULL << i); + } + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_FRAGPROG]); + if (state->hw[NV40_STATE_VTXBUF] && nv40->render_mode == HW) + so_emit_reloc_markers(nv40->nvws, state->hw[NV40_STATE_VTXBUF]); +} + +boolean +nv40_state_validate(struct nv40_context *nv40) +{ + boolean was_sw = nv40->fallback_swtnl ? TRUE : FALSE; + + if (nv40->render_mode != HW) { + /* Don't even bother trying to go back to hw if none + * of the states that caused swtnl previously have changed. 
+ */ + if ((nv40->fallback_swtnl & nv40->dirty) + != nv40->fallback_swtnl) + return FALSE; + + /* Attempt to go to hwtnl again */ + nv40->pipe.flush(&nv40->pipe, 0, NULL); + nv40->dirty |= (NV40_NEW_VIEWPORT | + NV40_NEW_VERTPROG | + NV40_NEW_ARRAYS); + nv40->render_mode = HW; + } + + nv40_state_do_validate(nv40, render_states); + if (nv40->fallback_swtnl || nv40->fallback_swrast) + return FALSE; + + if (was_sw) + NOUVEAU_ERR("swtnl->hw\n"); + + return TRUE; +} + +boolean +nv40_state_validate_swtnl(struct nv40_context *nv40) +{ + struct draw_context *draw = nv40->draw; + + /* Setup for swtnl */ + if (nv40->render_mode == HW) { + NOUVEAU_ERR("hw->swtnl 0x%08x\n", nv40->fallback_swtnl); + nv40->pipe.flush(&nv40->pipe, 0, NULL); + nv40->dirty |= (NV40_NEW_VIEWPORT | + NV40_NEW_VERTPROG | + NV40_NEW_ARRAYS); + nv40->render_mode = SWTNL; + } + + if (nv40->draw_dirty & NV40_NEW_VERTPROG) + draw_bind_vertex_shader(draw, nv40->vertprog->draw); + + if (nv40->draw_dirty & NV40_NEW_RAST) + draw_set_rasterizer_state(draw, &nv40->rasterizer->pipe); + + if (nv40->draw_dirty & NV40_NEW_UCP) + draw_set_clip_state(draw, &nv40->clip); + + if (nv40->draw_dirty & NV40_NEW_VIEWPORT) + draw_set_viewport_state(draw, &nv40->viewport); + + if (nv40->draw_dirty & NV40_NEW_ARRAYS) { + draw_set_edgeflags(draw, nv40->edgeflags); + draw_set_vertex_buffers(draw, nv40->vtxbuf_nr, nv40->vtxbuf); + draw_set_vertex_elements(draw, nv40->vtxelt_nr, nv40->vtxelt); + } + + nv40_state_do_validate(nv40, swtnl_states); + if (nv40->fallback_swrast) { + NOUVEAU_ERR("swtnl->swrast 0x%08x\n", nv40->fallback_swrast); + return FALSE; + } + + nv40->draw_dirty = 0; + return TRUE; +} + diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c new file mode 100644 index 0000000000..454abad31f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -0,0 +1,162 @@ +#include "nv40_context.h" +#include "nouveau/nouveau_util.h" + +static struct pipe_buffer * 
+nv40_surface_buffer(struct pipe_surface *surface)
+{
+	/* surface->texture is treated as an nv40_miptree; hand back the
+	 * buffer that miptree points at.
+	 */
+	struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture;
+	return mt->buffer;
+}
+
+/* Build the framebuffer state object: DMA object, offset and pitch for each
+ * enabled colour target and for the depth/stencil buffer, followed by the
+ * render-target enable mask, format word and dimensions.  The object is
+ * stored in state.hw[NV40_STATE_FB].  Always returns TRUE.
+ */
+static boolean
+nv40_state_framebuffer_validate(struct nv40_context *nv40)
+{
+	struct pipe_framebuffer_state *fb = &nv40->framebuffer;
+	struct pipe_surface *rt[4], *zeta;
+	uint32_t rt_enable, rt_format;
+	int i, colour_format = 0, zeta_format = 0;
+	struct nouveau_stateobj *so = so_new(64, 10);
+	unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM;
+	unsigned w = fb->width;
+	unsigned h = fb->height;
+
+	/* rt[] only has room for four colour targets. */
+	assert(fb->nr_cbufs <= 4);
+
+	rt_enable = 0;
+	for (i = 0; i < fb->nr_cbufs; i++) {
+		/* Every colour buffer must use the same format; the first
+		 * one bound decides it.
+		 */
+		if (colour_format) {
+			assert(colour_format == fb->cbufs[i]->format);
+		} else {
+			colour_format = fb->cbufs[i]->format;
+		}
+
+		/* Enable and record every bound buffer, not only the first:
+		 * the MRT check, the swizzle asserts and the per-target
+		 * emission below all read rt_enable and rt[1..3].
+		 */
+		rt_enable |= (NV40TCL_RT_ENABLE_COLOR0 << i);
+		rt[i] = fb->cbufs[i];
+	}
+
+	if (rt_enable & (NV40TCL_RT_ENABLE_COLOR1 | NV40TCL_RT_ENABLE_COLOR2 |
+			 NV40TCL_RT_ENABLE_COLOR3))
+		rt_enable |= NV40TCL_RT_ENABLE_MRT;
+
+	if (fb->zsbuf) {
+		zeta_format = fb->zsbuf->format;
+		zeta = fb->zsbuf;
+	}
+
+	/* NOTE(review): rt[0] is read unconditionally here, so this assumes
+	 * at least one colour buffer is bound -- confirm that depth-only
+	 * framebuffers never reach this point.
+	 */
+	if (!(rt[0]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) {
+		/* Swizzled targets encode log2 sizes, so both dimensions
+		 * have to be powers of two.
+		 */
+		assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1)));
+		for (i = 1; i < fb->nr_cbufs; i++)
+			assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR));
+
+		rt_format = NV40TCL_RT_FORMAT_TYPE_SWIZZLED |
+			    log2i(fb->width) << NV40TCL_RT_FORMAT_LOG2_WIDTH_SHIFT |
+			    log2i(fb->height) << NV40TCL_RT_FORMAT_LOG2_HEIGHT_SHIFT;
+	}
+	else
+		rt_format = NV40TCL_RT_FORMAT_TYPE_LINEAR;
+
+	/* A format of 0 means "nothing bound"; a default is programmed. */
+	switch (colour_format) {
+	case PIPE_FORMAT_A8R8G8B8_UNORM:
+	case 0:
+		rt_format |= NV40TCL_RT_FORMAT_COLOR_A8R8G8B8;
+		break;
+	case PIPE_FORMAT_R5G6B5_UNORM:
+		rt_format |= NV40TCL_RT_FORMAT_COLOR_R5G6B5;
+		break;
+	default:
+		assert(0);
+	}
+
+	switch (zeta_format) {
+	case PIPE_FORMAT_Z16_UNORM:
+		rt_format |= NV40TCL_RT_FORMAT_ZETA_Z16;
+		break;
+	case PIPE_FORMAT_Z24S8_UNORM:
+	case 0:
+		rt_format |= NV40TCL_RT_FORMAT_ZETA_Z24S8;
+		break;
+	default:
+		assert(0);
+	}
+
+	if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) {
+		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR0, 1);
+		so_reloc (so, nv40_surface_buffer(rt[0]), 0, rt_flags | NOUVEAU_BO_OR,
+			  nv40->nvws->channel->vram->handle,
+			  nv40->nvws->channel->gart->handle);
+		so_method(so, nv40->screen->curie, NV40TCL_COLOR0_PITCH, 2);
+		so_data  (so, rt[0]->stride);
+		so_reloc (so, nv40_surface_buffer(rt[0]), rt[0]->offset, rt_flags |
+			  NOUVEAU_BO_LOW, 0, 0);
+	}
+
+	if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) {
+		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR1, 1);
+		so_reloc (so, nv40_surface_buffer(rt[1]), 0, rt_flags | NOUVEAU_BO_OR,
+			  nv40->nvws->channel->vram->handle,
+			  nv40->nvws->channel->gart->handle);
+		so_method(so, nv40->screen->curie, NV40TCL_COLOR1_OFFSET, 2);
+		so_reloc (so, nv40_surface_buffer(rt[1]), rt[1]->offset, rt_flags |
+			  NOUVEAU_BO_LOW, 0, 0);
+		so_data  (so, rt[1]->stride);
+	}
+
+	if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) {
+		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR2, 1);
+		so_reloc (so, nv40_surface_buffer(rt[2]), 0, rt_flags | NOUVEAU_BO_OR,
+			  nv40->nvws->channel->vram->handle,
+			  nv40->nvws->channel->gart->handle);
+		so_method(so, nv40->screen->curie, NV40TCL_COLOR2_OFFSET, 1);
+		so_reloc (so, nv40_surface_buffer(rt[2]), rt[2]->offset, rt_flags |
+			  NOUVEAU_BO_LOW, 0, 0);
+		so_method(so, nv40->screen->curie, NV40TCL_COLOR2_PITCH, 1);
+		so_data  (so, rt[2]->stride);
+	}
+
+	if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) {
+		so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR3, 1);
+		so_reloc (so, nv40_surface_buffer(rt[3]), 0, rt_flags | NOUVEAU_BO_OR,
+			  nv40->nvws->channel->vram->handle,
+			  nv40->nvws->channel->gart->handle);
+		so_method(so, nv40->screen->curie, NV40TCL_COLOR3_OFFSET, 1);
+		so_reloc (so, nv40_surface_buffer(rt[3]), rt[3]->offset, rt_flags |
+			  NOUVEAU_BO_LOW, 0, 0);
+		so_method(so, nv40->screen->curie, NV40TCL_COLOR3_PITCH, 1);
+		so_data  (so, rt[3]->stride);
+	}
+
+	/* zeta is only initialised when fb->zsbuf was set above, which is
+	 * also the only place zeta_format is assigned.
+	 */
+	if (zeta_format) {
+		so_method(so, nv40->screen->curie, NV40TCL_DMA_ZETA, 1);
+		so_reloc (so, nv40_surface_buffer(zeta), 0, rt_flags | NOUVEAU_BO_OR,
+			  nv40->nvws->channel->vram->handle,
+			  nv40->nvws->channel->gart->handle);
+		so_method(so, nv40->screen->curie, NV40TCL_ZETA_OFFSET, 1);
+		so_reloc (so, nv40_surface_buffer(zeta), zeta->offset, rt_flags |
+			  NOUVEAU_BO_LOW, 0, 0);
+		so_method(so, nv40->screen->curie, NV40TCL_ZETA_PITCH, 1);
+		so_data  (so, zeta->stride);
+	}
+
+	so_method(so, nv40->screen->curie, NV40TCL_RT_ENABLE, 1);
+	so_data  (so, rt_enable);
+	so_method(so, nv40->screen->curie, NV40TCL_RT_HORIZ, 3);
+	so_data  (so, (w << 16) | 0);
+	so_data  (so, (h << 16) | 0);
+	so_data  (so, rt_format);
+	so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_HORIZ, 2);
+	so_data  (so, (w << 16) | 0);
+	so_data  (so, (h << 16) | 0);
+	so_method(so, nv40->screen->curie, NV40TCL_VIEWPORT_CLIP_HORIZ(0), 2);
+	so_data  (so, ((w - 1) << 16) | 0);
+	so_data  (so, ((h - 1) << 16) | 0);
+	so_method(so, nv40->screen->curie, 0x1d88, 1);
+	so_data  (so, (1 << 12) | h);
+
+	so_ref(so, &nv40->state.hw[NV40_STATE_FB]);
+	return TRUE;
+}
+
+struct nv40_state_entry nv40_state_framebuffer = {
+	.validate = nv40_state_framebuffer_validate,
+	.dirty = {
+		.pipe = NV40_NEW_FB,
+		.hw = NV40_STATE_FB
+	}
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_rasterizer.c b/src/gallium/drivers/nv40/nv40_state_rasterizer.c
new file mode 100644
index 0000000000..9ecda5990f
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_rasterizer.c
@@ -0,0 +1,17 @@
+#include "nv40_context.h"
+
+/* Point the RAST hardware slot at the state object carried by the currently
+ * bound rasterizer CSO.  Always returns TRUE.
+ */
+static boolean
+nv40_state_rasterizer_validate(struct nv40_context *nv40)
+{
+	so_ref(nv40->rasterizer->so,
+	       &nv40->state.hw[NV40_STATE_RAST]);
+	return TRUE;
+}
+
+struct nv40_state_entry nv40_state_rasterizer = {
+	.validate = nv40_state_rasterizer_validate,
+	.dirty = {
+		.pipe = NV40_NEW_RAST,
+		.hw = NV40_STATE_RAST
+	}
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_scissor.c b/src/gallium/drivers/nv40/nv40_state_scissor.c
new file mode
100644
index 0000000000..285239ef41
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_scissor.c
@@ -0,0 +1,35 @@
+#include "nv40_context.h"
+
+/* Rebuild the scissor state object.  Returns FALSE (nothing to re-emit) when
+ * a scissor object already exists and scissoring was, and still is, off;
+ * otherwise installs a new object in state.hw[NV40_STATE_SCISSOR] and
+ * returns TRUE.
+ */
+static boolean
+nv40_state_scissor_validate(struct nv40_context *nv40)
+{
+	struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe;
+	struct pipe_scissor_state *s = &nv40->scissor;
+	struct nouveau_stateobj *so;
+
+	if (nv40->state.hw[NV40_STATE_SCISSOR] &&
+	    (rast->scissor == 0 && nv40->state.scissor_enabled == 0))
+		return FALSE;
+	/* Remember what is about to be programmed for the next early-out. */
+	nv40->state.scissor_enabled = rast->scissor;
+
+	so = so_new(3, 0);
+	so_method(so, nv40->screen->curie, NV40TCL_SCISSOR_HORIZ, 2);
+	if (nv40->state.scissor_enabled) {
+		/* Each word packs (extent << 16) | origin. */
+		so_data  (so, ((s->maxx - s->minx) << 16) | s->minx);
+		so_data  (so, ((s->maxy - s->miny) << 16) | s->miny);
+	} else {
+		/* Scissor off: program a 4096x4096 rectangle at the origin. */
+		so_data  (so, 4096 << 16);
+		so_data  (so, 4096 << 16);
+	}
+
+	so_ref(so, &nv40->state.hw[NV40_STATE_SCISSOR]);
+	return TRUE;
+}
+
+struct nv40_state_entry nv40_state_scissor = {
+	.validate = nv40_state_scissor_validate,
+	.dirty = {
+		.pipe = NV40_NEW_SCISSOR | NV40_NEW_RAST,
+		.hw = NV40_STATE_SCISSOR
+	}
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_stipple.c b/src/gallium/drivers/nv40/nv40_state_stipple.c
new file mode 100644
index 0000000000..b51024ad9b
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_stipple.c
@@ -0,0 +1,39 @@
+#include "nv40_context.h"
+
+/* Rebuild the polygon-stipple state object.  Returns FALSE (nothing to
+ * re-emit) when a stipple object already exists and stippling was, and still
+ * is, off; otherwise installs a new object in state.hw[NV40_STATE_STIPPLE]
+ * and returns TRUE.
+ */
+static boolean
+nv40_state_stipple_validate(struct nv40_context *nv40)
+{
+	struct pipe_rasterizer_state *rast = &nv40->rasterizer->pipe;
+	struct nouveau_grobj *curie = nv40->screen->curie;
+	struct nouveau_stateobj *so;
+
+	if (nv40->state.hw[NV40_STATE_STIPPLE] &&
+	   (rast->poly_stipple_enable == 0 && nv40->state.stipple_enabled == 0))
+		return FALSE;
+	/* Track the programmed enable the same way the scissor validator
+	 * tracks scissor_enabled.  Without this the flag stays 0, and a
+	 * switch from enabled to disabled hits the early-out above, leaving
+	 * the "enable" object installed.
+	 */
+	nv40->state.stipple_enabled = rast->poly_stipple_enable;
+
+	if (rast->poly_stipple_enable) {
+		unsigned i;
+
+		/* 2 words for the enable + 1 header and 32 pattern words. */
+		so = so_new(35, 0);
+		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
+		so_data  (so, 1);
+		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_PATTERN(0), 32);
+		for (i = 0; i < 32; i++)
+			so_data(so, nv40->stipple[i]);
+	} else {
+		so = so_new(2, 0);
+		so_method(so, curie, NV40TCL_POLYGON_STIPPLE_ENABLE, 1);
+		so_data  (so, 0);
+	}
+
+	so_ref(so, &nv40->state.hw[NV40_STATE_STIPPLE]);
+	return TRUE;
+}
+
+struct nv40_state_entry nv40_state_stipple = {
+	.validate = nv40_state_stipple_validate,
+	.dirty = {
+		.pipe = NV40_NEW_STIPPLE | NV40_NEW_RAST,
+		.hw = NV40_STATE_STIPPLE,
+	}
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_viewport.c b/src/gallium/drivers/nv40/nv40_state_viewport.c
new file mode 100644
index 0000000000..869a55b405
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_viewport.c
@@ -0,0 +1,67 @@
+#include "nv40_context.h"
+
+/* Rebuild the viewport transform state object.  The real translate/scale
+ * values are programmed only when rendering in HW mode with clipping not
+ * bypassed; otherwise an identity-style transform is programmed.  Returns
+ * FALSE when the existing object is still valid, TRUE after installing a
+ * new one in state.hw[NV40_STATE_VIEWPORT].
+ */
+static boolean
+nv40_state_viewport_validate(struct nv40_context *nv40)
+{
+	struct pipe_viewport_state *vpt = &nv40->viewport;
+	struct nouveau_stateobj *so;
+	unsigned bypass;
+
+	if (nv40->render_mode == HW && !nv40->rasterizer->pipe.bypass_clipping)
+		bypass = 0;
+	else
+		bypass = 1;
+
+	/* Skip the rebuild when the bypass mode is unchanged and either the
+	 * viewport values are not used (bypass) or they did not change.
+	 */
+	if (nv40->state.hw[NV40_STATE_VIEWPORT] &&
+	    (bypass || !(nv40->dirty & NV40_NEW_VIEWPORT)) &&
+	    nv40->state.viewport_bypass == bypass)
+		return FALSE;
+	nv40->state.viewport_bypass = bypass;
+
+	so = so_new(11, 0);
+	if (!bypass) {
+		so_method(so, nv40->screen->curie,
+			  NV40TCL_VIEWPORT_TRANSLATE_X, 8);
+		so_data  (so, fui(vpt->translate[0]));
+		so_data  (so, fui(vpt->translate[1]));
+		so_data  (so, fui(vpt->translate[2]));
+		so_data  (so, fui(vpt->translate[3]));
+		so_data  (so, fui(vpt->scale[0]));
+		so_data  (so, fui(vpt->scale[1]));
+		so_data  (so, fui(vpt->scale[2]));
+		so_data  (so, fui(vpt->scale[3]));
+		so_method(so, nv40->screen->curie, 0x1d78, 1);
+		so_data  (so, 1);
+	} else {
+		so_method(so, nv40->screen->curie,
+			  NV40TCL_VIEWPORT_TRANSLATE_X, 8);
+		so_data  (so, fui(0.0));
+		so_data  (so, fui(0.0));
+		so_data  (so, fui(0.0));
+		so_data  (so, fui(0.0));
+		so_data  (so, fui(1.0));
+		so_data  (so, fui(1.0));
+		so_data  (so, fui(1.0));
+		so_data  (so, fui(0.0));
+		/* Not entirely certain what this is yet.  The DDX uses this
+		 * value also as it fixes rendering when you pass
+		 * pre-transformed vertices to the GPU.  My best guess is that
+		 * this bypasses some culling/clipping stage.  Might be worth
+		 * noting that points/lines are unaffected by whatever this
+		 * value fixes, only filled polygons are affected.
+		 */
+		so_method(so, nv40->screen->curie, 0x1d78, 1);
+		so_data  (so, 0x110);
+	}
+
+	so_ref(so, &nv40->state.hw[NV40_STATE_VIEWPORT]);
+	return TRUE;
+}
+
+struct nv40_state_entry nv40_state_viewport = {
+	.validate = nv40_state_viewport_validate,
+	.dirty = {
+		.pipe = NV40_NEW_VIEWPORT | NV40_NEW_RAST,
+		.hw = NV40_STATE_VIEWPORT
+	}
+};
diff --git a/src/gallium/drivers/nv40/nv40_state_zsa.c b/src/gallium/drivers/nv40/nv40_state_zsa.c
new file mode 100644
index 0000000000..fb760677c8
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_state_zsa.c
@@ -0,0 +1,17 @@
+#include "nv40_context.h"
+
+/* Point the ZSA hardware slot at the state object carried by the currently
+ * bound depth/stencil/alpha CSO.  Always returns TRUE.
+ */
+static boolean
+nv40_state_zsa_validate(struct nv40_context *nv40)
+{
+	so_ref(nv40->zsa->so,
+	       &nv40->state.hw[NV40_STATE_ZSA]);
+	return TRUE;
+}
+
+struct nv40_state_entry nv40_state_zsa = {
+	.validate = nv40_state_zsa_validate,
+	.dirty = {
+		.pipe = NV40_NEW_ZSA,
+		.hw = NV40_STATE_ZSA
+	}
+};
diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c
new file mode 100644
index 0000000000..c4a5fb20d9
--- /dev/null
+++ b/src/gallium/drivers/nv40/nv40_surface.c
@@ -0,0 +1,72 @@
+
+/**************************************************************************
+ *
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "nv40_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_tile.h" + +static void +nv40_surface_copy(struct pipe_context *pipe, boolean do_flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv04_surface_2d *eng2d = nv40->screen->eng2d; + + if (do_flip) { + desty += height; + while (height--) { + eng2d->copy(eng2d, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + return; + } + + eng2d->copy(eng2d, dest, destx, desty, src, srcx, srcy, width, height); +} + +static void +nv40_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nv04_surface_2d *eng2d = nv40->screen->eng2d; + + eng2d->fill(eng2d, dest, destx, desty, width, height, value); +} + +void +nv40_init_surface_functions(struct nv40_context *nv40) +{ + nv40->pipe.surface_copy = nv40_surface_copy; + nv40->pipe.surface_fill = nv40_surface_fill; +} diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c new file mode 100644 index 0000000000..8f1834628f --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -0,0 +1,555 @@ +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +#include "nouveau/nouveau_channel.h" +#include "nouveau/nouveau_pushbuf.h" +#include "nouveau/nouveau_util.h" + +#define FORCE_SWTNL 0 + +static INLINE int +nv40_vbo_format_to_hw(enum pipe_format pipe, unsigned *fmt, unsigned *ncomp) +{ + switch (pipe) { + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R32G32_FLOAT: + case 
PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + *fmt = NV40TCL_VTXFMT_TYPE_FLOAT; + break; + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R8G8B8A8_UNORM: + *fmt = NV40TCL_VTXFMT_TYPE_UBYTE; + break; + case PIPE_FORMAT_R16_SSCALED: + case PIPE_FORMAT_R16G16_SSCALED: + case PIPE_FORMAT_R16G16B16_SSCALED: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *fmt = NV40TCL_VTXFMT_TYPE_USHORT; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); + return 1; + } + + switch (pipe) { + case PIPE_FORMAT_R8_UNORM: + case PIPE_FORMAT_R32_FLOAT: + case PIPE_FORMAT_R16_SSCALED: + *ncomp = 1; + break; + case PIPE_FORMAT_R8G8_UNORM: + case PIPE_FORMAT_R32G32_FLOAT: + case PIPE_FORMAT_R16G16_SSCALED: + *ncomp = 2; + break; + case PIPE_FORMAT_R8G8B8_UNORM: + case PIPE_FORMAT_R32G32B32_FLOAT: + case PIPE_FORMAT_R16G16B16_SSCALED: + *ncomp = 3; + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + case PIPE_FORMAT_R32G32B32A32_FLOAT: + case PIPE_FORMAT_R16G16B16A16_SSCALED: + *ncomp = 4; + break; + default: + NOUVEAU_ERR("Unknown format %s\n", pf_name(pipe)); + return 1; + } + + return 0; +} + +static boolean +nv40_vbo_set_idxbuf(struct nv40_context *nv40, struct pipe_buffer *ib, + unsigned ib_size) +{ + struct pipe_screen *pscreen = &nv40->screen->pipe; + unsigned type; + + if (!ib) { + nv40->idxbuf = NULL; + nv40->idxbuf_format = 0xdeadbeef; + return FALSE; + } + + if (!pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF) || ib_size == 1) + return FALSE; + + switch (ib_size) { + case 2: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U16; + break; + case 4: + type = NV40TCL_IDXBUF_FORMAT_TYPE_U32; + break; + default: + return FALSE; + } + + if (ib != nv40->idxbuf || + type != nv40->idxbuf_format) { + nv40->dirty |= NV40_NEW_ARRAYS; + nv40->idxbuf = ib; + nv40->idxbuf_format = type; + } + + return TRUE; +} + +static boolean +nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, + int 
attrib, struct pipe_vertex_element *ve, + struct pipe_vertex_buffer *vb) +{ + struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_grobj *curie = nv40->screen->curie; + unsigned type, ncomp; + void *map; + + if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) + return FALSE; + + map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map += vb->buffer_offset + ve->src_offset; + + switch (type) { + case NV40TCL_VTXFMT_TYPE_FLOAT: + { + float *v = map; + + switch (ncomp) { + case 4: + so_method(so, curie, NV40TCL_VTX_ATTR_4F_X(attrib), 4); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + so_data (so, fui(v[3])); + break; + case 3: + so_method(so, curie, NV40TCL_VTX_ATTR_3F_X(attrib), 3); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + so_data (so, fui(v[2])); + break; + case 2: + so_method(so, curie, NV40TCL_VTX_ATTR_2F_X(attrib), 2); + so_data (so, fui(v[0])); + so_data (so, fui(v[1])); + break; + case 1: + so_method(so, curie, NV40TCL_VTX_ATTR_1F(attrib), 1); + so_data (so, fui(v[0])); + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + } + break; + default: + ws->buffer_unmap(ws, vb->buffer); + return FALSE; + } + + ws->buffer_unmap(ws, vb->buffer); + + return TRUE; +} + +boolean +nv40_draw_arrays(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_channel *chan = nv40->nvws->channel; + unsigned restart; + + nv40_vbo_set_idxbuf(nv40, NULL, 0); + if (FORCE_SWTNL || !nv40_state_validate(nv40)) { + return nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + } + + while (count) { + unsigned vc, nr; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); + continue; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (vc & 0xff); + if 
(nr) { + BEGIN_RING(curie, NV40TCL_VB_VERTEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(curie, NV40TCL_VB_VERTEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + count -= vc; + start = restart; + } + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +static INLINE void +nv40_draw_elements_u08(struct nv40_context *nv40, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv40->nvws->channel; + + while (count) { + uint8_t *elts = (uint8_t *)ib + start; + unsigned vc, push, restart; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + if (vc & 1) { + BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +static INLINE void +nv40_draw_elements_u16(struct nv40_context *nv40, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv40->nvws->channel; + + while (count) { + uint16_t *elts = (uint16_t *)ib + start; + unsigned vc, push, restart; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 2, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING 
(nvgl_primitive(mode)); + + if (vc & 1) { + BEGIN_RING(curie, NV40TCL_VB_ELEMENT_U32, 1); + OUT_RING (elts[0]); + elts++; vc--; + } + + while (vc) { + unsigned i; + + push = MIN2(vc, 2047 * 2); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U16, push >> 1); + for (i = 0; i < push; i+=2) + OUT_RING((elts[i+1] << 16) | elts[i]); + + vc -= push; + elts += push; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +static INLINE void +nv40_draw_elements_u32(struct nv40_context *nv40, void *ib, + unsigned mode, unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv40->nvws->channel; + + while (count) { + uint32_t *elts = (uint32_t *)ib + start; + unsigned vc, push, restart; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 5, 1, + mode, start, count, &restart); + if (vc == 0) { + FIRE_RING(NULL); + continue; + } + count -= vc; + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + while (vc) { + push = MIN2(vc, 2047); + + BEGIN_RING_NI(curie, NV40TCL_VB_ELEMENT_U32, push); + OUT_RINGp (elts, push); + + vc -= push; + elts += push; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + start = restart; + } +} + +/* + * Draw indexed primitives by pushing the index data inline: map the index + * buffer ib for CPU reads and hand the mapping to the u08/u16/u32 emitter + * selected by ib_size (1, 2 or 4). + * Returns FALSE only when the mapping fails; an unsupported ib_size is + * logged and the function still returns TRUE after unmapping. + */ +static boolean +nv40_draw_elements_inline(struct pipe_context *pipe, + struct pipe_buffer *ib, unsigned ib_size, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct pipe_winsys *ws = pipe->winsys; + void *map; + + map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); + /* Test the mapping, not the buffer handle: a failed buffer_map() leaves ib non-NULL. */ + if (!map) { + NOUVEAU_ERR("failed mapping ib\n"); + return FALSE; + } + + switch (ib_size) { + case 1: + nv40_draw_elements_u08(nv40, map, mode, start, count); + break; + case 2: + nv40_draw_elements_u16(nv40, map, mode, start, count); + break; + case 4: + nv40_draw_elements_u32(nv40, map, mode, start, count); + break; + default: + NOUVEAU_ERR("invalid idxbuf fmt %d\n", ib_size); + break; + } + + 
ws->buffer_unmap(ws, ib); + return TRUE; +} + +static boolean +nv40_draw_elements_vbo(struct pipe_context *pipe, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + struct nouveau_channel *chan = nv40->nvws->channel; + unsigned restart; + + while (count) { + unsigned nr, vc; + + nv40_state_emit(nv40); + + vc = nouveau_vbuf_split(chan->pushbuf->remaining, 6, 256, + mode, start, count, &restart); + if (!vc) { + FIRE_RING(NULL); + continue; + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (nvgl_primitive(mode)); + + nr = (vc & 0xff); + if (nr) { + BEGIN_RING(curie, NV40TCL_VB_INDEX_BATCH, 1); + OUT_RING (((nr - 1) << 24) | start); + start += nr; + } + + nr = vc >> 8; + while (nr) { + unsigned push = nr > 2047 ? 2047 : nr; + + nr -= push; + + BEGIN_RING_NI(curie, NV40TCL_VB_INDEX_BATCH, push); + while (push--) { + OUT_RING(((0x100 - 1) << 24) | start); + start += 0x100; + } + } + + BEGIN_RING(curie, NV40TCL_BEGIN_END, 1); + OUT_RING (0); + + count -= vc; + start = restart; + } + + return TRUE; +} + +boolean +nv40_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct nv40_context *nv40 = nv40_context(pipe); + boolean idxbuf; + + idxbuf = nv40_vbo_set_idxbuf(nv40, indexBuffer, indexSize); + if (FORCE_SWTNL || !nv40_state_validate(nv40)) { + return nv40_draw_elements_swtnl(pipe, NULL, 0, + mode, start, count); + } + + if (idxbuf) { + nv40_draw_elements_vbo(pipe, mode, start, count); + } else { + nv40_draw_elements_inline(pipe, indexBuffer, indexSize, + mode, start, count); + } + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +static boolean +nv40_vbo_validate(struct nv40_context *nv40) +{ + struct nouveau_stateobj *vtxbuf, *vtxfmt, *sattr = NULL; + struct nouveau_grobj *curie = nv40->screen->curie; + struct pipe_buffer *ib = nv40->idxbuf; + unsigned ib_format = nv40->idxbuf_format; + unsigned vb_flags = 
NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD; + int hw; + + if (nv40->edgeflags) { + nv40->fallback_swtnl |= NV40_NEW_ARRAYS; + return FALSE; + } + + vtxbuf = so_new(20, 18); + so_method(vtxbuf, curie, NV40TCL_VTXBUF_ADDRESS(0), nv40->vtxelt_nr); + vtxfmt = so_new(17, 0); + so_method(vtxfmt, curie, NV40TCL_VTXFMT(0), nv40->vtxelt_nr); + + for (hw = 0; hw < nv40->vtxelt_nr; hw++) { + struct pipe_vertex_element *ve; + struct pipe_vertex_buffer *vb; + unsigned type, ncomp; + + ve = &nv40->vtxelt[hw]; + vb = &nv40->vtxbuf[ve->vertex_buffer_index]; + + if (!vb->stride) { + if (!sattr) + sattr = so_new(16 * 5, 0); + + if (nv40_vbo_static_attrib(nv40, sattr, hw, ve, vb)) { + so_data(vtxbuf, 0); + so_data(vtxfmt, NV40TCL_VTXFMT_TYPE_FLOAT); + continue; + } + } + + if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) { + nv40->fallback_swtnl |= NV40_NEW_ARRAYS; + so_ref(NULL, &vtxbuf); + so_ref(NULL, &vtxfmt); + /* sattr may already have been allocated for a stride-0 element; release it like vtxbuf/vtxfmt so this bail-out does not leak it. */ + if (sattr) + so_ref(NULL, &sattr); + return FALSE; + } + + so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, + vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, + 0, NV40TCL_VTXBUF_ADDRESS_DMA1); + so_data (vtxfmt, ((vb->stride << NV40TCL_VTXFMT_STRIDE_SHIFT) | + (ncomp << NV40TCL_VTXFMT_SIZE_SHIFT) | type)); + } + + if (ib) { + so_method(vtxbuf, curie, NV40TCL_IDXBUF_ADDRESS, 2); + so_reloc (vtxbuf, ib, 0, vb_flags | NOUVEAU_BO_LOW, 0, 0); + so_reloc (vtxbuf, ib, ib_format, vb_flags | NOUVEAU_BO_OR, + 0, NV40TCL_IDXBUF_FORMAT_DMA1); + } + + so_method(vtxbuf, curie, 0x1710, 1); + so_data (vtxbuf, 0); + + so_ref(vtxbuf, &nv40->state.hw[NV40_STATE_VTXBUF]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXBUF); + so_ref(vtxfmt, &nv40->state.hw[NV40_STATE_VTXFMT]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXFMT); + so_ref(sattr, &nv40->state.hw[NV40_STATE_VTXATTR]); + nv40->state.dirty |= (1ULL << NV40_STATE_VTXATTR); + return FALSE; +} + +struct nv40_state_entry nv40_state_vbo = { + .validate = nv40_vbo_validate, + .dirty = { + .pipe = NV40_NEW_ARRAYS, + .hw = 0, + } +}; + diff --git
a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c new file mode 100644 index 0000000000..0862386638 --- /dev/null +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -0,0 +1,1070 @@ +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv40_context.h" +#include "nv40_state.h" + +/* TODO (at least...): + * 1. Indexed consts + ARL + * 3. NV_vp11, NV_vp2, NV_vp3 features + * - extra arith opcodes + * - branching + * - texture sampling + * - indexed attribs + * - indexed results + * 4. bugs + */ + +#define SWZ_X 0 +#define SWZ_Y 1 +#define SWZ_Z 2 +#define SWZ_W 3 +#define MASK_X 8 +#define MASK_Y 4 +#define MASK_Z 2 +#define MASK_W 1 +#define MASK_ALL (MASK_X|MASK_Y|MASK_Z|MASK_W) +#define DEF_SCALE 0 +#define DEF_CTEST 0 +#include "nv40_shader.h" + +#define swz(s,x,y,z,w) nv40_sr_swz((s), SWZ_##x, SWZ_##y, SWZ_##z, SWZ_##w) +#define neg(s) nv40_sr_neg((s)) +#define abs(s) nv40_sr_abs((s)) + +#define NV40_VP_INST_DEST_CLIP(n) ((~0 - 6) + (n)) + +struct nv40_vpc { + struct nv40_vertex_program *vp; + + struct nv40_vertex_program_exec *vpi; + + unsigned r_temps; + unsigned r_temps_discard; + struct nv40_sreg r_result[PIPE_MAX_SHADER_OUTPUTS]; + struct nv40_sreg *r_address; + struct nv40_sreg *r_temp; + + struct nv40_sreg *imm; + unsigned nr_imm; + + unsigned hpos_idx; +}; + +static struct nv40_sreg +temp(struct nv40_vpc *vpc) +{ + int idx = ffs(~vpc->r_temps) - 1; + + if (idx < 0) { + NOUVEAU_ERR("out of temps!!\n"); + assert(0); + return nv40_sr(NV40SR_TEMP, 0); + } + + vpc->r_temps |= (1 << idx); + vpc->r_temps_discard |= (1 << idx); + return nv40_sr(NV40SR_TEMP, idx); +} + +static INLINE void +release_temps(struct nv40_vpc *vpc) +{ + vpc->r_temps &= ~vpc->r_temps_discard; + vpc->r_temps_discard = 0; +} + +static struct nv40_sreg +constant(struct nv40_vpc *vpc, int pipe, float x, float y, float z, 
float w) +{ + struct nv40_vertex_program *vp = vpc->vp; + struct nv40_vertex_program_data *vpd; + int idx; + + if (pipe >= 0) { + for (idx = 0; idx < vp->nr_consts; idx++) { + if (vp->consts[idx].index == pipe) + return nv40_sr(NV40SR_CONST, idx); + } + } + + idx = vp->nr_consts++; + vp->consts = realloc(vp->consts, sizeof(*vpd) * vp->nr_consts); + vpd = &vp->consts[idx]; + + vpd->index = pipe; + vpd->value[0] = x; + vpd->value[1] = y; + vpd->value[2] = z; + vpd->value[3] = w; + return nv40_sr(NV40SR_CONST, idx); +} + +#define arith(cc,s,o,d,m,s0,s1,s2) \ + nv40_vp_arith((cc), (s), NV40_VP_INST_##o, (d), (m), (s0), (s1), (s2)) + +static void +emit_src(struct nv40_vpc *vpc, uint32_t *hw, int pos, struct nv40_sreg src) +{ + struct nv40_vertex_program *vp = vpc->vp; + uint32_t sr = 0; + + switch (src.type) { + case NV40SR_TEMP: + sr |= (NV40_VP_SRC_REG_TYPE_TEMP << NV40_VP_SRC_REG_TYPE_SHIFT); + sr |= (src.index << NV40_VP_SRC_TEMP_SRC_SHIFT); + break; + case NV40SR_INPUT: + sr |= (NV40_VP_SRC_REG_TYPE_INPUT << + NV40_VP_SRC_REG_TYPE_SHIFT); + vp->ir |= (1 << src.index); + hw[1] |= (src.index << NV40_VP_INST_INPUT_SRC_SHIFT); + break; + case NV40SR_CONST: + sr |= (NV40_VP_SRC_REG_TYPE_CONST << + NV40_VP_SRC_REG_TYPE_SHIFT); + assert(vpc->vpi->const_index == -1 || + vpc->vpi->const_index == src.index); + vpc->vpi->const_index = src.index; + break; + case NV40SR_NONE: + sr |= (NV40_VP_SRC_REG_TYPE_INPUT << + NV40_VP_SRC_REG_TYPE_SHIFT); + break; + default: + assert(0); + } + + if (src.negate) + sr |= NV40_VP_SRC_NEGATE; + + if (src.abs) + hw[0] |= (1 << (21 + pos)); + + sr |= ((src.swz[0] << NV40_VP_SRC_SWZ_X_SHIFT) | + (src.swz[1] << NV40_VP_SRC_SWZ_Y_SHIFT) | + (src.swz[2] << NV40_VP_SRC_SWZ_Z_SHIFT) | + (src.swz[3] << NV40_VP_SRC_SWZ_W_SHIFT)); + + switch (pos) { + case 0: + hw[1] |= ((sr & NV40_VP_SRC0_HIGH_MASK) >> + NV40_VP_SRC0_HIGH_SHIFT) << NV40_VP_INST_SRC0H_SHIFT; + hw[2] |= (sr & NV40_VP_SRC0_LOW_MASK) << + NV40_VP_INST_SRC0L_SHIFT; + break; + case 1: + 
hw[2] |= sr << NV40_VP_INST_SRC1_SHIFT; + break; + case 2: + hw[2] |= ((sr & NV40_VP_SRC2_HIGH_MASK) >> + NV40_VP_SRC2_HIGH_SHIFT) << NV40_VP_INST_SRC2H_SHIFT; + hw[3] |= (sr & NV40_VP_SRC2_LOW_MASK) << + NV40_VP_INST_SRC2L_SHIFT; + break; + default: + assert(0); + } +} + +static void +emit_dst(struct nv40_vpc *vpc, uint32_t *hw, int slot, struct nv40_sreg dst) +{ + struct nv40_vertex_program *vp = vpc->vp; + + switch (dst.type) { + case NV40SR_TEMP: + hw[3] |= NV40_VP_INST_DEST_MASK; + if (slot == 0) { + hw[0] |= (dst.index << + NV40_VP_INST_VEC_DEST_TEMP_SHIFT); + } else { + hw[3] |= (dst.index << + NV40_VP_INST_SCA_DEST_TEMP_SHIFT); + } + break; + case NV40SR_OUTPUT: + switch (dst.index) { + case NV40_VP_INST_DEST_COL0 : vp->or |= (1 << 0); break; + case NV40_VP_INST_DEST_COL1 : vp->or |= (1 << 1); break; + case NV40_VP_INST_DEST_BFC0 : vp->or |= (1 << 2); break; + case NV40_VP_INST_DEST_BFC1 : vp->or |= (1 << 3); break; + case NV40_VP_INST_DEST_FOGC : vp->or |= (1 << 4); break; + case NV40_VP_INST_DEST_PSZ : vp->or |= (1 << 5); break; + case NV40_VP_INST_DEST_TC(0): vp->or |= (1 << 14); break; + case NV40_VP_INST_DEST_TC(1): vp->or |= (1 << 15); break; + case NV40_VP_INST_DEST_TC(2): vp->or |= (1 << 16); break; + case NV40_VP_INST_DEST_TC(3): vp->or |= (1 << 17); break; + case NV40_VP_INST_DEST_TC(4): vp->or |= (1 << 18); break; + case NV40_VP_INST_DEST_TC(5): vp->or |= (1 << 19); break; + case NV40_VP_INST_DEST_TC(6): vp->or |= (1 << 20); break; + case NV40_VP_INST_DEST_TC(7): vp->or |= (1 << 21); break; + case NV40_VP_INST_DEST_CLIP(0): + vp->or |= (1 << 6); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE0; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(1): + vp->or |= (1 << 7); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE1; + dst.index = NV40_VP_INST_DEST_FOGC; + break; + case NV40_VP_INST_DEST_CLIP(2): + vp->or |= (1 << 8); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE2; + dst.index = NV40_VP_INST_DEST_FOGC; + 
break; + case NV40_VP_INST_DEST_CLIP(3): + vp->or |= (1 << 9); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE3; + dst.index = NV40_VP_INST_DEST_PSZ; + break; + case NV40_VP_INST_DEST_CLIP(4): + vp->or |= (1 << 10); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE4; + dst.index = NV40_VP_INST_DEST_PSZ; + break; + case NV40_VP_INST_DEST_CLIP(5): + vp->or |= (1 << 11); + vp->clip_ctrl |= NV40TCL_CLIP_PLANE_ENABLE_PLANE5; + dst.index = NV40_VP_INST_DEST_PSZ; + break; + default: + break; + } + + hw[3] |= (dst.index << NV40_VP_INST_DEST_SHIFT); + if (slot == 0) { + hw[0] |= NV40_VP_INST_VEC_RESULT; + hw[0] |= NV40_VP_INST_VEC_DEST_TEMP_MASK | (1<<20); + } else { + hw[3] |= NV40_VP_INST_SCA_RESULT; + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + } + break; + default: + assert(0); + } +} + +static void +nv40_vp_arith(struct nv40_vpc *vpc, int slot, int op, + struct nv40_sreg dst, int mask, + struct nv40_sreg s0, struct nv40_sreg s1, + struct nv40_sreg s2) +{ + struct nv40_vertex_program *vp = vpc->vp; + uint32_t *hw; + + vp->insns = realloc(vp->insns, ++vp->nr_insns * sizeof(*vpc->vpi)); + vpc->vpi = &vp->insns[vp->nr_insns - 1]; + memset(vpc->vpi, 0, sizeof(*vpc->vpi)); + vpc->vpi->const_index = -1; + + hw = vpc->vpi->data; + + hw[0] |= (NV40_VP_INST_COND_TR << NV40_VP_INST_COND_SHIFT); + hw[0] |= ((0 << NV40_VP_INST_COND_SWZ_X_SHIFT) | + (1 << NV40_VP_INST_COND_SWZ_Y_SHIFT) | + (2 << NV40_VP_INST_COND_SWZ_Z_SHIFT) | + (3 << NV40_VP_INST_COND_SWZ_W_SHIFT)); + + if (slot == 0) { + hw[1] |= (op << NV40_VP_INST_VEC_OPCODE_SHIFT); + hw[3] |= NV40_VP_INST_SCA_DEST_TEMP_MASK; + hw[3] |= (mask << NV40_VP_INST_VEC_WRITEMASK_SHIFT); + } else { + hw[1] |= (op << NV40_VP_INST_SCA_OPCODE_SHIFT); + hw[0] |= (NV40_VP_INST_VEC_DEST_TEMP_MASK | (1 << 20)); + hw[3] |= (mask << NV40_VP_INST_SCA_WRITEMASK_SHIFT); + } + + emit_dst(vpc, hw, slot, dst); + emit_src(vpc, hw, 0, s0); + emit_src(vpc, hw, 1, s1); + emit_src(vpc, hw, 2, s2); +} + +static INLINE struct nv40_sreg +tgsi_src(struct 
nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc) { + struct nv40_sreg src; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + src = nv40_sr(NV40SR_INPUT, fsrc->SrcRegister.Index); + break; + case TGSI_FILE_CONSTANT: + src = constant(vpc, fsrc->SrcRegister.Index, 0, 0, 0, 0); + break; + case TGSI_FILE_IMMEDIATE: + src = vpc->imm[fsrc->SrcRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + src = vpc->r_temp[fsrc->SrcRegister.Index]; + break; + default: + NOUVEAU_ERR("bad src file\n"); + break; + } + + src.abs = fsrc->SrcRegisterExtMod.Absolute; + src.negate = fsrc->SrcRegister.Negate; + src.swz[0] = fsrc->SrcRegister.SwizzleX; + src.swz[1] = fsrc->SrcRegister.SwizzleY; + src.swz[2] = fsrc->SrcRegister.SwizzleZ; + src.swz[3] = fsrc->SrcRegister.SwizzleW; + return src; +} + +static INLINE struct nv40_sreg +tgsi_dst(struct nv40_vpc *vpc, const struct tgsi_full_dst_register *fdst) { + struct nv40_sreg dst; + + switch (fdst->DstRegister.File) { + case TGSI_FILE_OUTPUT: + dst = vpc->r_result[fdst->DstRegister.Index]; + break; + case TGSI_FILE_TEMPORARY: + dst = vpc->r_temp[fdst->DstRegister.Index]; + break; + case TGSI_FILE_ADDRESS: + dst = vpc->r_address[fdst->DstRegister.Index]; + break; + default: + NOUVEAU_ERR("bad dst file\n"); + break; + } + + return dst; +} + +static INLINE int +tgsi_mask(uint tgsi) +{ + int mask = 0; + + if (tgsi & TGSI_WRITEMASK_X) mask |= MASK_X; + if (tgsi & TGSI_WRITEMASK_Y) mask |= MASK_Y; + if (tgsi & TGSI_WRITEMASK_Z) mask |= MASK_Z; + if (tgsi & TGSI_WRITEMASK_W) mask |= MASK_W; + return mask; +} + +static boolean +src_native_swz(struct nv40_vpc *vpc, const struct tgsi_full_src_register *fsrc, + struct nv40_sreg *src) +{ + const struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + struct nv40_sreg tgsi = tgsi_src(vpc, fsrc); + uint mask = 0, zero_mask = 0, one_mask = 0, neg_mask = 0; + uint neg[4] = { fsrc->SrcRegisterExtSwz.NegateX, + fsrc->SrcRegisterExtSwz.NegateY, + fsrc->SrcRegisterExtSwz.NegateZ, + 
fsrc->SrcRegisterExtSwz.NegateW }; + uint c; + + for (c = 0; c < 4; c++) { + switch (tgsi_util_get_full_src_register_extswizzle(fsrc, c)) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + mask |= tgsi_mask(1 << c); + break; + case TGSI_EXTSWIZZLE_ZERO: + zero_mask |= tgsi_mask(1 << c); + tgsi.swz[c] = SWZ_X; + break; + case TGSI_EXTSWIZZLE_ONE: + one_mask |= tgsi_mask(1 << c); + tgsi.swz[c] = SWZ_X; + break; + default: + assert(0); + } + + if (!tgsi.negate && neg[c]) + neg_mask |= tgsi_mask(1 << c); + } + + if (mask == MASK_ALL && !neg_mask) + return TRUE; + + *src = temp(vpc); + + if (mask) + arith(vpc, 0, OP_MOV, *src, mask, tgsi, none, none); + + if (zero_mask) + arith(vpc, 0, OP_SFL, *src, zero_mask, *src, none, none); + + if (one_mask) + arith(vpc, 0, OP_STR, *src, one_mask, *src, none, none); + + if (neg_mask) { + struct nv40_sreg one = temp(vpc); + arith(vpc, 0, OP_STR, one, neg_mask, one, none, none); + arith(vpc, 0, OP_MUL, *src, neg_mask, *src, neg(one), none); + } + + return FALSE; +} + +static boolean +nv40_vertprog_parse_instruction(struct nv40_vpc *vpc, + const struct tgsi_full_instruction *finst) +{ + struct nv40_sreg src[3], dst, tmp; + struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + int mask; + int ai = -1, ci = -1, ii = -1; + int i; + + if (finst->Instruction.Opcode == TGSI_OPCODE_END) + return TRUE; + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + if (fsrc->SrcRegister.File == TGSI_FILE_TEMPORARY) { + src[i] = tgsi_src(vpc, fsrc); + } + } + + for (i = 0; i < finst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fsrc; + + fsrc = &finst->FullSrcRegisters[i]; + + switch (fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + case TGSI_FILE_CONSTANT: + case TGSI_FILE_TEMPORARY: + if (!src_native_swz(vpc, fsrc, &src[i])) + continue; + break; + default: + break; + } + + switch 
(fsrc->SrcRegister.File) { + case TGSI_FILE_INPUT: + if (ai == -1 || ai == fsrc->SrcRegister.Index) { + ai = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_CONSTANT: + if ((ci == -1 && ii == -1) || + ci == fsrc->SrcRegister.Index) { + ci = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_IMMEDIATE: + if ((ci == -1 && ii == -1) || + ii == fsrc->SrcRegister.Index) { + ii = fsrc->SrcRegister.Index; + src[i] = tgsi_src(vpc, fsrc); + } else { + src[i] = temp(vpc); + arith(vpc, 0, OP_MOV, src[i], MASK_ALL, + tgsi_src(vpc, fsrc), none, none); + } + break; + case TGSI_FILE_TEMPORARY: + /* handled above */ + break; + default: + NOUVEAU_ERR("bad src file\n"); + return FALSE; + } + } + + dst = tgsi_dst(vpc, &finst->FullDstRegisters[0]); + mask = tgsi_mask(finst->FullDstRegisters[0].DstRegister.WriteMask); + + switch (finst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + arith(vpc, 0, OP_MOV, dst, mask, abs(src[0]), none, none); + break; + case TGSI_OPCODE_ADD: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, src[1]); + break; + case TGSI_OPCODE_ARL: + arith(vpc, 0, OP_ARL, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_DP3: + arith(vpc, 0, OP_DP3, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DP4: + arith(vpc, 0, OP_DP4, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DPH: + arith(vpc, 0, OP_DPH, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_DST: + arith(vpc, 0, OP_DST, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_EX2: + arith(vpc, 1, OP_EX2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_EXP: + arith(vpc, 1, OP_EXP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_FLR: + arith(vpc, 
0, OP_FLR, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_FRC: + arith(vpc, 0, OP_FRC, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_LG2: + arith(vpc, 1, OP_LG2, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LIT: + arith(vpc, 1, OP_LIT, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_LOG: + arith(vpc, 1, OP_LOG, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_MAD: + arith(vpc, 0, OP_MAD, dst, mask, src[0], src[1], src[2]); + break; + case TGSI_OPCODE_MAX: + arith(vpc, 0, OP_MAX, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MIN: + arith(vpc, 0, OP_MIN, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_MOV: + arith(vpc, 0, OP_MOV, dst, mask, src[0], none, none); + break; + case TGSI_OPCODE_MUL: + arith(vpc, 0, OP_MUL, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_POW: + tmp = temp(vpc); + arith(vpc, 1, OP_LG2, tmp, MASK_X, none, none, + swz(src[0], X, X, X, X)); + arith(vpc, 0, OP_MUL, tmp, MASK_X, swz(tmp, X, X, X, X), + swz(src[1], X, X, X, X), none); + arith(vpc, 1, OP_EX2, dst, mask, none, none, + swz(tmp, X, X, X, X)); + break; + case TGSI_OPCODE_RCP: + arith(vpc, 1, OP_RCP, dst, mask, none, none, src[0]); + break; + case TGSI_OPCODE_RET: + break; + case TGSI_OPCODE_RSQ: + arith(vpc, 1, OP_RSQ, dst, mask, none, none, abs(src[0])); + break; + case TGSI_OPCODE_SGE: + arith(vpc, 0, OP_SGE, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SLT: + arith(vpc, 0, OP_SLT, dst, mask, src[0], src[1], none); + break; + case TGSI_OPCODE_SUB: + arith(vpc, 0, OP_ADD, dst, mask, src[0], none, neg(src[1])); + break; + case TGSI_OPCODE_XPD: + tmp = temp(vpc); + arith(vpc, 0, OP_MUL, tmp, mask, + swz(src[0], Z, X, Y, Y), swz(src[1], Y, Z, X, X), none); + arith(vpc, 0, OP_MAD, dst, (mask & ~MASK_W), + swz(src[0], Y, Z, X, X), swz(src[1], Z, X, Y, Y), + neg(tmp)); + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", finst->Instruction.Opcode); + return 
FALSE; + } + + release_temps(vpc); + return TRUE; +} + +static boolean +nv40_vertprog_parse_decl_output(struct nv40_vpc *vpc, + const struct tgsi_full_declaration *fdec) +{ + unsigned idx = fdec->DeclarationRange.First; + int hw; + + switch (fdec->Semantic.SemanticName) { + case TGSI_SEMANTIC_POSITION: + hw = NV40_VP_INST_DEST_POS; + vpc->hpos_idx = idx; + break; + case TGSI_SEMANTIC_COLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_VP_INST_DEST_COL0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_VP_INST_DEST_COL1; + } else { + NOUVEAU_ERR("bad colour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_BCOLOR: + if (fdec->Semantic.SemanticIndex == 0) { + hw = NV40_VP_INST_DEST_BFC0; + } else + if (fdec->Semantic.SemanticIndex == 1) { + hw = NV40_VP_INST_DEST_BFC1; + } else { + NOUVEAU_ERR("bad bcolour semantic index\n"); + return FALSE; + } + break; + case TGSI_SEMANTIC_FOG: + hw = NV40_VP_INST_DEST_FOGC; + break; + case TGSI_SEMANTIC_PSIZE: + hw = NV40_VP_INST_DEST_PSZ; + break; + case TGSI_SEMANTIC_GENERIC: + if (fdec->Semantic.SemanticIndex <= 7) { + hw = NV40_VP_INST_DEST_TC(fdec->Semantic.SemanticIndex); + } else { + NOUVEAU_ERR("bad generic semantic index\n"); + return FALSE; + } + break; + default: + NOUVEAU_ERR("bad output semantic\n"); + return FALSE; + } + + vpc->r_result[idx] = nv40_sr(NV40SR_OUTPUT, hw); + return TRUE; +} + +static boolean +nv40_vertprog_prepare(struct nv40_vpc *vpc) +{ + struct tgsi_parse_context p; + int high_temp = -1, high_addr = -1, nr_imm = 0, i; + + tgsi_parse_init(&p, vpc->vp->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch(tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + nr_imm++; + break; + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *fdec; + + fdec = &p.FullToken.FullDeclaration; + switch (fdec->Declaration.File) { + case TGSI_FILE_TEMPORARY: + if 
(fdec->DeclarationRange.Last > high_temp) { + high_temp = + fdec->DeclarationRange.Last; + } + break; +#if 0 /* this would be nice.. except gallium doesn't track it */ + case TGSI_FILE_ADDRESS: + if (fdec->DeclarationRange.Last > high_addr) { + high_addr = + fdec->DeclarationRange.Last; + } + break; +#endif + case TGSI_FILE_OUTPUT: + if (!nv40_vertprog_parse_decl_output(vpc, fdec)) + return FALSE; + break; + default: + break; + } + } + break; +#if 1 /* yay, parse instructions looking for address regs instead */ + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + const struct tgsi_full_dst_register *fdst; + + finst = &p.FullToken.FullInstruction; + fdst = &finst->FullDstRegisters[0]; + + if (fdst->DstRegister.File == TGSI_FILE_ADDRESS) { + if (fdst->DstRegister.Index > high_addr) + high_addr = fdst->DstRegister.Index; + } + + } + break; +#endif + default: + break; + } + } + tgsi_parse_free(&p); + + if (nr_imm) { + vpc->imm = CALLOC(nr_imm, sizeof(struct nv40_sreg)); + assert(vpc->imm); + } + + if (++high_temp) { + vpc->r_temp = CALLOC(high_temp, sizeof(struct nv40_sreg)); + for (i = 0; i < high_temp; i++) + vpc->r_temp[i] = temp(vpc); + } + + if (++high_addr) { + vpc->r_address = CALLOC(high_addr, sizeof(struct nv40_sreg)); + for (i = 0; i < high_addr; i++) + vpc->r_address[i] = temp(vpc); + } + + vpc->r_temps_discard = 0; + return TRUE; +} + +static void +nv40_vertprog_translate(struct nv40_context *nv40, + struct nv40_vertex_program *vp) +{ + struct tgsi_parse_context parse; + struct nv40_vpc *vpc = NULL; + struct nv40_sreg none = nv40_sr(NV40SR_NONE, 0); + int i; + + vpc = CALLOC(1, sizeof(struct nv40_vpc)); + if (!vpc) + return; + vpc->vp = vp; + + if (!nv40_vertprog_prepare(vpc)) { + FREE(vpc); + return; + } + + /* Redirect post-transform vertex position to a temp if user clip + * planes are enabled. We need to append code the the vtxprog + * to handle clip planes later. 
+ */ + if (vp->ucp.nr) { + vpc->r_result[vpc->hpos_idx] = temp(vpc); + vpc->r_temps_discard = 0; + } + + tgsi_parse_init(&parse, vp->pipe.tokens); + + while (!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm; + + imm = &parse.FullToken.FullImmediate; + assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32); + assert(imm->Immediate.NrTokens == 4 + 1); + vpc->imm[vpc->nr_imm++] = + constant(vpc, -1, + imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + const struct tgsi_full_instruction *finst; + finst = &parse.FullToken.FullInstruction; + if (!nv40_vertprog_parse_instruction(vpc, finst)) + goto out_err; + } + break; + default: + break; + } + } + + /* Write out HPOS if it was redirected to a temp earlier */ + if (vpc->r_result[vpc->hpos_idx].type != NV40SR_OUTPUT) { + struct nv40_sreg hpos = nv40_sr(NV40SR_OUTPUT, + NV40_VP_INST_DEST_POS); + struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + + arith(vpc, 0, OP_MOV, hpos, MASK_ALL, htmp, none, none); + } + + /* Insert code to handle user clip planes */ + for (i = 0; i < vp->ucp.nr; i++) { + struct nv40_sreg cdst = nv40_sr(NV40SR_OUTPUT, + NV40_VP_INST_DEST_CLIP(i)); + struct nv40_sreg ceqn = constant(vpc, -1, + nv40->clip.ucp[i][0], + nv40->clip.ucp[i][1], + nv40->clip.ucp[i][2], + nv40->clip.ucp[i][3]); + struct nv40_sreg htmp = vpc->r_result[vpc->hpos_idx]; + unsigned mask; + + switch (i) { + case 0: case 3: mask = MASK_Y; break; + case 1: case 4: mask = MASK_Z; break; + case 2: case 5: mask = MASK_W; break; + default: + NOUVEAU_ERR("invalid clip dist #%d\n", i); + goto out_err; + } + + arith(vpc, 0, OP_DP4, cdst, mask, htmp, ceqn, none); + } + + vp->insns[vp->nr_insns - 1].data[3] |= NV40_VP_INST_LAST; + vp->translated = TRUE; +out_err: 
+ tgsi_parse_free(&parse); + if (vpc->r_temp) + FREE(vpc->r_temp); + if (vpc->r_address) + FREE(vpc->r_address); + if (vpc->imm) + FREE(vpc->imm); + FREE(vpc); +} + +static boolean +nv40_vertprog_validate(struct nv40_context *nv40) +{ + struct nouveau_winsys *nvws = nv40->nvws; + struct pipe_winsys *ws = nv40->pipe.winsys; + struct nouveau_grobj *curie = nv40->screen->curie; + struct nv40_vertex_program *vp; + struct pipe_buffer *constbuf; + boolean upload_code = FALSE, upload_data = FALSE; + int i; + + if (nv40->render_mode == HW) { + vp = nv40->vertprog; + constbuf = nv40->constbuf[PIPE_SHADER_VERTEX]; + + if ((nv40->dirty & NV40_NEW_UCP) || + memcmp(&nv40->clip, &vp->ucp, sizeof(vp->ucp))) { + nv40_vertprog_destroy(nv40, vp); + memcpy(&vp->ucp, &nv40->clip, sizeof(vp->ucp)); + } + } else { + vp = nv40->swtnl.vertprog; + constbuf = NULL; + } + + /* Translate TGSI shader into hw bytecode */ + if (vp->translated) + goto check_gpu_resources; + + nv40->fallback_swtnl &= ~NV40_NEW_VERTPROG; + nv40_vertprog_translate(nv40, vp); + if (!vp->translated) { + nv40->fallback_swtnl |= NV40_NEW_VERTPROG; + return FALSE; + } + +check_gpu_resources: + /* Allocate hw vtxprog exec slots */ + if (!vp->exec) { + struct nouveau_resource *heap = nv40->screen->vp_exec_heap; + struct nouveau_stateobj *so; + uint vplen = vp->nr_insns; + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) { + while (heap->next && heap->size < vplen) { + struct nv40_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->exec); + } + + if (nvws->res_alloc(heap, vplen, vp, &vp->exec)) + assert(0); + } + + so = so_new(7, 0); + so_method(so, curie, NV40TCL_VP_START_FROM_ID, 1); + so_data (so, vp->exec->start); + so_method(so, curie, NV40TCL_VP_ATTRIB_EN, 2); + so_data (so, vp->ir); + so_data (so, vp->or); + so_method(so, curie, NV40TCL_CLIP_PLANE_ENABLE, 1); + so_data (so, vp->clip_ctrl); + so_ref(so, &vp->so); + + upload_code = TRUE; + } + + /* Allocate hw vtxprog const slots */ + if 
(vp->nr_consts && !vp->data) { + struct nouveau_resource *heap = nv40->screen->vp_data_heap; + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) { + while (heap->next && heap->size < vp->nr_consts) { + struct nv40_vertex_program *evict; + + evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, vp->nr_consts, vp, &vp->data)) + assert(0); + } + + /*XXX: handle this some day */ + assert(vp->data->start >= vp->data_start_min); + + upload_data = TRUE; + if (vp->data_start != vp->data->start) + upload_code = TRUE; + } + + /* If exec or data segments moved we need to patch the program to + * fixup offsets and register IDs. + */ + if (vp->exec_start != vp->exec->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->has_branch_offset) { + assert(0); + } + } + + vp->exec_start = vp->exec->start; + } + + if (vp->nr_consts && vp->data_start != vp->data->start) { + for (i = 0; i < vp->nr_insns; i++) { + struct nv40_vertex_program_exec *vpi = &vp->insns[i]; + + if (vpi->const_index >= 0) { + vpi->data[1] &= ~NV40_VP_INST_CONST_SRC_MASK; + vpi->data[1] |= + (vpi->const_index + vp->data->start) << + NV40_VP_INST_CONST_SRC_SHIFT; + + } + } + + vp->data_start = vp->data->start; + } + + /* Update + Upload constant values */ + if (vp->nr_consts) { + float *map = NULL; + + if (constbuf) { + map = ws->buffer_map(ws, constbuf, + PIPE_BUFFER_USAGE_CPU_READ); + } + + for (i = 0; i < vp->nr_consts; i++) { + struct nv40_vertex_program_data *vpd = &vp->consts[i]; + + if (vpd->index >= 0) { + if (!upload_data && + !memcmp(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float))) + continue; + memcpy(vpd->value, &map[vpd->index * 4], + 4 * sizeof(float)); + } + + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_CONST_ID, 5); + OUT_RING (i + vp->data->start); + OUT_RINGp ((uint32_t *)vpd->value, 4); + } + + if (constbuf) + ws->buffer_unmap(ws, constbuf); + } + + /* Upload vtxprog */ + if (upload_code) 
{ +#if 0 + for (i = 0; i < vp->nr_insns; i++) { + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[0]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[1]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[2]); + NOUVEAU_MSG("VP %d: 0x%08x\n", i, vp->insns[i].data[3]); + } +#endif + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_FROM_ID, 1); + OUT_RING (vp->exec->start); + for (i = 0; i < vp->nr_insns; i++) { + BEGIN_RING(curie, NV40TCL_VP_UPLOAD_INST(0), 4); + OUT_RINGp (vp->insns[i].data, 4); + } + } + + if (vp->so != nv40->state.hw[NV40_STATE_VERTPROG]) { + so_ref(vp->so, &nv40->state.hw[NV40_STATE_VERTPROG]); + return TRUE; + } + + return FALSE; +} + +void +nv40_vertprog_destroy(struct nv40_context *nv40, struct nv40_vertex_program *vp) +{ + struct nouveau_winsys *nvws = nv40->screen->nvws; + + vp->translated = FALSE; + + if (vp->nr_insns) { + FREE(vp->insns); + vp->insns = NULL; + vp->nr_insns = 0; + } + + if (vp->nr_consts) { + FREE(vp->consts); + vp->consts = NULL; + vp->nr_consts = 0; + } + + nvws->res_free(&vp->exec); + vp->exec_start = 0; + nvws->res_free(&vp->data); + vp->data_start = 0; + vp->data_start_min = 0; + + vp->ir = vp->or = vp->clip_ctrl = 0; + so_ref(NULL, &vp->so); +} + +struct nv40_state_entry nv40_state_vertprog = { + .validate = nv40_vertprog_validate, + .dirty = { + .pipe = NV40_NEW_VERTPROG | NV40_NEW_UCP, + .hw = NV40_STATE_VERTPROG, + } +}; + diff --git a/src/gallium/drivers/nv50/Makefile b/src/gallium/drivers/nv50/Makefile new file mode 100644 index 0000000000..612aea28a3 --- /dev/null +++ b/src/gallium/drivers/nv50/Makefile @@ -0,0 +1,21 @@ +TOP = ../../../.. 
+include $(TOP)/configs/current + +LIBNAME = nv50 + +C_SOURCES = \ + nv50_clear.c \ + nv50_context.c \ + nv50_draw.c \ + nv50_miptree.c \ + nv50_query.c \ + nv50_program.c \ + nv50_screen.c \ + nv50_state.c \ + nv50_state_validate.c \ + nv50_surface.c \ + nv50_tex.c \ + nv50_transfer.c \ + nv50_vbo.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c new file mode 100644 index 0000000000..f9bc3b53ca --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_clear.c @@ -0,0 +1,92 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "nv50_context.h" + +void +nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct pipe_framebuffer_state fb, s_fb = nv50->framebuffer; + struct pipe_scissor_state sc, s_sc = nv50->scissor; + unsigned dirty = nv50->dirty; + + nv50->dirty = 0; + + if (ps->format == PIPE_FORMAT_Z24S8_UNORM || + ps->format == PIPE_FORMAT_Z16_UNORM) { + fb.nr_cbufs = 0; + fb.zsbuf = ps; + } else { + fb.nr_cbufs = 1; + fb.cbufs[0] = ps; + fb.zsbuf = NULL; + } + fb.width = ps->width; + fb.height = ps->height; + pipe->set_framebuffer_state(pipe, &fb); + + sc.minx = sc.miny = 0; + sc.maxx = fb.width; + sc.maxy = fb.height; + pipe->set_scissor_state(pipe, &sc); + + nv50_state_validate(nv50); + + switch (ps->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + BEGIN_RING(chan, tesla, 0x0d80, 4); + OUT_RINGf (chan, ubyte_to_float((clearValue >> 16) & 0xff)); + OUT_RINGf (chan, ubyte_to_float((clearValue >> 8) & 0xff)); + OUT_RINGf (chan, ubyte_to_float((clearValue >> 0) & 0xff)); + OUT_RINGf (chan, ubyte_to_float((clearValue >> 24) & 0xff)); + BEGIN_RING(chan, tesla, 0x19d0, 1); + OUT_RING (chan, 0x3c); + break; + case PIPE_FORMAT_Z24S8_UNORM: + BEGIN_RING(chan, tesla, 0x0d90, 1); + OUT_RINGf (chan, (float)(clearValue >> 8) * (1.0 / 16777215.0)); + BEGIN_RING(chan, tesla, 0x0da0, 1); + OUT_RING (chan, clearValue & 0xff); + BEGIN_RING(chan, tesla, 0x19d0, 1); + OUT_RING (chan, 0x03); + break; + default: + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, + clearValue); + break; + } + + pipe->set_framebuffer_state(pipe, &s_fb); + pipe->set_scissor_state(pipe, &s_sc); + nv50->dirty |= dirty; + + ps->status = PIPE_SURFACE_STATUS_CLEAR; +} + diff --git 
a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c new file mode 100644 index 0000000000..565a5da668 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -0,0 +1,90 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+
+#include "draw/draw_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/internal/p_winsys_screen.h"
+
+#include "nv50_context.h"
+#include "nv50_screen.h"
+
+/* pipe_context::flush hook: kicks the channel's pending commands.
+ * 'flags' and 'fence' are not used; '*fence' is left untouched
+ * (NOTE(review): confirm callers pre-initialise it).
+ */
+static void
+nv50_flush(struct pipe_context *pipe, unsigned flags,
+	   struct pipe_fence_handle **fence)
+{
+	struct nv50_context *nv50 = (struct nv50_context *)pipe;
+
+	FIRE_RING(nv50->screen->nvws->channel);
+}
+
+/* pipe_context::destroy hook: tears down the draw module and the context. */
+static void
+nv50_destroy(struct pipe_context *pipe)
+{
+	struct nv50_context *nv50 = (struct nv50_context *)pipe;
+
+	draw_destroy(nv50->draw);
+	FREE(nv50);
+}
+
+
+/* pipe_context::set_edgeflags hook: intentionally a no-op here. */
+static void
+nv50_set_edgeflags(struct pipe_context *pipe, const unsigned *bitfield)
+{
+}
+
+/* Allocate an nv50 context for 'pscreen' and wire up its pipe_context
+ * entry points.  Returns NULL if the context, the draw module or the
+ * render stage cannot be allocated.
+ */
+struct pipe_context *
+nv50_create(struct pipe_screen *pscreen, unsigned pctx_id)
+{
+	struct pipe_winsys *pipe_winsys = pscreen->winsys;
+	struct nv50_screen *screen = nv50_screen(pscreen);
+	struct nv50_context *nv50;
+	struct draw_stage *stage;
+
+	nv50 = CALLOC_STRUCT(nv50_context);
+	if (!nv50)
+		return NULL;
+	nv50->screen = screen;
+	nv50->pctx_id = pctx_id;
+
+	nv50->pipe.winsys = pipe_winsys;
+	nv50->pipe.screen = pscreen;
+
+	nv50->pipe.destroy = nv50_destroy;
+
+	nv50->pipe.set_edgeflags = nv50_set_edgeflags;
+	nv50->pipe.draw_arrays = nv50_draw_arrays;
+	nv50->pipe.draw_elements = nv50_draw_elements;
+	nv50->pipe.clear = nv50_clear;
+
+	nv50->pipe.flush = nv50_flush;
+
+	nv50_init_surface_functions(nv50);
+	nv50_init_state_functions(nv50);
+	nv50_init_query_functions(nv50);
+
+	/* An assert() alone disappears in release builds and would leave a
+	 * NULL draw context to be dereferenced below; fail cleanly instead.
+	 */
+	nv50->draw = draw_create();
+	if (!nv50->draw) {
+		FREE(nv50);
+		return NULL;
+	}
+
+	stage = nv50_draw_render_stage(nv50);
+	if (!stage) {
+		draw_destroy(nv50->draw);
+		FREE(nv50);
+		return NULL;
+	}
+	draw_set_rasterize_stage(nv50->draw, stage);
+
+	return &nv50->pipe;
+}
+
+
diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h
new file mode 100644
index 0000000000..313e435e7a
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_context.h
@@ -0,0 +1,201 @@
+#ifndef __NV50_CONTEXT_H__
+#define __NV50_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "pipe/p_compiler.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h" + +#include "draw/draw_vertex.h" + +#include "nouveau/nouveau_winsys.h" +#include "nouveau/nouveau_gldefs.h" +#include "nouveau/nouveau_stateobj.h" + +#include "nv50_screen.h" +#include "nv50_program.h" + +#define NOUVEAU_ERR(fmt, args...) \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); +#define NOUVEAU_MSG(fmt, args...) \ + fprintf(stderr, "nouveau: "fmt, ##args); + +/* Constant buffer assignment */ +#define NV50_CB_PMISC 0 +#define NV50_CB_PVP 1 +#define NV50_CB_PFP 2 +#define NV50_CB_PGP 3 +#define NV50_CB_TIC 4 +#define NV50_CB_TSC 5 +#define NV50_CB_PUPLOAD 6 + +#define NV50_NEW_BLEND (1 << 0) +#define NV50_NEW_ZSA (1 << 1) +#define NV50_NEW_BLEND_COLOUR (1 << 2) +#define NV50_NEW_STIPPLE (1 << 3) +#define NV50_NEW_SCISSOR (1 << 4) +#define NV50_NEW_VIEWPORT (1 << 5) +#define NV50_NEW_RASTERIZER (1 << 6) +#define NV50_NEW_FRAMEBUFFER (1 << 7) +#define NV50_NEW_VERTPROG (1 << 8) +#define NV50_NEW_VERTPROG_CB (1 << 9) +#define NV50_NEW_FRAGPROG (1 << 10) +#define NV50_NEW_FRAGPROG_CB (1 << 11) +#define NV50_NEW_ARRAYS (1 << 12) +#define NV50_NEW_SAMPLER (1 << 13) +#define NV50_NEW_TEXTURE (1 << 14) + +struct nv50_blend_stateobj { + struct pipe_blend_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv50_zsa_stateobj { + struct pipe_depth_stencil_alpha_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv50_rasterizer_stateobj { + struct pipe_rasterizer_state pipe; + struct nouveau_stateobj *so; +}; + +struct nv50_miptree_level { + int *image_offset; + unsigned pitch; +}; + +struct nv50_miptree { + struct pipe_texture base; + struct pipe_buffer *buffer; + + struct nv50_miptree_level level[PIPE_MAX_TEXTURE_LEVELS]; + int image_nr; + int total_size; +}; + +static INLINE struct nv50_miptree * +nv50_miptree(struct pipe_texture *pt) +{ + return (struct nv50_miptree *)pt; +} + +struct nv50_surface { + struct pipe_surface base; +}; + +static INLINE struct nv50_surface * +nv50_surface(struct pipe_surface *pt) +{ + 
return (struct nv50_surface *)pt; +} + +static INLINE struct pipe_buffer * +nv50_surface_buffer(struct pipe_surface *surface) +{ + struct nv50_miptree *mt = (struct nv50_miptree *)surface->texture; + return mt->buffer; +} + +struct nv50_state { + unsigned dirty; + + struct nouveau_stateobj *fb; + struct nouveau_stateobj *blend; + struct nouveau_stateobj *blend_colour; + struct nouveau_stateobj *zsa; + struct nouveau_stateobj *rast; + struct nouveau_stateobj *stipple; + struct nouveau_stateobj *scissor; + unsigned scissor_enabled; + struct nouveau_stateobj *viewport; + unsigned viewport_bypass; + struct nouveau_stateobj *tsc_upload; + struct nouveau_stateobj *tic_upload; + struct nouveau_stateobj *vertprog; + struct nouveau_stateobj *fragprog; + struct nouveau_stateobj *vtxfmt; + struct nouveau_stateobj *vtxbuf; +}; + +struct nv50_context { + struct pipe_context pipe; + + struct nv50_screen *screen; + unsigned pctx_id; + + struct draw_context *draw; + + struct nv50_state state; + + unsigned dirty; + struct nv50_blend_stateobj *blend; + struct nv50_zsa_stateobj *zsa; + struct nv50_rasterizer_stateobj *rasterizer; + struct pipe_blend_color blend_colour; + struct pipe_poly_stipple stipple; + struct pipe_scissor_state scissor; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state framebuffer; + struct nv50_program *vertprog; + struct nv50_program *fragprog; + struct pipe_buffer *constbuf[PIPE_SHADER_TYPES]; + struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; + unsigned vtxbuf_nr; + struct pipe_vertex_element vtxelt[PIPE_MAX_ATTRIBS]; + unsigned vtxelt_nr; + unsigned *sampler[PIPE_MAX_SAMPLERS]; + unsigned sampler_nr; + struct nv50_miptree *miptree[PIPE_MAX_SAMPLERS]; + unsigned miptree_nr; +}; + +static INLINE struct nv50_context * +nv50_context(struct pipe_context *pipe) +{ + return (struct nv50_context *)pipe; +} + +extern void nv50_init_surface_functions(struct nv50_context *nv50); +extern void nv50_init_state_functions(struct nv50_context *nv50); 
+extern void nv50_init_query_functions(struct nv50_context *nv50); + +extern void nv50_screen_init_miptree_functions(struct pipe_screen *pscreen); + +extern int +nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, + int dx, int dy, struct pipe_surface *src, int sx, int sy, + int w, int h); + +/* nv50_draw.c */ +extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *nv50); + +/* nv50_vbo.c */ +extern boolean nv50_draw_arrays(struct pipe_context *, unsigned mode, + unsigned start, unsigned count); +extern boolean nv50_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, + unsigned count); +extern void nv50_vbo_validate(struct nv50_context *nv50); + +/* nv50_clear.c */ +extern void nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, + unsigned clearValue); + +/* nv50_program.c */ +extern void nv50_vertprog_validate(struct nv50_context *nv50); +extern void nv50_fragprog_validate(struct nv50_context *nv50); +extern void nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p); + +/* nv50_state_validate.c */ +extern boolean nv50_state_validate(struct nv50_context *nv50); + +/* nv50_tex.c */ +extern void nv50_tex_validate(struct nv50_context *); + +#endif diff --git a/src/gallium/drivers/nv50/nv50_draw.c b/src/gallium/drivers/nv50/nv50_draw.c new file mode 100644 index 0000000000..2f6f607261 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_draw.c @@ -0,0 +1,89 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the 
following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "draw/draw_pipe.h"
+
+#include "nv50_context.h"
+
+/* Draw-module stage wrapper; 'stage' must stay the first member so the
+ * cast in nv50_render_stage() is valid.
+ */
+struct nv50_render_stage {
+	struct draw_stage stage;
+	struct nv50_context *nv50;
+};
+
+static INLINE struct nv50_render_stage *
+nv50_render_stage(struct draw_stage *stage)
+{
+	return (struct nv50_render_stage *)stage;
+}
+
+/* The primitive callbacks below are stubs: they only log their own
+ * function name/line through NOUVEAU_ERR.
+ */
+static void
+nv50_render_point(struct draw_stage *stage, struct prim_header *prim)
+{
+	NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_line(struct draw_stage *stage, struct prim_header *prim)
+{
+	NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+	NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_flush(struct draw_stage *stage, unsigned flags)
+{
+}
+
+static void
+nv50_render_reset_stipple_counter(struct draw_stage *stage)
+{
+	NOUVEAU_ERR("\n");
+}
+
+static void
+nv50_render_destroy(struct draw_stage *stage)
+{
+	FREE(stage);
+}
+
+/* Create the render stage bound to 'nv50'.  Returns NULL when the
+ * allocation fails (previously the NULL pointer was dereferenced).
+ */
+struct draw_stage *
+nv50_draw_render_stage(struct nv50_context *nv50)
+{
+	struct nv50_render_stage *rs = CALLOC_STRUCT(nv50_render_stage);
+
+	if (!rs)
+		return NULL;
+
+	rs->nv50 = nv50;
+	rs->stage.draw = nv50->draw;
+	rs->stage.destroy = nv50_render_destroy;
+	rs->stage.point = nv50_render_point;
+	rs->stage.line = nv50_render_line;
+	rs->stage.tri = nv50_render_tri;
+	rs->stage.flush = nv50_render_flush;
+	rs->stage.reset_stipple_counter = nv50_render_reset_stipple_counter;
+
+	return &rs->stage;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
new file mode 100644
index 0000000000..24a68b7235
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -0,0 +1,207 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+
+#include "nv50_context.h"
+
+/* Free every per-level image_offset array owned by 'mt'.  Levels that were
+ * never allocated are NULL (the miptree comes from CALLOC_STRUCT) and are
+ * skipped.
+ */
+static void
+nv50_miptree_free_levels(struct nv50_miptree *mt)
+{
+	unsigned l;
+
+	for (l = 0; l < PIPE_MAX_TEXTURE_LEVELS; l++) {
+		if (mt->level[l].image_offset) {
+			FREE(mt->level[l].image_offset);
+			mt->level[l].image_offset = NULL;
+		}
+	}
+}
+
+/* pipe_screen::texture_create hook.  Copies the template, fills in the
+ * per-level dimensions/pitch, lays out every image of every level
+ * back-to-back and allocates one winsys buffer for the lot.
+ * Returns NULL on any allocation failure.
+ */
+static struct pipe_texture *
+nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
+{
+	struct pipe_winsys *ws = pscreen->winsys;
+	struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
+	struct pipe_texture *pt;
+	unsigned usage, width = tmp->width[0], height = tmp->height[0];
+	unsigned depth = tmp->depth[0];
+	int i, l;
+
+	if (!mt)
+		return NULL;
+	pt = &mt->base;
+
+	mt->base = *tmp;
+	mt->base.refcount = 1;
+	mt->base.screen = pscreen;
+
+	usage = PIPE_BUFFER_USAGE_PIXEL;
+	switch (pt->format) {
+	case PIPE_FORMAT_Z24S8_UNORM:
+	case PIPE_FORMAT_Z16_UNORM:
+		usage |= NOUVEAU_BUFFER_USAGE_ZETA;
+		break;
+	default:
+		break;
+	}
+
+	/* One image per depth slice (3D), per face (cube), or just one. */
+	switch (pt->target) {
+	case PIPE_TEXTURE_3D:
+		mt->image_nr = pt->depth[0];
+		break;
+	case PIPE_TEXTURE_CUBE:
+		mt->image_nr = 6;
+		break;
+	default:
+		mt->image_nr = 1;
+		break;
+	}
+
+	for (l = 0; l <= pt->last_level; l++) {
+		struct nv50_miptree_level *lvl = &mt->level[l];
+
+		pt->width[l] = width;
+		pt->height[l] = height;
+		pt->depth[l] = depth;
+		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
+		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
+
+		lvl->image_offset = CALLOC(mt->image_nr, sizeof(int));
+		if (!lvl->image_offset) {
+			nv50_miptree_free_levels(mt);
+			FREE(mt);
+			return NULL;
+		}
+		lvl->pitch = align(pt->width[l] * pt->block.size, 64);
+
+		width = MAX2(1, width >> 1);
+		height = MAX2(1, height >> 1);
+		depth = MAX2(1, depth >> 1);
+	}
+
+	for (i = 0; i < mt->image_nr; i++) {
+		for (l = 0; l <= pt->last_level; l++) {
+			struct nv50_miptree_level *lvl = &mt->level[l];
+			int size;
+
+			/* NOTE(review): block.size is applied to both the
+			 * row and the row count - kept as in the original,
+			 * confirm this over-allocation is intended.
+			 */
+			size = align(pt->width[l], 8) * pt->block.size;
+			size = align(size, 64);
+			size *= align(pt->height[l], 8) * pt->block.size;
+
+			lvl->image_offset[i] = mt->total_size;
+
+			mt->total_size += size;
+		}
+	}
+
+	mt->buffer = ws->buffer_create(ws, 256, usage, mt->total_size);
+	if (!mt->buffer) {
+		/* The offset tables used to leak on this path. */
+		nv50_miptree_free_levels(mt);
+		FREE(mt);
+		return NULL;
+	}
+
+	return &mt->base;
+}
+
+/* pipe_screen::texture_blanket hook: wrap an existing buffer 'pb' as a
+ * single-level 2D texture using the caller-supplied stride.
+ */
+static struct pipe_texture *
+nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
+		     const unsigned *stride, struct pipe_buffer *pb)
+{
+	struct nv50_miptree *mt;
+
+	/* Only supports 2D, non-mipmapped textures for the moment */
+	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
+	    pt->depth[0] != 1)
+		return NULL;
+
+	mt = CALLOC_STRUCT(nv50_miptree);
+	if (!mt)
+		return NULL;
+
+	mt->base = *pt;
+	mt->base.refcount = 1;
+	mt->base.screen = pscreen;
+	mt->image_nr = 1;
+	mt->level[0].pitch = *stride;
+	mt->level[0].image_offset = CALLOC(1, sizeof(int));
+	if (!mt->level[0].image_offset) {
+		FREE(mt);
+		return NULL;
+	}
+
+	pipe_buffer_reference(pscreen, &mt->buffer, pb);
+	return &mt->base;
+}
+
+/* pipe_screen::texture_release hook: clears the caller's pointer and
+ * destroys the miptree (buffer reference, offset tables, struct) when the
+ * last reference goes away.
+ */
+static void
+nv50_miptree_release(struct pipe_screen *pscreen, struct pipe_texture **ppt)
+{
+	struct pipe_texture *pt = *ppt;
+
+	*ppt = NULL;
+
+	if (--pt->refcount <= 0) {
+		struct nv50_miptree *mt = nv50_miptree(pt);
+
+		pipe_buffer_reference(pscreen, &mt->buffer, NULL);
+		nv50_miptree_free_levels(mt);
+		FREE(mt);
+	}
+}
+
+/* pipe_screen::get_tex_surface hook: build a pipe_surface describing one
+ * image (cube face / 3D slice / the only image) of one mip level.
+ */
+static struct pipe_surface *
+nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
+			 unsigned face, unsigned level, unsigned zslice,
+			 unsigned flags)
+{
+	struct nv50_miptree *mt = nv50_miptree(pt);
+	struct nv50_miptree_level *lvl = &mt->level[level];
+	struct pipe_surface *ps;
+	int img;
+
+	if (pt->target == PIPE_TEXTURE_CUBE)
+		img = face;
+	else
+	if (pt->target == PIPE_TEXTURE_3D)
+		img = zslice;
+	else
+		img = 0;
+
+	ps = CALLOC_STRUCT(pipe_surface);
+	if (!ps)
+		return NULL;
+	pipe_texture_reference(&ps->texture, pt);
+	ps->format = pt->format;
+	ps->width = pt->width[level];
+	ps->height = pt->height[level];
+	ps->usage = flags;
+	ps->status = PIPE_SURFACE_STATUS_DEFINED;
+	ps->refcount = 1;
+	ps->face = face;
+	ps->level = level;
+	ps->zslice = zslice;
+	ps->offset = lvl->image_offset[img];
+
+	return ps;
+}
+
+/* pipe_screen::tex_surface_release hook: drops one surface reference and
+ * frees the surface (and its texture reference) on the last one.
+ */
+static void
+nv50_miptree_surface_del(struct pipe_screen *pscreen,
+			 struct pipe_surface **psurface)
+{
+	struct pipe_surface *ps = *psurface;
+	struct nv50_surface *s = nv50_surface(ps);
+
+	*psurface = NULL;
+
+	if (--ps->refcount <= 0) {
+		pipe_texture_reference(&ps->texture, NULL);
+		FREE(s);
+	}
+}
+
+void
+nv50_screen_init_miptree_functions(struct pipe_screen *pscreen)
+{
+	pscreen->texture_create = nv50_miptree_create;
+	pscreen->texture_blanket = nv50_miptree_blanket;
+	pscreen->texture_release = nv50_miptree_release;
+	pscreen->get_tex_surface = nv50_miptree_surface_new;
+	pscreen->tex_surface_release = nv50_miptree_surface_del;
+}
+
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
new file mode 100644
index 0000000000..14c5d47e79
--- /dev/null
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -0,0 +1,1784 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
+ * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" + +#include "nv50_context.h" + +#define NV50_SU_MAX_TEMP 64 +//#define NV50_PROGRAM_DUMP + +/* ARL - gallium craps itself on progs/vp/arl.txt + * + * MSB - Like MAD, but MUL+SUB + * - Fuck it off, introduce a way to negate args for ops that + * support it. + * + * Look into inlining IMMD for ops other than MOV (make it general?) + * - Maybe even relax restrictions a bit, can't do P_RESULT + P_IMMD, + * but can emit to P_TEMP first - then MOV later. NVIDIA does this + * + * In ops such as ADD it's possible to construct a bad opcode in the !is_long() + * case, if the emit_src() causes the inst to suddenly become long. + * + * Verify half-insns work where expected - and force disable them where they + * don't work - MUL has it forcibly disabled atm as it fixes POW.. + * + * FUCK! watch dst==src vectors, can overwrite components that are needed. + * ie. SUB R0, R0.yzxw, R0 + * + * Things to check with renouveau: + * FP attr/result assignment - how? 
+ * attrib + * - 0x16bc maps vp output onto fp hpos + * - 0x16c0 maps vp output onto fp col0 + * result + * - colr always 0-3 + * - depr always 4 + * 0x16bc->0x16e8 --> some binding between vp/fp regs + * 0x16b8 --> VP output count + * + * 0x1298 --> "MOV rcol.x, fcol.y" "MOV depr, fcol.y" = 0x00000005 + * "MOV rcol.x, fcol.y" = 0x00000004 + * 0x19a8 --> as above but 0x00000100 and 0x00000000 + * - 0x00100000 used when KIL used + * 0x196c --> as above but 0x00000011 and 0x00000000 + * + * 0x1988 --> 0xXXNNNNNN + * - XX == FP high something + */ +struct nv50_reg { + enum { + P_TEMP, + P_ATTR, + P_RESULT, + P_CONST, + P_IMMD + } type; + int index; + + int hw; + int neg; +}; + +struct nv50_pc { + struct nv50_program *p; + + /* hw resources */ + struct nv50_reg *r_temp[NV50_SU_MAX_TEMP]; + + /* tgsi resources */ + struct nv50_reg *temp; + int temp_nr; + struct nv50_reg *attr; + int attr_nr; + struct nv50_reg *result; + int result_nr; + struct nv50_reg *param; + int param_nr; + struct nv50_reg *immd; + float *immd_buf; + int immd_nr; + + struct nv50_reg *temp_temp[16]; + unsigned temp_temp_nr; +}; + +static void +alloc_reg(struct nv50_pc *pc, struct nv50_reg *reg) +{ + int i; + + if (reg->type == P_RESULT) { + if (pc->p->cfg.high_result < (reg->hw + 1)) + pc->p->cfg.high_result = reg->hw + 1; + } + + if (reg->type != P_TEMP) + return; + + if (reg->hw >= 0) { + /*XXX: do this here too to catch FP temp-as-attr usage.. 
+		 * not clean, but works */
+		if (pc->p->cfg.high_temp < (reg->hw + 1))
+			pc->p->cfg.high_temp = reg->hw + 1;
+		return;
+	}
+
+	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
+		if (!(pc->r_temp[i])) {
+			pc->r_temp[i] = reg;
+			reg->hw = i;
+			if (pc->p->cfg.high_temp < (i + 1))
+				pc->p->cfg.high_temp = i + 1;
+			return;
+		}
+	}
+
+	assert(0);
+}
+
+/* Return 'dst' itself when it is a P_TEMP with no hw register yet;
+ * otherwise create an anonymous temp (index == -1) in the first free
+ * r_temp slot.
+ */
+static struct nv50_reg *
+alloc_temp(struct nv50_pc *pc, struct nv50_reg *dst)
+{
+	struct nv50_reg *r;
+	int i;
+
+	if (dst && dst->type == P_TEMP && dst->hw == -1)
+		return dst;
+
+	for (i = 0; i < NV50_SU_MAX_TEMP; i++) {
+		if (!pc->r_temp[i]) {
+			r = CALLOC_STRUCT(nv50_reg);
+			r->type = P_TEMP;
+			r->index = -1;
+			r->hw = i;
+			pc->r_temp[i] = r;
+			return r;
+		}
+	}
+
+	assert(0);
+	return NULL;
+}
+
+/* Release an anonymous temp (index == -1); other registers are ignored. */
+static void
+free_temp(struct nv50_pc *pc, struct nv50_reg *r)
+{
+	if (r->index == -1) {
+		unsigned hw = r->hw;
+
+		FREE(pc->r_temp[hw]);
+		pc->r_temp[hw] = NULL;
+	}
+}
+
+/* Allocate four consecutive anonymous temps, searching upwards from 'idx'.
+ * Returns 0 on success, 1 when no free window of four slots exists.
+ */
+static int
+alloc_temp4(struct nv50_pc *pc, struct nv50_reg *dst[4], int idx)
+{
+	int i;
+
+	/* The window covers idx..idx+3, so it is valid while idx + 4 does not
+	 * exceed the table size; '>=' wrongly rejected the last window.
+	 */
+	if ((idx + 4) > NV50_SU_MAX_TEMP)
+		return 1;
+
+	if (pc->r_temp[idx] || pc->r_temp[idx + 1] ||
+	    pc->r_temp[idx + 2] || pc->r_temp[idx + 3])
+		return alloc_temp4(pc, dst, idx + 1);
+
+	for (i = 0; i < 4; i++) {
+		dst[i] = CALLOC_STRUCT(nv50_reg);
+		dst[i]->type = P_TEMP;
+		dst[i]->index = -1;
+		dst[i]->hw = idx + i;
+		pc->r_temp[idx + i] = dst[i];
+	}
+
+	return 0;
+}
+
+static void
+free_temp4(struct nv50_pc *pc, struct nv50_reg *reg[4])
+{
+	int i;
+
+	for (i = 0; i < 4; i++)
+		free_temp(pc, reg[i]);
+}
+
+/* Allocate a short-lived temp and record it so kill_temp_temp() can
+ * release it later.
+ */
+static struct nv50_reg *
+temp_temp(struct nv50_pc *pc)
+{
+	if (pc->temp_temp_nr >= 16)
+		assert(0);
+
+	pc->temp_temp[pc->temp_temp_nr] = alloc_temp(pc, NULL);
+	return pc->temp_temp[pc->temp_temp_nr++];
+}
+
+/* Release every temp recorded by temp_temp(). */
+static void
+kill_temp_temp(struct nv50_pc *pc)
+{
+	int i;
+
+	for (i = 0; i < pc->temp_temp_nr; i++)
+		free_temp(pc, pc->temp_temp[i]);
+	pc->temp_temp_nr = 0;
+}
+
+/* Append one 4-float immediate to immd_buf and return its vec4 index. */
+static int
+ctor_immd(struct nv50_pc *pc, float x, float y, float z, float w)
+{
+	/* Old size is immd_nr vec4s; the original spelled the 4 as an
+	 * undeclared 'r', which only built where REALLOC drops that argument.
+	 */
+	pc->immd_buf = REALLOC(pc->immd_buf, (pc->immd_nr * 4 * sizeof(float)),
+			       (pc->immd_nr + 1) * 4 * sizeof(float));
+	pc->immd_buf[(pc->immd_nr * 4) + 0] = x;
+	pc->immd_buf[(pc->immd_nr * 4) + 1] = y;
+	pc->immd_buf[(pc->immd_nr * 4) + 2] = z;
+	pc->immd_buf[(pc->immd_nr * 4) + 3] = w;
+
+	return pc->immd_nr++;
+}
+
+/* Create a P_IMMD register for scalar 'f'; hw is the float offset of the
+ * new vec4 (f, 0, 0, 0) inside immd_buf.
+ */
+static struct nv50_reg *
+alloc_immd(struct nv50_pc *pc, float f)
+{
+	struct nv50_reg *r = CALLOC_STRUCT(nv50_reg);
+	unsigned hw;
+
+	hw = ctor_immd(pc, f, 0, 0, 0) * 4;
+	r->type = P_IMMD;
+	r->hw = hw;
+	r->index = -1;
+	return r;
+}
+
+/* Allocate a zeroed instruction record with no parameter attached. */
+static struct nv50_program_exec *
+exec(struct nv50_pc *pc)
+{
+	struct nv50_program_exec *e = CALLOC_STRUCT(nv50_program_exec);
+
+	e->param.index = -1;
+	return e;
+}
+
+/* Append 'e' to the program's instruction list; long instructions
+ * (bit 0 of inst[0] set) count as two words, short ones as one.
+ */
+static void
+emit(struct nv50_pc *pc, struct nv50_program_exec *e)
+{
+	struct nv50_program *p = pc->p;
+
+	if (p->exec_tail)
+		p->exec_tail->next = e;
+	if (!p->exec_head)
+		p->exec_head = e;
+	p->exec_tail = e;
+	p->exec_size += (e->inst[0] & 1) ? 2 : 1;
+}
+
+static INLINE void set_long(struct nv50_pc *, struct nv50_program_exec *);
+
+static boolean
+is_long(struct nv50_program_exec *e)
+{
+	if (e->inst[0] & 1)
+		return TRUE;
+	return FALSE;
+}
+
+static boolean
+is_immd(struct nv50_program_exec *e)
+{
+	if (is_long(e) && (e->inst[1] & 3) == 3)
+		return TRUE;
+	return FALSE;
+}
+
+/* Force the long encoding, then store 'pred' at bit 7 and 'idx' at bit 12
+ * of inst[1].
+ */
+static INLINE void
+set_pred(struct nv50_pc *pc, unsigned pred, unsigned idx,
+	 struct nv50_program_exec *e)
+{
+	set_long(pc, e);
+	e->inst[1] &= ~((0x1f << 7) | (0x3 << 12));
+	e->inst[1] |= (pred << 7) | (idx << 12);
+}
+
+/* Force the long encoding, then store 'idx' at bit 4 and 'on' at bit 6
+ * of inst[1].
+ */
+static INLINE void
+set_pred_wr(struct nv50_pc *pc, unsigned on, unsigned idx,
+	    struct nv50_program_exec *e)
+{
+	set_long(pc, e);
+	e->inst[1] &= ~((0x3 << 4) | (1 << 6));
+	e->inst[1] |= (idx << 4) | (on << 6);
+}
+
+/* Switch 'e' to the long encoding once and install default predicate
+ * fields.  The long bit is set before calling set_pred()/set_pred_wr(),
+ * so their own set_long() calls return immediately.
+ */
+static INLINE void
+set_long(struct nv50_pc *pc, struct nv50_program_exec *e)
+{
+	if (is_long(e))
+		return;
+
+	e->inst[0] |= 1;
+	set_pred(pc, 0xf, 0, e);
+	set_pred_wr(pc, 0, 0, e);
+}
+
+static INLINE void
+set_dst(struct
nv50_pc *pc, struct nv50_reg *dst, struct nv50_program_exec *e) +{ + if (dst->type == P_RESULT) { + set_long(pc, e); + e->inst[1] |= 0x00000008; + } + + alloc_reg(pc, dst); + e->inst[0] |= (dst->hw << 2); +} + +static INLINE void +set_immd(struct nv50_pc *pc, struct nv50_reg *imm, struct nv50_program_exec *e) +{ + unsigned val = fui(pc->immd_buf[imm->hw]); /* XXX */ + + set_long(pc, e); + /*XXX: can't be predicated - bits overlap.. catch cases where both + * are required and avoid them. */ + set_pred(pc, 0, 0, e); + set_pred_wr(pc, 0, 0, e); + + e->inst[1] |= 0x00000002 | 0x00000001; + e->inst[0] |= (val & 0x3f) << 16; + e->inst[1] |= (val >> 6) << 2; +} + +static void +emit_interp(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src, struct nv50_reg *iv) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0x80000000; + set_dst(pc, dst, e); + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 16); + if (iv) { + e->inst[0] |= (1 << 25); + alloc_reg(pc, iv); + e->inst[0] |= (iv->hw << 9); + } + + emit(pc, e); +} + +static void +set_data(struct nv50_pc *pc, struct nv50_reg *src, unsigned m, unsigned s, + struct nv50_program_exec *e) +{ + set_long(pc, e); +#if 1 + e->inst[1] |= (1 << 22); +#else + if (src->type == P_IMMD) { + e->inst[1] |= (NV50_CB_PMISC << 22); + } else { + if (pc->p->type == PIPE_SHADER_VERTEX) + e->inst[1] |= (NV50_CB_PVP << 22); + else + e->inst[1] |= (NV50_CB_PFP << 22); + } +#endif + + e->param.index = src->hw; + e->param.shift = s; + e->param.mask = m << (s % 32); +} + +static void +emit_mov(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0x10000000; + + set_dst(pc, dst, e); + + if (0 && dst->type != P_RESULT && src->type == P_IMMD) { + set_immd(pc, src, e); + /*XXX: 32-bit, but steals part of "half" reg space - need to + * catch and handle this case if/when we do half-regs + */ + e->inst[0] |= 0x00008000; + } else + if (src->type == P_IMMD || 
src->type == P_CONST) { + set_long(pc, e); + set_data(pc, src, 0x7f, 9, e); + e->inst[1] |= 0x20000000; /* src0 const? */ + } else { + if (src->type == P_ATTR) { + set_long(pc, e); + e->inst[1] |= 0x00200000; + } + + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 9); + } + + /* We really should support "half" instructions here at some point, + * but I don't feel confident enough about them yet. + */ + set_long(pc, e); + if (is_long(e) && !is_immd(e)) { + e->inst[1] |= 0x04000000; /* 32-bit */ + e->inst[1] |= 0x0003c000; /* "subsubop" 0xf == mov */ + } + + emit(pc, e); +} + /* Reorder a source pair in place: exchanges *s0 and *s1 when src0 is P_CONST and src1 is not, or (src0 not being P_CONST) when src1 is P_ATTR and src0 is not. Returns TRUE only if the two were exchanged, so non-commutative callers can compensate. */ +static boolean +check_swap_src_0_1(struct nv50_pc *pc, + struct nv50_reg **s0, struct nv50_reg **s1) +{ + struct nv50_reg *src0 = *s0, *src1 = *s1; + + if (src0->type == P_CONST) { + if (src1->type != P_CONST) { + *s0 = src1; + *s1 = src0; + return TRUE; + } + } else + if (src1->type == P_ATTR) { + if (src0->type != P_ATTR) { + *s0 = src1; + *s1 = src0; + return TRUE; + } + } + + return FALSE; +} + /* Put src in the source-0 field (bit 9 of inst[0]). P_ATTR forces the long form and sets 0x00200000 in inst[1]; P_CONST and P_IMMD are first moved into a scratch temp. */ +static void +set_src_0(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) +{ + if (src->type == P_ATTR) { + set_long(pc, e); + e->inst[1] |= 0x00200000; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } + + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 9); +} + /* Put src in the source-1 field (bit 16 of inst[0]). P_ATTR is moved to a scratch temp. P_CONST/P_IMMD is referenced through set_data() and flagged with 0x00800000, unless 0x01000000 (the flag set_src_2 uses) is already set, in which case it is moved to a scratch temp instead. */ +static void +set_src_1(struct nv50_pc *pc, struct nv50_reg *src, struct nv50_program_exec *e) +{ + if (src->type == P_ATTR) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + assert(!(e->inst[0] & 0x00800000)); + if (e->inst[0] & 0x01000000) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else { + set_data(pc, src, 0x7f, 16, e); + e->inst[0] |= 0x00800000; + } + } + + alloc_reg(pc, src); + e->inst[0] |= (src->hw << 16); +} + /* Put src in the source-2 field (bit 14 of inst[1]); always forces the long form. Mirrors set_src_1 with the roles of the 0x00800000 and 0x01000000 flags exchanged. */ +static void +set_src_2(struct nv50_pc *pc, struct nv50_reg 
*src, struct nv50_program_exec *e) +{ + set_long(pc, e); + + if (src->type == P_ATTR) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else + if (src->type == P_CONST || src->type == P_IMMD) { + assert(!(e->inst[0] & 0x01000000)); + if (e->inst[0] & 0x00800000) { + struct nv50_reg *temp = temp_temp(pc); + + emit_mov(pc, temp, src); + src = temp; + } else { + set_data(pc, src, 0x7f, 32+14, e); + e->inst[0] |= 0x01000000; + } + } + + alloc_reg(pc, src); + e->inst[1] |= (src->hw << 14); +} + +static void +emit_mul(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xc0000000; + set_long(pc, e); + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + + emit(pc, e); +} + +static void +emit_add(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xb0000000; + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + if (is_long(e)) + set_src_2(pc, src1, e); + else + set_src_1(pc, src1, e); + + emit(pc, e); +} + +static void +emit_minmax(struct nv50_pc *pc, unsigned sub, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + + set_long(pc, e); + e->inst[0] |= 0xb0000000; + e->inst[1] |= (sub << 29); + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + + emit(pc, e); +} + /* dst = src0 - src1 on the 0xb0000000 (add) opcode. Sets 0x04000000 in inst[1] when check_swap_src_0_1() exchanged the operands and 0x08000000 otherwise. NOTE(review): presumably each bit negates one operand of the add -- confirm. */ +static void +emit_sub(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xb0000000; + + set_long(pc, e); + if (check_swap_src_0_1(pc, &src0, &src1)) + e->inst[1] |= 0x04000000; + else + e->inst[1] |= 0x08000000; + + set_dst(pc, dst, e); + 
set_src_0(pc, src0, e); + set_src_2(pc, src1, e); + + emit(pc, e); +} + +static void +emit_mad(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1, struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xe0000000; + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + set_src_2(pc, src2, e); + + emit(pc, e); +} + +static void +emit_msb(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src0, + struct nv50_reg *src1, struct nv50_reg *src2) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xe0000000; + set_long(pc, e); + e->inst[1] |= 0x08000000; /* src0 * src1 - src2 */ + + check_swap_src_0_1(pc, &src0, &src1); + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + set_src_2(pc, src2, e); + + emit(pc, e); +} + +static void +emit_flop(struct nv50_pc *pc, unsigned sub, + struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0x90000000; + if (sub) { + set_long(pc, e); + e->inst[1] |= (sub << 29); + } + + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void +emit_preex2(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xb0000000; + + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_long(pc, e); + e->inst[1] |= (6 << 29) | 0x00004000; + + emit(pc, e); +} + +static void +emit_precossin(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] |= 0xb0000000; + + set_dst(pc, dst, e); + set_src_0(pc, src, e); + set_long(pc, e); + e->inst[1] |= (6 << 29); + + emit(pc, e); +} + /* Comparison c_op (0..7) of src0 and src1: a set.u32 followed by a cvt.f32.u32 into the requested dst. c_op is remapped through inv_cop[] when the operands were exchanged; a temp holds the intermediate when dst is not P_TEMP. */ +static void +emit_set(struct nv50_pc *pc, unsigned c_op, struct nv50_reg *dst, + struct nv50_reg *src0, struct nv50_reg *src1) +{ + struct nv50_program_exec *e = exec(pc); + unsigned inv_cop[8] = { 0, 4, 
2, 6, 1, 5, 3, 7 }; + struct nv50_reg *rdst; + + assert(c_op <= 7); + if (check_swap_src_0_1(pc, &src0, &src1)) + c_op = inv_cop[c_op]; + + rdst = dst; + if (dst->type != P_TEMP) + dst = alloc_temp(pc, NULL); + + /* set.u32 */ + set_long(pc, e); + e->inst[0] |= 0xb0000000; + e->inst[1] |= (3 << 29); + e->inst[1] |= (c_op << 14); + /*XXX: breaks things, .u32 by default? + * decuda will disasm as .u16 and use .lo/.hi regs, but this + * doesn't seem to match what the hw actually does. + inst[1] |= 0x04000000; << breaks things.. .u32 by default? + */ + set_dst(pc, dst, e); + set_src_0(pc, src0, e); + set_src_1(pc, src1, e); + emit(pc, e); + + /* cvt.f32.u32 */ + e = exec(pc); + e->inst[0] = 0xa0000001; + e->inst[1] = 0x64014780; + set_dst(pc, rdst, e); + set_src_0(pc, dst, e); + emit(pc, e); + + if (dst != rdst) + free_temp(pc, dst); +} + +static void +emit_flr(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x08000000; /* integer mode */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= ((0x1 << 3)) << 14; /* .rn */ + e->inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + /* dst = v raised to the power e, emitted as: flop 3 on v, multiply by e, preex2, then flop 6. NOTE(review): flop 3 / flop 6 look like lg2 / ex2 (the LG2 and EX2 opcodes use the same sub-ops) -- confirm. */ +static void +emit_pow(struct nv50_pc *pc, struct nv50_reg *dst, + struct nv50_reg *v, struct nv50_reg *e) +{ + struct nv50_reg *temp = alloc_temp(pc, NULL); + + emit_flop(pc, 3, temp, v); + emit_mul(pc, temp, temp, e); + emit_preex2(pc, temp, temp); + emit_flop(pc, 6, dst, temp); + + free_temp(pc, temp); +} + +static void +emit_abs(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= (1 << 14); /* src .f32 */ + e->inst[1] |= ((1 << 6) << 14); /* .abs */ + 
set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + /* TGSI LIT for the channels selected by mask: dst[0] and dst[3] receive 1.0; emit_minmax sub-op 4 (the one TGSI_OPCODE_MAX uses) of src[0] and 0 goes to dst[1], or to a scratch temp when only channel 2 is requested; dst[2] receives emit_pow() of the clamped src[1] and src[3], followed by a move of 0 to dst[2] on which set_pred() is applied. NOTE(review): the four alloc_immd() registers are not released here -- confirm who owns them. */ +static void +emit_lit(struct nv50_pc *pc, struct nv50_reg **dst, unsigned mask, + struct nv50_reg **src) +{ + struct nv50_reg *one = alloc_immd(pc, 1.0); + struct nv50_reg *zero = alloc_immd(pc, 0.0); + struct nv50_reg *neg128 = alloc_immd(pc, -127.999999); + struct nv50_reg *pos128 = alloc_immd(pc, 127.999999); + struct nv50_reg *tmp[4]; + + if (mask & (1 << 0)) + emit_mov(pc, dst[0], one); + + if (mask & (1 << 3)) + emit_mov(pc, dst[3], one); + + if (mask & (3 << 1)) { + if (mask & (1 << 1)) + tmp[0] = dst[1]; + else + tmp[0] = temp_temp(pc); + emit_minmax(pc, 4, tmp[0], src[0], zero); + } + + if (mask & (1 << 2)) { + set_pred_wr(pc, 1, 0, pc->p->exec_tail); + + tmp[1] = temp_temp(pc); + emit_minmax(pc, 4, tmp[1], src[1], zero); + + tmp[3] = temp_temp(pc); + emit_minmax(pc, 4, tmp[3], src[3], neg128); + emit_minmax(pc, 5, tmp[3], tmp[3], pos128); + + emit_pow(pc, dst[2], tmp[1], tmp[3]); + emit_mov(pc, dst[2], zero); + set_pred(pc, 3, 0, pc->p->exec_tail); + } +} + +static void +emit_neg(struct nv50_pc *pc, struct nv50_reg *dst, struct nv50_reg *src) +{ + struct nv50_program_exec *e = exec(pc); + + set_long(pc, e); + e->inst[0] |= 0xa0000000; /* delta */ + e->inst[1] |= (7 << 29); /* delta */ + e->inst[1] |= 0x04000000; /* negate arg0? probably not */ + e->inst[1] |= (1 << 14); /* src .f32 */ + set_dst(pc, dst, e); + set_src_0(pc, src, e); + + emit(pc, e); +} + +static void +emit_kil(struct nv50_pc *pc, struct nv50_reg *src) +{ + struct nv50_program_exec *e; + const int r_pred = 1; + + /* Sets predicate reg ? */ + e = exec(pc); + e->inst[0] = 0xa00001fd; + e->inst[1] = 0xc4014788; + set_src_0(pc, src, e); + set_pred_wr(pc, 1, r_pred, e); + emit(pc, e); + + /* This is probably KILP */ + e = exec(pc); + e->inst[0] = 0x000001fe; + set_long(pc, e); + set_pred(pc, 1 /* LT? 
*/, r_pred, e); + emit(pc, e); +} + /* Map channel c of a TGSI destination register to its nv50_reg: TEMPORARY indexes pc->temp, OUTPUT indexes pc->result; every other file yields NULL. */ +static struct nv50_reg * +tgsi_dst(struct nv50_pc *pc, int c, const struct tgsi_full_dst_register *dst) +{ + switch (dst->DstRegister.File) { + case TGSI_FILE_TEMPORARY: + return &pc->temp[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_OUTPUT: + return &pc->result[dst->DstRegister.Index * 4 + c]; + case TGSI_FILE_NULL: + return NULL; + default: + break; + } + + return NULL; +} + /* Resolve channel chan of a TGSI source register to an nv50_reg. The extended swizzle is applied first (ZERO and ONE become immediates), then the sign mode; abs/neg results are written to a scratch temp which is returned instead of the original register. */ +static struct nv50_reg * +tgsi_src(struct nv50_pc *pc, int chan, const struct tgsi_full_src_register *src) +{ + struct nv50_reg *r = NULL; + struct nv50_reg *temp; + unsigned c; + + c = tgsi_util_get_full_src_register_extswizzle(src, chan); + switch (c) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch (src->SrcRegister.File) { + case TGSI_FILE_INPUT: + r = &pc->attr[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_TEMPORARY: + r = &pc->temp[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_CONSTANT: + r = &pc->param[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_IMMEDIATE: + r = &pc->immd[src->SrcRegister.Index * 4 + c]; + break; + case TGSI_FILE_SAMPLER: + break; + default: + assert(0); + break; + } + break; + case TGSI_EXTSWIZZLE_ZERO: + r = alloc_immd(pc, 0.0); + break; + case TGSI_EXTSWIZZLE_ONE: + r = alloc_immd(pc, 1.0); + break; + default: + assert(0); + break; + } + + switch (tgsi_util_get_full_src_register_sign_mode(src, chan)) { + case TGSI_UTIL_SIGN_KEEP: + break; + case TGSI_UTIL_SIGN_CLEAR: + temp = temp_temp(pc); + emit_abs(pc, temp, r); + r = temp; + break; + case TGSI_UTIL_SIGN_TOGGLE: + temp = temp_temp(pc); + emit_neg(pc, temp, r); + r = temp; + break; + case TGSI_UTIL_SIGN_SET: + temp = temp_temp(pc); + emit_abs(pc, temp, r); + /* negate the abs result, not the original value, so the outcome is -|r| */ + emit_neg(pc, temp, temp); + r = temp; + break; + default: + assert(0); + break; + } + + return r; +} + /* Translate one TGSI instruction into nv50 code appended to the program. Only channels enabled in the destination write mask are emitted; with saturation requested, results go to scratch temps first and are then converted with .sat into the real destinations. Returns FALSE for an opcode that is not handled. */ +static boolean +nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) +{ + const struct 
tgsi_full_instruction *inst = &tok->FullInstruction; + struct nv50_reg *rdst[4], *dst[4], *src[3][4], *temp; + /* unit is only assigned when a sampler source is present; start from 0 so TEX/TXP never read an indeterminate value */ + unsigned mask, sat, unit = 0; + int i, c; + + mask = inst->FullDstRegisters[0].DstRegister.WriteMask; + sat = inst->Instruction.Saturate == TGSI_SAT_ZERO_ONE; + + for (c = 0; c < 4; c++) { + if (mask & (1 << c)) + dst[c] = tgsi_dst(pc, c, &inst->FullDstRegisters[0]); + else + dst[c] = NULL; + } + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + + if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) + unit = fs->SrcRegister.Index; + + for (c = 0; c < 4; c++) + src[i][c] = tgsi_src(pc, c, fs); + } + + if (sat) { + for (c = 0; c < 4; c++) { + rdst[c] = dst[c]; + dst[c] = temp_temp(pc); + } + } + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_abs(pc, dst[c], src[0][c]); + } + break; + case TGSI_OPCODE_ADD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_add(pc, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_COS: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 5, temp, temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + /* release the temp, as the DP3/EX2/POW cases do */ + free_temp(pc, temp); + break; + case TGSI_OPCODE_DP3: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_DP4: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + emit_mad(pc, temp, src[0][3], src[1][3], temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + 
emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_DPH: + temp = alloc_temp(pc, NULL); + emit_mul(pc, temp, src[0][0], src[1][0]); + emit_mad(pc, temp, src[0][1], src[1][1], temp); + emit_mad(pc, temp, src[0][2], src[1][2], temp); + emit_add(pc, temp, src[1][3], temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_DST: + { + struct nv50_reg *one = alloc_immd(pc, 1.0); + if (mask & (1 << 0)) + emit_mov(pc, dst[0], one); + if (mask & (1 << 1)) + emit_mul(pc, dst[1], src[0][1], src[1][1]); + if (mask & (1 << 2)) + emit_mov(pc, dst[2], src[0][2]); + if (mask & (1 << 3)) + emit_mov(pc, dst[3], src[1][3]); + FREE(one); + } + break; + case TGSI_OPCODE_EX2: + temp = alloc_temp(pc, NULL); + emit_preex2(pc, temp, src[0][0]); + emit_flop(pc, 6, temp, temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_FLR: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flr(pc, dst[c], src[0][c]); + } + break; + case TGSI_OPCODE_FRC: + temp = alloc_temp(pc, NULL); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flr(pc, temp, src[0][c]); + emit_sub(pc, dst[c], src[0][c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_KIL: + emit_kil(pc, src[0][0]); + emit_kil(pc, src[0][1]); + emit_kil(pc, src[0][2]); + emit_kil(pc, src[0][3]); + break; + case TGSI_OPCODE_LIT: + emit_lit(pc, &dst[0], mask, &src[0][0]); + break; + case TGSI_OPCODE_LG2: + temp = alloc_temp(pc, NULL); + emit_flop(pc, 3, temp, src[0][0]); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + /* release the temp, as the DP3/EX2/POW cases do */ + free_temp(pc, temp); + break; + case TGSI_OPCODE_LRP: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + /*XXX: we can do better than this */ + temp = alloc_temp(pc, NULL); + emit_neg(pc, temp, 
src[0][c]); + emit_mad(pc, temp, temp, src[2][c], src[2][c]); + emit_mad(pc, dst[c], src[0][c], src[1][c], temp); + free_temp(pc, temp); + } + break; + case TGSI_OPCODE_MAD: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mad(pc, dst[c], src[0][c], src[1][c], src[2][c]); + } + break; + case TGSI_OPCODE_MAX: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 4, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_MIN: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_minmax(pc, 5, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_MOV: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], src[0][c]); + } + break; + case TGSI_OPCODE_MUL: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mul(pc, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_POW: + temp = alloc_temp(pc, NULL); + emit_pow(pc, temp, src[0][0], src[1][0]); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, dst[c], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_RCP: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flop(pc, 0, dst[c], src[0][0]); + } + break; + case TGSI_OPCODE_RSQ: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_flop(pc, 2, dst[c], src[0][0]); + } + break; + case TGSI_OPCODE_SCS: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + if (mask & (1 << 0)) + emit_flop(pc, 5, dst[0], temp); + if (mask & (1 << 1)) + emit_flop(pc, 4, dst[1], temp); + /* release the temp, as the DP3/EX2/POW cases do */ + free_temp(pc, temp); + break; + case TGSI_OPCODE_SGE: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_set(pc, 6, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_SIN: + temp = alloc_temp(pc, NULL); + emit_precossin(pc, temp, src[0][0]); + emit_flop(pc, 4, temp, temp); + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_mov(pc, 
dst[c], temp); + } + /* release the temp, as the DP3/EX2/POW cases do */ + free_temp(pc, temp); + break; + case TGSI_OPCODE_SLT: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_set(pc, 1, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_SUB: + for (c = 0; c < 4; c++) { + if (!(mask & (1 << c))) + continue; + emit_sub(pc, dst[c], src[0][c], src[1][c]); + } + break; + case TGSI_OPCODE_TEX: + case TGSI_OPCODE_TXP: + { + struct nv50_reg *t[4]; + struct nv50_program_exec *e; + + alloc_temp4(pc, t, 0); + emit_mov(pc, t[0], src[0][0]); + emit_mov(pc, t[1], src[0][1]); + + e = exec(pc); + e->inst[0] = 0xf6400000; + e->inst[0] |= (unit << 9); + set_long(pc, e); + e->inst[1] |= 0x0000c004; + set_dst(pc, t[0], e); + emit(pc, e); + + if (mask & (1 << 0)) emit_mov(pc, dst[0], t[0]); + if (mask & (1 << 1)) emit_mov(pc, dst[1], t[1]); + if (mask & (1 << 2)) emit_mov(pc, dst[2], t[2]); + if (mask & (1 << 3)) emit_mov(pc, dst[3], t[3]); + + free_temp4(pc, t); + } + break; + case TGSI_OPCODE_XPD: + temp = alloc_temp(pc, NULL); + if (mask & (1 << 0)) { + emit_mul(pc, temp, src[0][2], src[1][1]); + emit_msb(pc, dst[0], src[0][1], src[1][2], temp); + } + if (mask & (1 << 1)) { + emit_mul(pc, temp, src[0][0], src[1][2]); + emit_msb(pc, dst[1], src[0][2], src[1][0], temp); + } + if (mask & (1 << 2)) { + emit_mul(pc, temp, src[0][1], src[1][0]); + emit_msb(pc, dst[2], src[0][0], src[1][1], temp); + } + free_temp(pc, temp); + break; + case TGSI_OPCODE_END: + break; + default: + NOUVEAU_ERR("invalid opcode %d\n", inst->Instruction.Opcode); + return FALSE; + } + + if (sat) { + for (c = 0; c < 4; c++) { + struct nv50_program_exec *e; + + if (!(mask & (1 << c))) + continue; + e = exec(pc); + + e->inst[0] = 0xa0000000; /* cvt */ + set_long(pc, e); + e->inst[1] |= (6 << 29); /* cvt */ + e->inst[1] |= 0x04000000; /* 32 bit */ + e->inst[1] |= (1 << 14); /* src .f32 */ + e->inst[1] |= ((1 << 5) << 14); /* .sat */ + set_dst(pc, rdst[c], e); + set_src_0(pc, dst[c], e); + emit(pc, e); + } + } + + kill_temp_temp(pc); + return TRUE; +} + 
+ /* First pass over the TGSI tokens: records immediates, finds the highest declared index for each register file, then allocates and initialises the nv50_reg arrays (four channels per register). For fragment programs every input channel is given a temp and interpolated into it up front. Returns FALSE on an unknown declaration file or an allocation failure. */ +static boolean +nv50_program_tx_prep(struct nv50_pc *pc) +{ + struct tgsi_parse_context p; + boolean ret = FALSE; + unsigned i, c; + + tgsi_parse_init(&p, pc->p->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&p)) { + const union tgsi_full_token *tok = &p.FullToken; + + tgsi_parse_token(&p); + switch (tok->Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + const struct tgsi_full_immediate *imm = + &p.FullToken.FullImmediate; + + ctor_immd(pc, imm->u.ImmediateFloat32[0].Float, + imm->u.ImmediateFloat32[1].Float, + imm->u.ImmediateFloat32[2].Float, + imm->u.ImmediateFloat32[3].Float); + } + break; + case TGSI_TOKEN_TYPE_DECLARATION: + { + const struct tgsi_full_declaration *d; + unsigned last; + + d = &p.FullToken.FullDeclaration; + last = d->DeclarationRange.Last; + + switch (d->Declaration.File) { + case TGSI_FILE_TEMPORARY: + if (pc->temp_nr < (last + 1)) + pc->temp_nr = last + 1; + break; + case TGSI_FILE_OUTPUT: + if (pc->result_nr < (last + 1)) + pc->result_nr = last + 1; + break; + case TGSI_FILE_INPUT: + if (pc->attr_nr < (last + 1)) + pc->attr_nr = last + 1; + break; + case TGSI_FILE_CONSTANT: + if (pc->param_nr < (last + 1)) + pc->param_nr = last + 1; + break; + case TGSI_FILE_SAMPLER: + break; + default: + NOUVEAU_ERR("bad decl file %d\n", + d->Declaration.File); + goto out_err; + } + } + break; + case TGSI_TOKEN_TYPE_INSTRUCTION: + break; + default: + break; + } + } + + if (pc->temp_nr) { + pc->temp = CALLOC(pc->temp_nr * 4, sizeof(struct nv50_reg)); + if (!pc->temp) + goto out_err; + + for (i = 0; i < pc->temp_nr; i++) { + for (c = 0; c < 4; c++) { + pc->temp[i*4+c].type = P_TEMP; + pc->temp[i*4+c].hw = -1; + pc->temp[i*4+c].index = i; + } + } + } + + if (pc->attr_nr) { + struct nv50_reg *iv = NULL; + int aid = 0; + + pc->attr = CALLOC(pc->attr_nr * 4, sizeof(struct nv50_reg)); + if (!pc->attr) + goto out_err; + + if (pc->p->type == PIPE_SHADER_FRAGMENT) { + iv = alloc_temp(pc, NULL); + emit_interp(pc, iv, iv, NULL); + emit_flop(pc, 0, iv, iv); + 
aid++; + } + + for (i = 0; i < pc->attr_nr; i++) { + struct nv50_reg *a = &pc->attr[i*4]; + + for (c = 0; c < 4; c++) { + if (pc->p->type == PIPE_SHADER_FRAGMENT) { + struct nv50_reg *at = + alloc_temp(pc, NULL); + pc->attr[i*4+c].type = at->type; + pc->attr[i*4+c].hw = at->hw; + pc->attr[i*4+c].index = at->index; + } else { + pc->p->cfg.vp.attr[aid/32] |= + (1 << (aid % 32)); + pc->attr[i*4+c].type = P_ATTR; + pc->attr[i*4+c].hw = aid++; + pc->attr[i*4+c].index = i; + } + } + + if (pc->p->type != PIPE_SHADER_FRAGMENT) + continue; + + emit_interp(pc, &a[0], &a[0], iv); + emit_interp(pc, &a[1], &a[1], iv); + emit_interp(pc, &a[2], &a[2], iv); + emit_interp(pc, &a[3], &a[3], iv); + } + + if (iv) + free_temp(pc, iv); + } + + if (pc->result_nr) { + int rid = 0; + + pc->result = CALLOC(pc->result_nr * 4, sizeof(struct nv50_reg)); + if (!pc->result) + goto out_err; + + for (i = 0; i < pc->result_nr; i++) { + for (c = 0; c < 4; c++) { + if (pc->p->type == PIPE_SHADER_FRAGMENT) { + pc->result[i*4+c].type = P_TEMP; + pc->result[i*4+c].hw = -1; + } else { + pc->result[i*4+c].type = P_RESULT; + pc->result[i*4+c].hw = rid++; + } + pc->result[i*4+c].index = i; + } + } + } + + if (pc->param_nr) { + int rid = 0; + + pc->param = CALLOC(pc->param_nr * 4, sizeof(struct nv50_reg)); + if (!pc->param) + goto out_err; + + for (i = 0; i < pc->param_nr; i++) { + for (c = 0; c < 4; c++) { + pc->param[i*4+c].type = P_CONST; + pc->param[i*4+c].hw = rid++; + pc->param[i*4+c].index = i; + } + } + } + + if (pc->immd_nr) { + int rid = pc->param_nr * 4; + + pc->immd = CALLOC(pc->immd_nr * 4, sizeof(struct nv50_reg)); + if (!pc->immd) + goto out_err; + + for (i = 0; i < pc->immd_nr; i++) { + for (c = 0; c < 4; c++) { + pc->immd[i*4+c].type = P_IMMD; + pc->immd[i*4+c].hw = rid++; + pc->immd[i*4+c].index = i; + } + } + } + + ret = TRUE; +out_err: + tgsi_parse_free(&p); + return ret; +} + /* Translate the TGSI tokens of p into nv50 code: runs the prep pass, translates every instruction token, moves fragment results into the low temps, and sets the low bit of inst[1] on the last instruction. On success p receives the parameter/immediate counts and the immediate data buffer. The translation context and its register arrays are released before returning. Returns FALSE on failure. */ +static boolean +nv50_program_tx(struct nv50_program *p) +{ + struct tgsi_parse_context parse; + struct nv50_pc 
*pc; + boolean ret; + + pc = CALLOC_STRUCT(nv50_pc); + if (!pc) + return FALSE; + pc->p = p; + pc->p->cfg.high_temp = 4; + + ret = nv50_program_tx_prep(pc); + if (ret == FALSE) + goto out_cleanup; + + tgsi_parse_init(&parse, pc->p->pipe.tokens); + while (!tgsi_parse_end_of_tokens(&parse)) { + const union tgsi_full_token *tok = &parse.FullToken; + + tgsi_parse_token(&parse); + + switch (tok->Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + ret = nv50_program_tx_insn(pc, tok); + if (ret == FALSE) + goto out_err; + break; + default: + break; + } + } + + if (p->type == PIPE_SHADER_FRAGMENT) { + struct nv50_reg out; + + out.type = P_TEMP; + for (out.hw = 0; out.hw < pc->result_nr * 4; out.hw++) + emit_mov(pc, &out, &pc->result[out.hw]); + } + + /* NOTE(review): the second test looks at exec_head while the bit below is set on exec_tail -- confirm whether exec_tail was intended */ + assert(is_long(pc->p->exec_tail) && !is_immd(pc->p->exec_head)); + pc->p->exec_tail->inst[1] |= 0x00000001; + + p->param_nr = pc->param_nr * 4; + p->immd_nr = pc->immd_nr * 4; + p->immd = pc->immd_buf; + +out_err: + tgsi_parse_free(&parse); + +out_cleanup: + /* nothing in p points into the translation context, so release the register arrays and the context itself; immd_buf is not freed here because it is handed to p above */ + if (pc->temp) + FREE(pc->temp); + if (pc->attr) + FREE(pc->attr); + if (pc->result) + FREE(pc->result); + if (pc->param) + FREE(pc->param); + if (pc->immd) + FREE(pc->immd); + FREE(pc); + return ret; +} + +static void +nv50_program_validate(struct nv50_context *nv50, struct nv50_program *p) +{ + if (nv50_program_tx(p) == FALSE) + assert(0); + p->translated = TRUE; +} + +static void +nv50_program_upload_data(struct nv50_context *nv50, float *map, + unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + + while (count) { + unsigned nr = count > 2047 ? 
2047 : count; + + BEGIN_RING(chan, tesla, 0x00000f00, 1); + OUT_RING (chan, (NV50_CB_PMISC << 0) | (start << 8)); + BEGIN_RING(chan, tesla, 0x40000f04, nr); + OUT_RINGp (chan, map, nr); + + map += nr; + start += nr; + count -= nr; + } +} + +static void +nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) +{ + struct nouveau_winsys *nvws = nv50->screen->nvws; + struct pipe_winsys *ws = nv50->pipe.winsys; + unsigned nr = p->param_nr + p->immd_nr; + + if (!p->data && nr) { + struct nouveau_resource *heap = nv50->screen->vp_data_heap; + + if (nvws->res_alloc(heap, nr, p, &p->data)) { + while (heap->next && heap->size < nr) { + struct nv50_program *evict = heap->next->priv; + nvws->res_free(&evict->data); + } + + if (nvws->res_alloc(heap, nr, p, &p->data)) + assert(0); + } + } + + if (p->param_nr) { + float *map = ws->buffer_map(ws, nv50->constbuf[p->type], + PIPE_BUFFER_USAGE_CPU_READ); + nv50_program_upload_data(nv50, map, p->data->start, + p->param_nr); + ws->buffer_unmap(ws, nv50->constbuf[p->type]); + } + + if (p->immd_nr) { + nv50_program_upload_data(nv50, p->immd, + p->data->start + p->param_nr, + p->immd_nr); + } +} + +static void +nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) +{ + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct pipe_winsys *ws = nv50->pipe.winsys; + struct nv50_program_exec *e; + struct nouveau_stateobj *so; + const unsigned flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR; + unsigned start, count, *up, *ptr; + boolean upload = FALSE; + + if (!p->buffer) { + p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); + upload = TRUE; + } + + if (p->data && p->data->start != p->data_start) { + for (e = p->exec_head; e; e = e->next) { + unsigned ei, ci; + + if (e->param.index < 0) + continue; + ei = e->param.shift >> 5; + ci = e->param.index + p->data->start; + + e->inst[ei] &= ~e->param.mask; + e->inst[ei] |= (ci << 
e->param.shift); + } + + p->data_start = p->data->start; + upload = TRUE; + } + + if (!upload) + return; + +#ifdef NV50_PROGRAM_DUMP + NOUVEAU_ERR("-------\n"); + up = ptr = MALLOC(p->exec_size * 4); + for (e = p->exec_head; e; e = e->next) { + NOUVEAU_ERR("0x%08x\n", e->inst[0]); + if (is_long(e)) + NOUVEAU_ERR("0x%08x\n", e->inst[1]); + } + +#endif + + up = ptr = MALLOC(p->exec_size * 4); + for (e = p->exec_head; e; e = e->next) { + *(ptr++) = e->inst[0]; + if (is_long(e)) + *(ptr++) = e->inst[1]; + } + + so = so_new(4,2); + so_method(so, nv50->screen->tesla, 0x1280, 3); + so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, flags | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PUPLOAD << 16) | 0x0800); //(p->exec_size * 4)); + + start = 0; count = p->exec_size; + while (count) { + struct nouveau_winsys *nvws = nv50->screen->nvws; + unsigned nr; + + so_emit(nvws, so); + + nr = MIN2(count, 2047); + nr = MIN2(nvws->channel->pushbuf->remaining, nr); + if (nvws->channel->pushbuf->remaining < (nr + 3)) { + FIRE_RING(chan); + continue; + } + + BEGIN_RING(chan, tesla, 0x0f00, 1); + OUT_RING (chan, (start << 8) | NV50_CB_PUPLOAD); + BEGIN_RING(chan, tesla, 0x40000f04, nr); + OUT_RINGp (chan, up + start, nr); + + start += nr; + count -= nr; + } + + FREE(up); + so_ref(NULL, &so); +} + +void +nv50_vertprog_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->vertprog; + struct nouveau_stateobj *so; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); + + so = so_new(13, 2); + so_method(so, tesla, NV50TCL_VP_ADDRESS_HIGH, 2); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, 0x1650, 2); + 
so_data (so, p->cfg.vp.attr[0]); + so_data (so, p->cfg.vp.attr[1]); + so_method(so, tesla, 0x16b8, 1); + so_data (so, p->cfg.high_result); + so_method(so, tesla, 0x16ac, 2); + so_data (so, p->cfg.high_result); //8); + so_data (so, p->cfg.high_temp); + so_method(so, tesla, 0x140c, 1); + so_data (so, 0); /* program start offset */ + so_ref(so, &nv50->state.vertprog); +} + /* Translate the bound fragment program if that has not been done yet, validate its constant data and code, and build the state object stored in nv50->state.fragprog. */ +void +nv50_fragprog_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_program *p = nv50->fragprog; + struct nouveau_stateobj *so; + + if (!p->translated) { + nv50_program_validate(nv50, p); + if (!p->translated) + assert(0); + } + + nv50_program_validate_data(nv50, p); + nv50_program_validate_code(nv50, p); + + so = so_new(64, 2); + so_method(so, tesla, NV50TCL_FP_ADDRESS_HIGH, 2); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, p->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | + NOUVEAU_BO_LOW, 0, 0); + so_method(so, tesla, 0x1904, 4); + so_data (so, 0x00040404); /* p: 0x01000404 */ + so_data (so, 0x00000004); + so_data (so, 0x00000000); + so_data (so, 0x00000000); + so_method(so, tesla, 0x16bc, 3); /*XXX: fixme */ + so_data (so, 0x03020100); + so_data (so, 0x07060504); + so_data (so, 0x0b0a0908); + so_method(so, tesla, 0x1988, 2); + so_data (so, 0x08080408); //0x08040404); /* p: 0x0f000401 */ + so_data (so, p->cfg.high_temp); + so_method(so, tesla, 0x1414, 1); + so_data (so, 0); /* program start offset */ + so_ref(so, &nv50->state.fragprog); +} + /* Release everything produced by translation: frees the instruction list, drops the code buffer reference and the constant-space allocation, and clears `translated` so the validate functions translate the program again. */ +void +nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) +{ + struct pipe_screen *pscreen = nv50->pipe.screen; + + while (p->exec_head) { + struct nv50_program_exec *e = p->exec_head; + + p->exec_head = e->next; + FREE(e); + } + p->exec_tail = NULL; + p->exec_size = 0; + + if (p->buffer) + pipe_buffer_reference(pscreen, &p->buffer, NULL); + + nv50->screen->nvws->res_free(&p->data); + + p->translated = 0; +} + diff --git 
a/src/gallium/drivers/nv50/nv50_program.h b/src/gallium/drivers/nv50/nv50_program.h new file mode 100644 index 0000000000..78deed6a38 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_program.h @@ -0,0 +1,45 @@ +#ifndef __NV50_PROGRAM_H__ +#define __NV50_PROGRAM_H__ + +#include "pipe/p_state.h" +#include "tgsi/tgsi_scan.h" + /* One translated instruction in a singly linked list. inst[] holds its one or two 32-bit words. param describes a constant-space reference that nv50_program_validate_code() re-patches when the program's constant allocation moves: index is the offset within that allocation (entries with a negative index are skipped), mask and shift locate the field inside the instruction. */ +struct nv50_program_exec { + struct nv50_program_exec *next; + + unsigned inst[2]; + struct { + int index; + unsigned mask; + unsigned shift; + } param; +}; + /* Driver-side shader object: the TGSI state it was created from, the translated instruction list (exec_head/exec_tail, exec_size counted in 32-bit words), its constant-space allocation, the code buffer, and the immediate values gathered during translation. */ +struct nv50_program { + struct pipe_shader_state pipe; + struct tgsi_shader_info info; + boolean translated; + + unsigned type; + struct nv50_program_exec *exec_head; + struct nv50_program_exec *exec_tail; + unsigned exec_size; + struct nouveau_resource *data; + unsigned data_start; + + struct pipe_buffer *buffer; + + float *immd; + unsigned immd_nr; + unsigned param_nr; + + struct { + unsigned high_temp; + unsigned high_result; + struct { + unsigned attr[2]; + } vp; + } cfg; +}; + +#endif diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c new file mode 100644 index 0000000000..7c8831a46d --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -0,0 +1,135 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" + +#include "nv50_context.h" + /* Driver-side query object: the buffer the result is read back from, the query type, and the cached result with a flag saying whether it has been read back yet. */ +struct nv50_query { + struct pipe_buffer *buffer; + unsigned type; + boolean ready; + uint64_t result; +}; + +static INLINE struct nv50_query * +nv50_query(struct pipe_query *pipe) +{ + return (struct nv50_query *)pipe; +} + /* Create a query object of the given type together with its 16-byte result buffer. Only PIPE_QUERY_OCCLUSION_COUNTER is expected. Returns NULL if either allocation fails. */ +static struct pipe_query * +nv50_query_create(struct pipe_context *pipe, unsigned type) +{ + struct pipe_winsys *ws = pipe->winsys; + struct nv50_query *q; + + /* check the requested type; the old test read the freshly zeroed q->type and never looked at the argument */ + assert (type == PIPE_QUERY_OCCLUSION_COUNTER); + + q = CALLOC_STRUCT(nv50_query); + if (!q) + return NULL; + q->type = type; + + q->buffer = ws->buffer_create(ws, 256, 0, 16); + if (!q->buffer) { + FREE(q); + return NULL; + } + + return (struct pipe_query *)q; +} + +static void +nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv50_query *q = nv50_query(pq); + + if (q) { + pipe_buffer_reference(pipe->screen, &q->buffer, NULL); + FREE(q); + } +} + +static void +nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_query *q = nv50_query(pq); + + BEGIN_RING(chan, tesla, 0x1530, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, tesla, 0x1514, 1); + OUT_RING (chan, 1); + + q->ready = FALSE; +} + +static void +nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct 
nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nv50_query *q = nv50_query(pq); + struct nouveau_bo *bo = nv50->screen->nvws->get_bo(q->buffer); + + WAIT_RING (chan, 5); + BEGIN_RING(chan, tesla, 0x1b00, 4); + OUT_RELOCh(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(chan, bo, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RING (chan, 0x00000000); + OUT_RING (chan, 0x0100f002); + FIRE_RING (chan); +} + /* Return the query result through *result. On the first call after nv50_query_begin() the value is read from word 1 of the mapped result buffer and cached. The wait argument is not consulted. */ +static boolean +nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, + boolean wait, uint64_t *result) +{ + struct pipe_winsys *ws = pipe->winsys; + struct nv50_query *q = nv50_query(pq); + + /*XXX: Want to be able to return FALSE here instead of blocking + * until the result is available.. + */ + + if (!q->ready) { + uint32_t *map = ws->buffer_map(ws, q->buffer, + PIPE_BUFFER_USAGE_CPU_READ); + q->result = map[1]; + q->ready = TRUE; + ws->buffer_unmap(ws, q->buffer); + } + + *result = q->result; + return q->ready; +} + +void +nv50_init_query_functions(struct nv50_context *nv50) +{ + nv50->pipe.create_query = nv50_query_create; + nv50->pipe.destroy_query = nv50_query_destroy; + nv50->pipe.begin_query = nv50_query_begin; + nv50->pipe.end_query = nv50_query_end; + nv50->pipe.get_query_result = nv50_query_result; +} diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c new file mode 100644 index 0000000000..ee24405d36 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -0,0 +1,373 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do 
so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_screen.h" + +#include "util/u_simple_screen.h" + +#include "nv50_context.h" +#include "nv50_screen.h" + +#include "nouveau/nouveau_stateobj.h" + +#define NV5X_GRCLASS5097_CHIPSETS 0x00000001 +#define NV8X_GRCLASS8297_CHIPSETS 0x00000050 +#define NV9X_GRCLASS8297_CHIPSETS 0x00000014 + +static boolean +nv50_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, unsigned geom_flags) +{ + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z16_UNORM: + return TRUE; + default: + break; + } + } else { + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_A4R4G4B4_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: + case PIPE_FORMAT_A8L8_UNORM: + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: + return TRUE; + default: + break; + } + } + + return FALSE; +} + +static const char * +nv50_screen_get_name(struct pipe_screen *pscreen) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + struct 
nouveau_device *dev = screen->nvws->channel->device; + static char buffer[128]; + + snprintf(buffer, sizeof(buffer), "NV%02X", dev->chipset); + return buffer; +} + +static const char * +nv50_screen_get_vendor(struct pipe_screen *pscreen) +{ + return "nouveau"; +} + +static int +nv50_screen_get_param(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + return 32; + case PIPE_CAP_NPOT_TEXTURES: + return 1; + case PIPE_CAP_TWO_SIDED_STENCIL: + return 1; + case PIPE_CAP_GLSL: + return 0; + case PIPE_CAP_S3TC: + return 1; + case PIPE_CAP_ANISOTROPIC_FILTER: + return 1; + case PIPE_CAP_POINT_SPRITE: + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 8; + case PIPE_CAP_OCCLUSION_QUERY: + return 1; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + return 1; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + return 13; + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + return 10; + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + return 13; + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return 0; + case NOUVEAU_CAP_HW_VTXBUF: + return 1; + case NOUVEAU_CAP_HW_IDXBUF: + return 0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0; + } +} + +static float +nv50_screen_get_paramf(struct pipe_screen *pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + return 10.0; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + return 64.0; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 4.0; + default: + NOUVEAU_ERR("Unknown PIPE_CAP %d\n", param); + return 0.0; + } +} + +static void +nv50_screen_destroy(struct pipe_screen *pscreen) +{ + FREE(pscreen); +} + +struct pipe_screen * +nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) +{ + struct nv50_screen *screen = CALLOC_STRUCT(nv50_screen); + struct nouveau_stateobj *so; 
+ unsigned tesla_class = 0, ret; + unsigned chipset = nvws->channel->device->chipset; + int i; + + if (!screen) + return NULL; + screen->nvws = nvws; + + /* DMA engine object */ + ret = nvws->grobj_alloc(nvws, 0x5039, &screen->m2mf); + if (ret) { + NOUVEAU_ERR("Error creating M2MF object: %d\n", ret); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + /* 2D object */ + ret = nvws->grobj_alloc(nvws, NV50_2D, &screen->eng2d); + if (ret) { + NOUVEAU_ERR("Error creating 2D object: %d\n", ret); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + /* 3D object */ + if ((chipset & 0xf0) != 0x50 && (chipset & 0xf0) != 0x80) { + NOUVEAU_ERR("Not a G8x chipset\n"); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + switch (chipset & 0xf0) { + case 0x50: + if (NV5X_GRCLASS5097_CHIPSETS & (1 << (chipset & 0x0f))) + tesla_class = 0x5097; + break; + case 0x80: + if (NV8X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) + tesla_class = 0x8297; + break; + case 0x90: + if (NV9X_GRCLASS8297_CHIPSETS & (1 << (chipset & 0x0f))) + tesla_class = 0x8297; + break; + default: + break; + } + + if (tesla_class == 0) { + NOUVEAU_ERR("Unknown G8x chipset: NV%02x\n", chipset); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + ret = nvws->grobj_alloc(nvws, tesla_class, &screen->tesla); + if (ret) { + NOUVEAU_ERR("Error creating 3D object: %d\n", ret); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + /* Sync notifier */ + ret = nvws->notifier_alloc(nvws, 1, &screen->sync); + if (ret) { + NOUVEAU_ERR("Error creating notifier object: %d\n", ret); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + /* Static M2MF init */ + so = so_new(32, 0); + so_method(so, screen->m2mf, 0x0180, 3); + so_data (so, screen->sync->handle); + so_data (so, screen->nvws->channel->vram->handle); + so_data (so, screen->nvws->channel->vram->handle); + so_emit(nvws, so); + so_ref (NULL, &so); + + /* Static 2D init */ + so = so_new(64, 0); + so_method(so, 
screen->eng2d, NV50_2D_DMA_NOTIFY, 4); + so_data (so, screen->sync->handle); + so_data (so, screen->nvws->channel->vram->handle); + so_data (so, screen->nvws->channel->vram->handle); + so_data (so, screen->nvws->channel->vram->handle); + so_method(so, screen->eng2d, NV50_2D_OPERATION, 1); + so_data (so, NV50_2D_OPERATION_SRCCOPY); + so_method(so, screen->eng2d, 0x0290, 1); + so_data (so, 0); + so_method(so, screen->eng2d, 0x0888, 1); + so_data (so, 1); + so_emit(nvws, so); + so_ref(NULL, &so); + + /* Static tesla init */ + so = so_new(256, 20); + + so_method(so, screen->tesla, 0x1558, 1); + so_data (so, 1); + so_method(so, screen->tesla, NV50TCL_DMA_NOTIFY, 1); + so_data (so, screen->sync->handle); + so_method(so, screen->tesla, NV50TCL_DMA_UNK0(0), + NV50TCL_DMA_UNK0__SIZE); + for (i = 0; i < NV50TCL_DMA_UNK0__SIZE; i++) + so_data(so, nvws->channel->vram->handle); + so_method(so, screen->tesla, NV50TCL_DMA_UNK1(0), + NV50TCL_DMA_UNK1__SIZE); + for (i = 0; i < NV50TCL_DMA_UNK1__SIZE; i++) + so_data(so, nvws->channel->vram->handle); + so_method(so, screen->tesla, 0x121c, 1); + so_data (so, 1); + + so_method(so, screen->tesla, 0x13bc, 1); + so_data (so, 0x54); + so_method(so, screen->tesla, 0x13ac, 1); + so_data (so, 1); + so_method(so, screen->tesla, 0x16b8, 1); + so_data (so, 8); + + /* Shared constant buffer */ + screen->constbuf = ws->buffer_create(ws, 0, 0, 128 * 4 * 4); + if (nvws->res_init(&screen->vp_data_heap, 0, 128)) { + NOUVEAU_ERR("Error initialising constant buffer\n"); + nv50_screen_destroy(&screen->pipe); + return NULL; + } + + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->constbuf, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_PMISC << 16) | 0x00001000); + + /* Texture sampler/image unit setup - we abuse the constant buffer + * upload mechanism for the moment to upload data to the tex config + * 
blocks. At some point we *may* want to go the NVIDIA way of doing + * things? + */ + screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_TIC << 16) | 0x0800); + so_method(so, screen->tesla, 0x1574, 3); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0x00000800); + + screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); + so_method(so, screen->tesla, 0x1280, 3); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, (NV50_CB_TSC << 16) | 0x0800); + so_method(so, screen->tesla, 0x155c, 3); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); + so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | + NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); + so_data (so, 0x00000800); + + + /* Vertex array limits - max them out */ + for (i = 0; i < 16; i++) { + so_method(so, screen->tesla, 0x1080 + (i * 8), 2); + so_data (so, 0x000000ff); + so_data (so, 0xffffffff); + } + + so_method(so, screen->tesla, NV50TCL_DEPTH_RANGE_NEAR, 2); + so_data (so, fui(0.0)); + so_data (so, fui(1.0)); + + so_method(so, screen->tesla, 0x1234, 1); + so_data (so, 1); + so_method(so, screen->tesla, 0x1458, 1); + so_data (so, 1); + + so_emit(nvws, so); + so_ref(so, &screen->static_init); + nvws->push_flush(nvws, 0, NULL); + + screen->pipe.winsys = ws; + + screen->pipe.destroy = nv50_screen_destroy; + + screen->pipe.get_name = nv50_screen_get_name; + screen->pipe.get_vendor = nv50_screen_get_vendor; + screen->pipe.get_param = 
nv50_screen_get_param; + screen->pipe.get_paramf = nv50_screen_get_paramf; + + screen->pipe.is_format_supported = nv50_screen_is_format_supported; + + nv50_screen_init_miptree_functions(&screen->pipe); + nv50_transfer_init_screen_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); + + return &screen->pipe; +} + diff --git a/src/gallium/drivers/nv50/nv50_screen.h b/src/gallium/drivers/nv50/nv50_screen.h new file mode 100644 index 0000000000..db567aaac8 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_screen.h @@ -0,0 +1,35 @@ +#ifndef __NV50_SCREEN_H__ +#define __NV50_SCREEN_H__ + +#include "pipe/p_screen.h" + +struct nv50_screen { + struct pipe_screen pipe; + + struct nouveau_winsys *nvws; + + unsigned cur_pctx; + + struct nouveau_grobj *tesla; + struct nouveau_grobj *eng2d; + struct nouveau_grobj *m2mf; + struct nouveau_notifier *sync; + + struct pipe_buffer *constbuf; + struct nouveau_resource *vp_data_heap; + + struct pipe_buffer *tic; + struct pipe_buffer *tsc; + + struct nouveau_stateobj *static_init; +}; + +static INLINE struct nv50_screen * +nv50_screen(struct pipe_screen *screen) +{ + return (struct nv50_screen *)screen; +} + +void nv50_transfer_init_screen_functions(struct pipe_screen *); + +#endif diff --git a/src/gallium/drivers/nv50/nv50_state.c b/src/gallium/drivers/nv50/nv50_state.c new file mode 100644 index 0000000000..787ff958ec --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_state.c @@ -0,0 +1,664 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "tgsi/tgsi_parse.h" + +#include "nv50_context.h" +#include "nv50_texture.h" + +#include "nouveau/nouveau_stateobj.h" + +static void * +nv50_blend_state_create(struct pipe_context *pipe, + const struct pipe_blend_state *cso) +{ + struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; + struct nv50_blend_stateobj *bso = CALLOC_STRUCT(nv50_blend_stateobj); + unsigned cmask = 0, i; + + /*XXX ignored: + * - dither + */ + + if (cso->blend_enable == 0) { + so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); + for (i = 0; i < 8; i++) + so_data(so, 0); + } else { + so_method(so, tesla, NV50TCL_BLEND_ENABLE(0), 8); + for (i = 0; i < 8; i++) + so_data(so, 1); + so_method(so, tesla, NV50TCL_BLEND_EQUATION_RGB, 5); + so_data (so, nvgl_blend_eqn(cso->rgb_func)); + so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_src_factor)); + so_data (so, 0x4000 | nvgl_blend_func(cso->rgb_dst_factor)); + so_data (so, nvgl_blend_eqn(cso->alpha_func)); + so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_src_factor)); + so_method(so, tesla, NV50TCL_BLEND_FUNC_DST_ALPHA, 1); + so_data (so, 0x4000 | nvgl_blend_func(cso->alpha_dst_factor)); + } + + if (cso->logicop_enable == 0 ) { + so_method(so, tesla, NV50TCL_LOGIC_OP_ENABLE, 1); + so_data (so, 0); + } else { + so_method(so, tesla, 
NV50TCL_LOGIC_OP_ENABLE, 2); + so_data (so, 1); + so_data (so, nvgl_logicop_func(cso->logicop_func)); + } + + if (cso->colormask & PIPE_MASK_R) + cmask |= (1 << 0); + if (cso->colormask & PIPE_MASK_G) + cmask |= (1 << 4); + if (cso->colormask & PIPE_MASK_B) + cmask |= (1 << 8); + if (cso->colormask & PIPE_MASK_A) + cmask |= (1 << 12); + so_method(so, tesla, NV50TCL_COLOR_MASK(0), 8); + for (i = 0; i < 8; i++) + so_data(so, cmask); + + bso->pipe = *cso; + so_ref(so, &bso->so); + return (void *)bso; +} + +static void +nv50_blend_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->blend = hwcso; + nv50->dirty |= NV50_NEW_BLEND; +} + +static void +nv50_blend_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_blend_stateobj *bso = hwcso; + + so_ref(NULL, &bso->so); + FREE(bso); +} + +static INLINE unsigned +wrap_mode(unsigned wrap) +{ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return NV50TSC_1_0_WRAPS_REPEAT; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return NV50TSC_1_0_WRAPS_MIRROR_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_CLAMP: + return NV50TSC_1_0_WRAPS_CLAMP; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return NV50TSC_1_0_WRAPS_MIRROR_CLAMP; + default: + NOUVEAU_ERR("unknown wrap mode: %d\n", wrap); + return NV50TSC_1_0_WRAPS_REPEAT; + } +} +static void * +nv50_sampler_state_create(struct pipe_context *pipe, + const struct pipe_sampler_state *cso) +{ + unsigned *tsc = CALLOC(8, sizeof(unsigned)); + + tsc[0] = (0x00024000 | + (wrap_mode(cso->wrap_s) << 0) | + (wrap_mode(cso->wrap_t) << 3) | + (wrap_mode(cso->wrap_r) << 6)); + + switch (cso->mag_img_filter) { + case 
PIPE_TEX_FILTER_LINEAR: + tsc[1] |= NV50TSC_1_1_MAGF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + tsc[1] |= NV50TSC_1_1_MAGF_NEAREST; + break; + } + + switch (cso->min_img_filter) { + case PIPE_TEX_FILTER_LINEAR: + tsc[1] |= NV50TSC_1_1_MINF_LINEAR; + break; + case PIPE_TEX_FILTER_NEAREST: + default: + tsc[1] |= NV50TSC_1_1_MINF_NEAREST; + break; + } + + switch (cso->min_mip_filter) { + case PIPE_TEX_MIPFILTER_LINEAR: + tsc[1] |= NV50TSC_1_1_MIPF_LINEAR; + break; + case PIPE_TEX_MIPFILTER_NEAREST: + tsc[1] |= NV50TSC_1_1_MIPF_NEAREST; + break; + case PIPE_TEX_MIPFILTER_NONE: + default: + tsc[1] |= NV50TSC_1_1_MIPF_NONE; + break; + } + + if (cso->max_anisotropy >= 16.0) + tsc[0] |= (7 << 20); + else + if (cso->max_anisotropy >= 12.0) + tsc[0] |= (6 << 20); + else + if (cso->max_anisotropy >= 10.0) + tsc[0] |= (5 << 20); + else + if (cso->max_anisotropy >= 8.0) + tsc[0] |= (4 << 20); + else + if (cso->max_anisotropy >= 6.0) + tsc[0] |= (3 << 20); + else + if (cso->max_anisotropy >= 4.0) + tsc[0] |= (2 << 20); + else + if (cso->max_anisotropy >= 2.0) + tsc[0] |= (1 << 20); + + if (cso->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + tsc[0] |= (1 << 8); + tsc[0] |= (nvgl_comparison_op(cso->compare_func) & 0x7); + } + + return (void *)tsc; +} + +static void +nv50_sampler_state_bind(struct pipe_context *pipe, unsigned nr, void **sampler) +{ + struct nv50_context *nv50 = nv50_context(pipe); + int i; + + nv50->sampler_nr = nr; + for (i = 0; i < nv50->sampler_nr; i++) + nv50->sampler[i] = sampler[i]; + + nv50->dirty |= NV50_NEW_SAMPLER; +} + +static void +nv50_sampler_state_delete(struct pipe_context *pipe, void *hwcso) +{ + FREE(hwcso); +} + +static void +nv50_set_sampler_texture(struct pipe_context *pipe, unsigned nr, + struct pipe_texture **pt) +{ + struct nv50_context *nv50 = nv50_context(pipe); + int i; + + for (i = 0; i < nr; i++) + pipe_texture_reference((void *)&nv50->miptree[i], pt[i]); + for (i = nr; i < nv50->miptree_nr; i++) + 
pipe_texture_reference((void *)&nv50->miptree[i], NULL); + + nv50->miptree_nr = nr; + nv50->dirty |= NV50_NEW_TEXTURE; +} + +static void * +nv50_rasterizer_state_create(struct pipe_context *pipe, + const struct pipe_rasterizer_state *cso) +{ + struct nouveau_stateobj *so = so_new(64, 0); + struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; + struct nv50_rasterizer_stateobj *rso = + CALLOC_STRUCT(nv50_rasterizer_stateobj); + + /*XXX: ignored + * - light_twosize + * - point_smooth + * - multisample + * - point_sprite / sprite_coord_mode + */ + + so_method(so, tesla, NV50TCL_SHADE_MODEL, 1); + so_data (so, cso->flatshade ? NV50TCL_SHADE_MODEL_FLAT : + NV50TCL_SHADE_MODEL_SMOOTH); + + so_method(so, tesla, NV50TCL_LINE_WIDTH, 1); + so_data (so, fui(cso->line_width)); + so_method(so, tesla, NV50TCL_LINE_SMOOTH_ENABLE, 1); + so_data (so, cso->line_smooth ? 1 : 0); + if (cso->line_stipple_enable) { + so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); + so_data (so, 1); + so_method(so, tesla, NV50TCL_LINE_STIPPLE_PATTERN, 1); + so_data (so, (cso->line_stipple_pattern << 8) | + cso->line_stipple_factor); + } else { + so_method(so, tesla, NV50TCL_LINE_STIPPLE_ENABLE, 1); + so_data (so, 0); + } + + so_method(so, tesla, NV50TCL_POINT_SIZE, 1); + so_data (so, fui(cso->point_size)); + + so_method(so, tesla, NV50TCL_POLYGON_MODE_FRONT, 3); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + } else { + so_data(so, nvgl_polygon_mode(cso->fill_cw)); + so_data(so, nvgl_polygon_mode(cso->fill_ccw)); + } + so_data(so, cso->poly_smooth ? 
1 : 0); + + so_method(so, tesla, NV50TCL_CULL_FACE_ENABLE, 3); + so_data (so, cso->cull_mode != PIPE_WINDING_NONE); + if (cso->front_winding == PIPE_WINDING_CCW) { + so_data(so, NV50TCL_FRONT_FACE_CCW); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV50TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_CW: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + } + } else { + so_data(so, NV50TCL_FRONT_FACE_CW); + switch (cso->cull_mode) { + case PIPE_WINDING_CCW: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + case PIPE_WINDING_CW: + so_data(so, NV50TCL_CULL_FACE_FRONT); + break; + case PIPE_WINDING_BOTH: + so_data(so, NV50TCL_CULL_FACE_FRONT_AND_BACK); + break; + default: + so_data(so, NV50TCL_CULL_FACE_BACK); + break; + } + } + + so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_ENABLE, 1); + so_data (so, cso->poly_stipple_enable ? 1 : 0); + + so_method(so, tesla, NV50TCL_POLYGON_OFFSET_POINT_ENABLE, 3); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_POINT) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_POINT)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_LINE) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_LINE)) + so_data(so, 1); + else + so_data(so, 0); + if ((cso->offset_cw && cso->fill_cw == PIPE_POLYGON_MODE_FILL) || + (cso->offset_ccw && cso->fill_ccw == PIPE_POLYGON_MODE_FILL)) + so_data(so, 1); + else + so_data(so, 0); + + if (cso->offset_cw || cso->offset_ccw) { + so_method(so, tesla, NV50TCL_POLYGON_OFFSET_FACTOR, 1); + so_data (so, fui(cso->offset_scale)); + so_method(so, tesla, NV50TCL_POLYGON_OFFSET_UNITS, 1); + so_data (so, fui(cso->offset_units)); + } + + rso->pipe = *cso; + so_ref(so, &rso->so); + return (void *)rso; +} + +static void +nv50_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso) 
+{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->rasterizer = hwcso; + nv50->dirty |= NV50_NEW_RASTERIZER; +} + +static void +nv50_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_rasterizer_stateobj *rso = hwcso; + + so_ref(NULL, &rso->so); + FREE(rso); +} + +static void * +nv50_depth_stencil_alpha_state_create(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct nouveau_grobj *tesla = nv50_context(pipe)->screen->tesla; + struct nv50_zsa_stateobj *zsa = CALLOC_STRUCT(nv50_zsa_stateobj); + struct nouveau_stateobj *so = so_new(64, 0); + + so_method(so, tesla, NV50TCL_DEPTH_WRITE_ENABLE, 1); + so_data (so, cso->depth.writemask ? 1 : 0); + if (cso->depth.enabled) { + so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); + so_data (so, 1); + so_method(so, tesla, NV50TCL_DEPTH_TEST_FUNC, 1); + so_data (so, nvgl_comparison_op(cso->depth.func)); + } else { + so_method(so, tesla, NV50TCL_DEPTH_TEST_ENABLE, 1); + so_data (so, 0); + } + + /*XXX: yes, I know they're backwards.. 
header needs fixing */ + if (cso->stencil[0].enabled) { + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 5); + so_data (so, 1); + so_data (so, nvgl_stencil_op(cso->stencil[0].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[0].zpass_op)); + so_data (so, nvgl_comparison_op(cso->stencil[0].func)); + so_method(so, tesla, NV50TCL_STENCIL_BACK_FUNC_REF, 3); + so_data (so, cso->stencil[0].ref_value); + so_data (so, cso->stencil[0].writemask); + so_data (so, cso->stencil[0].valuemask); + } else { + so_method(so, tesla, NV50TCL_STENCIL_BACK_ENABLE, 1); + so_data (so, 0); + } + + if (cso->stencil[1].enabled) { + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 8); + so_data (so, 1); + so_data (so, nvgl_stencil_op(cso->stencil[1].fail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zfail_op)); + so_data (so, nvgl_stencil_op(cso->stencil[1].zpass_op)); + so_data (so, nvgl_comparison_op(cso->stencil[1].func)); + so_data (so, cso->stencil[1].ref_value); + so_data (so, cso->stencil[1].writemask); + so_data (so, cso->stencil[1].valuemask); + } else { + so_method(so, tesla, NV50TCL_STENCIL_FRONT_ENABLE, 1); + so_data (so, 0); + } + + if (cso->alpha.enabled) { + so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1); + so_data (so, 1); + so_method(so, tesla, NV50TCL_ALPHA_TEST_REF, 2); + so_data (so, fui(cso->alpha.ref_value)); + so_data (so, nvgl_comparison_op(cso->alpha.func)); + } else { + so_method(so, tesla, NV50TCL_ALPHA_TEST_ENABLE, 1); + so_data (so, 0); + } + + zsa->pipe = *cso; + so_ref(so, &zsa->so); + return (void *)zsa; +} + +static void +nv50_depth_stencil_alpha_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->zsa = hwcso; + nv50->dirty |= NV50_NEW_ZSA; +} + +static void +nv50_depth_stencil_alpha_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_zsa_stateobj *zsa = hwcso; + + so_ref(NULL, &zsa->so); + FREE(zsa); +} 
+ +static void * +nv50_vp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv50_program *p = CALLOC_STRUCT(nv50_program); + + p->pipe.tokens = tgsi_dup_tokens(cso->tokens); + p->type = PIPE_SHADER_VERTEX; + tgsi_scan_shader(p->pipe.tokens, &p->info); + return (void *)p; +} + +static void +nv50_vp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->vertprog = hwcso; + nv50->dirty |= NV50_NEW_VERTPROG; +} + +static void +nv50_vp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *p = hwcso; + + nv50_program_destroy(nv50, p); + FREE((void*)p->pipe.tokens); + FREE(p); +} + +static void * +nv50_fp_state_create(struct pipe_context *pipe, + const struct pipe_shader_state *cso) +{ + struct nv50_program *p = CALLOC_STRUCT(nv50_program); + + p->pipe.tokens = tgsi_dup_tokens(cso->tokens); + p->type = PIPE_SHADER_FRAGMENT; + tgsi_scan_shader(p->pipe.tokens, &p->info); + return (void *)p; +} + +static void +nv50_fp_state_bind(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->fragprog = hwcso; + nv50->dirty |= NV50_NEW_FRAGPROG; +} + +static void +nv50_fp_state_delete(struct pipe_context *pipe, void *hwcso) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nv50_program *p = hwcso; + + nv50_program_destroy(nv50, p); + FREE((void*)p->pipe.tokens); + FREE(p); +} + +static void +nv50_set_blend_color(struct pipe_context *pipe, + const struct pipe_blend_color *bcol) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->blend_colour = *bcol; + nv50->dirty |= NV50_NEW_BLEND_COLOUR; +} + +static void +nv50_set_clip_state(struct pipe_context *pipe, + const struct pipe_clip_state *clip) +{ +} + +static void +nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, + const struct pipe_constant_buffer *buf ) +{ + struct 
nv50_context *nv50 = nv50_context(pipe); + + if (shader == PIPE_SHADER_VERTEX) { + nv50->constbuf[PIPE_SHADER_VERTEX] = buf->buffer; + nv50->dirty |= NV50_NEW_VERTPROG_CB; + } else + if (shader == PIPE_SHADER_FRAGMENT) { + nv50->constbuf[PIPE_SHADER_FRAGMENT] = buf->buffer; + nv50->dirty |= NV50_NEW_FRAGPROG_CB; + } +} + +static void +nv50_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->framebuffer = *fb; + nv50->dirty |= NV50_NEW_FRAMEBUFFER; +} + +static void +nv50_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->stipple = *stipple; + nv50->dirty |= NV50_NEW_STIPPLE; +} + +static void +nv50_set_scissor_state(struct pipe_context *pipe, + const struct pipe_scissor_state *s) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->scissor = *s; + nv50->dirty |= NV50_NEW_SCISSOR; +} + +static void +nv50_set_viewport_state(struct pipe_context *pipe, + const struct pipe_viewport_state *vpt) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + nv50->viewport = *vpt; + nv50->dirty |= NV50_NEW_VIEWPORT; +} + +static void +nv50_set_vertex_buffers(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_buffer *vb) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + memcpy(nv50->vtxbuf, vb, sizeof(*vb) * count); + nv50->vtxbuf_nr = count; + + nv50->dirty |= NV50_NEW_ARRAYS; +} + +static void +nv50_set_vertex_elements(struct pipe_context *pipe, unsigned count, + const struct pipe_vertex_element *ve) +{ + struct nv50_context *nv50 = nv50_context(pipe); + + memcpy(nv50->vtxelt, ve, sizeof(*ve) * count); + nv50->vtxelt_nr = count; + + nv50->dirty |= NV50_NEW_ARRAYS; +} + +void +nv50_init_state_functions(struct nv50_context *nv50) +{ + nv50->pipe.create_blend_state = nv50_blend_state_create; + nv50->pipe.bind_blend_state = 
nv50_blend_state_bind; + nv50->pipe.delete_blend_state = nv50_blend_state_delete; + + nv50->pipe.create_sampler_state = nv50_sampler_state_create; + nv50->pipe.bind_sampler_states = nv50_sampler_state_bind; + nv50->pipe.delete_sampler_state = nv50_sampler_state_delete; + nv50->pipe.set_sampler_textures = nv50_set_sampler_texture; + + nv50->pipe.create_rasterizer_state = nv50_rasterizer_state_create; + nv50->pipe.bind_rasterizer_state = nv50_rasterizer_state_bind; + nv50->pipe.delete_rasterizer_state = nv50_rasterizer_state_delete; + + nv50->pipe.create_depth_stencil_alpha_state = + nv50_depth_stencil_alpha_state_create; + nv50->pipe.bind_depth_stencil_alpha_state = + nv50_depth_stencil_alpha_state_bind; + nv50->pipe.delete_depth_stencil_alpha_state = + nv50_depth_stencil_alpha_state_delete; + + nv50->pipe.create_vs_state = nv50_vp_state_create; + nv50->pipe.bind_vs_state = nv50_vp_state_bind; + nv50->pipe.delete_vs_state = nv50_vp_state_delete; + + nv50->pipe.create_fs_state = nv50_fp_state_create; + nv50->pipe.bind_fs_state = nv50_fp_state_bind; + nv50->pipe.delete_fs_state = nv50_fp_state_delete; + + nv50->pipe.set_blend_color = nv50_set_blend_color; + nv50->pipe.set_clip_state = nv50_set_clip_state; + nv50->pipe.set_constant_buffer = nv50_set_constant_buffer; + nv50->pipe.set_framebuffer_state = nv50_set_framebuffer_state; + nv50->pipe.set_polygon_stipple = nv50_set_polygon_stipple; + nv50->pipe.set_scissor_state = nv50_set_scissor_state; + nv50->pipe.set_viewport_state = nv50_set_viewport_state; + + nv50->pipe.set_vertex_buffers = nv50_set_vertex_buffers; + nv50->pipe.set_vertex_elements = nv50_set_vertex_elements; +} + diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c new file mode 100644 index 0000000000..948112ffa9 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -0,0 +1,313 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person 
obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "nv50_context.h" +#include "nouveau/nouveau_stateobj.h" + +/* + * Build the framebuffer state object: for each colour buffer the RT size, + * address and format methods, then the depth/stencil (zeta) buffer if one is + * bound, then the viewport extents, and store the result in nv50->state.fb. + */ +static void +nv50_state_validate_fb(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so = so_new(128, 18); + struct pipe_framebuffer_state *fb = &nv50->framebuffer; + /* w/h stay 0 when no buffer is bound instead of being read uninitialised */ + unsigned i, w = 0, h = 0, gw = 0; + + for (i = 0; i < fb->nr_cbufs; i++) { + if (!gw) { + w = fb->cbufs[i]->width; + h = fb->cbufs[i]->height; + gw = 1; + } else { + assert(w == fb->cbufs[i]->width); + assert(h == fb->cbufs[i]->height); + } + + so_method(so, tesla, NV50TCL_RT_HORIZ(i), 2); + so_data (so, fb->cbufs[i]->width); + so_data (so, fb->cbufs[i]->height); + + so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5); + so_reloc (so, nv50_surface_buffer(fb->cbufs[i]), fb->cbufs[i]->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | + NOUVEAU_BO_RDWR, 0, 0); + so_reloc (so, nv50_surface_buffer(fb->cbufs[i]), fb->cbufs[i]->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | + NOUVEAU_BO_RDWR, 0, 0); + switch (fb->cbufs[i]->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + so_data(so, 0xcf); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + so_data(so, 0xe8); + break; + default: + NOUVEAU_ERR("AIIII unknown format %s\n", + pf_name(fb->cbufs[i]->format)); + so_data(so, 0xe6); + break; + } + so_data(so, 0x00000000); + so_data(so, 0x00000000); + + so_method(so, tesla, 0x1224, 1); + so_data (so, 1); + } + + if (fb->zsbuf) { + if (!gw) { + w = fb->zsbuf->width; + h = fb->zsbuf->height; + gw = 1; + } else { + assert(w == fb->zsbuf->width); + assert(h == fb->zsbuf->height); + } + + so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5); + so_reloc (so, nv50_surface_buffer(fb->zsbuf), fb->zsbuf->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | + NOUVEAU_BO_RDWR, 0, 0); + so_reloc (so, nv50_surface_buffer(fb->zsbuf), fb->zsbuf->offset, + NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | + NOUVEAU_BO_RDWR, 0, 0); + switch (fb->zsbuf->format) { + case PIPE_FORMAT_Z24S8_UNORM: + so_data(so, 0x16); + break; + case
PIPE_FORMAT_Z16_UNORM: + so_data(so, 0x15); + break; + default: + NOUVEAU_ERR("AIIII unknown format %s\n", + pf_name(fb->zsbuf->format)); + so_data(so, 0x16); + break; + } + so_data(so, 0x00000000); + so_data(so, 0x00000000); + + so_method(so, tesla, 0x1538, 1); + so_data (so, 1); + so_method(so, tesla, 0x1228, 3); + so_data (so, fb->zsbuf->width); + so_data (so, fb->zsbuf->height); + so_data (so, 0x00010001); + } + + so_method(so, tesla, NV50TCL_VIEWPORT_HORIZ, 2); + so_data (so, w << 16); + so_data (so, h << 16); + so_method(so, tesla, 0x0e04, 2); + so_data (so, w << 16); + so_data (so, h << 16); + so_method(so, tesla, 0xdf8, 2); + so_data (so, 0); + so_data (so, h); + + so_ref(so, &nv50->state.fb); +} + +static void +nv50_state_emit(struct nv50_context *nv50) +{ + struct nv50_screen *screen = nv50->screen; + struct nouveau_winsys *nvws = screen->nvws; + + if (nv50->pctx_id != screen->cur_pctx) { + nv50->state.dirty |= 0xffffffff; + screen->cur_pctx = nv50->pctx_id; + } + + if (nv50->state.dirty & NV50_NEW_FRAMEBUFFER) + so_emit(nvws, nv50->state.fb); + if (nv50->state.dirty & NV50_NEW_BLEND) + so_emit(nvws, nv50->state.blend); + if (nv50->state.dirty & NV50_NEW_ZSA) + so_emit(nvws, nv50->state.zsa); + if (nv50->state.dirty & NV50_NEW_VERTPROG) + so_emit(nvws, nv50->state.vertprog); + if (nv50->state.dirty & NV50_NEW_FRAGPROG) + so_emit(nvws, nv50->state.fragprog); + if (nv50->state.dirty & NV50_NEW_RASTERIZER) + so_emit(nvws, nv50->state.rast); + if (nv50->state.dirty & NV50_NEW_BLEND_COLOUR) + so_emit(nvws, nv50->state.blend_colour); + if (nv50->state.dirty & NV50_NEW_STIPPLE) + so_emit(nvws, nv50->state.stipple); + if (nv50->state.dirty & NV50_NEW_SCISSOR) + so_emit(nvws, nv50->state.scissor); + if (nv50->state.dirty & NV50_NEW_VIEWPORT) + so_emit(nvws, nv50->state.viewport); + if (nv50->state.dirty & NV50_NEW_SAMPLER) + so_emit(nvws, nv50->state.tsc_upload); + if (nv50->state.dirty & NV50_NEW_TEXTURE) + so_emit(nvws, nv50->state.tic_upload); + if 
(nv50->state.dirty & NV50_NEW_ARRAYS) { + so_emit(nvws, nv50->state.vtxfmt); + so_emit(nvws, nv50->state.vtxbuf); + } + nv50->state.dirty = 0; + + so_emit_reloc_markers(nvws, nv50->state.fb); + so_emit_reloc_markers(nvws, nv50->state.vertprog); + so_emit_reloc_markers(nvws, nv50->state.fragprog); + so_emit_reloc_markers(nvws, nv50->state.vtxbuf); + so_emit_reloc_markers(nvws, nv50->screen->static_init); +} + +boolean +nv50_state_validate(struct nv50_context *nv50) +{ + const struct pipe_framebuffer_state *fb = &nv50->framebuffer; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + unsigned i; + + for (i = 0; i < fb->nr_cbufs; i++) + fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + + if (fb->zsbuf) + fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + + if (nv50->dirty & NV50_NEW_FRAMEBUFFER) + nv50_state_validate_fb(nv50); + + if (nv50->dirty & NV50_NEW_BLEND) + so_ref(nv50->blend->so, &nv50->state.blend); + + if (nv50->dirty & NV50_NEW_ZSA) + so_ref(nv50->zsa->so, &nv50->state.zsa); + + if (nv50->dirty & (NV50_NEW_VERTPROG | NV50_NEW_VERTPROG_CB)) + nv50_vertprog_validate(nv50); + + if (nv50->dirty & (NV50_NEW_FRAGPROG | NV50_NEW_FRAGPROG_CB)) + nv50_fragprog_validate(nv50); + + if (nv50->dirty & NV50_NEW_RASTERIZER) + so_ref(nv50->rasterizer->so, &nv50->state.rast); + + if (nv50->dirty & NV50_NEW_BLEND_COLOUR) { + so = so_new(5, 0); + so_method(so, tesla, NV50TCL_BLEND_COLOR(0), 4); + so_data (so, fui(nv50->blend_colour.color[0])); + so_data (so, fui(nv50->blend_colour.color[1])); + so_data (so, fui(nv50->blend_colour.color[2])); + so_data (so, fui(nv50->blend_colour.color[3])); + so_ref(so, &nv50->state.blend_colour); + } + + if (nv50->dirty & NV50_NEW_STIPPLE) { + so = so_new(33, 0); + so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32); + for (i = 0; i < 32; i++) + so_data(so, nv50->stipple.stipple[i]); + so_ref(so, &nv50->state.stipple); + } + + if (nv50->dirty & (NV50_NEW_SCISSOR | NV50_NEW_RASTERIZER)) { + 
struct pipe_rasterizer_state *rast = &nv50->rasterizer->pipe; + struct pipe_scissor_state *s = &nv50->scissor; + + if (nv50->state.scissor && + (rast->scissor == 0 && nv50->state.scissor_enabled == 0)) + goto scissor_uptodate; + nv50->state.scissor_enabled = rast->scissor; + + so = so_new(3, 0); + so_method(so, tesla, 0x0ff4, 2); + if (nv50->state.scissor_enabled) { + so_data(so, ((s->maxx - s->minx) << 16) | s->minx); + so_data(so, ((s->maxy - s->miny) << 16) | s->miny); + } else { + so_data(so, (8192 << 16)); + so_data(so, (8192 << 16)); + } + so_ref(so, &nv50->state.scissor); + nv50->state.dirty |= NV50_NEW_SCISSOR; + } +scissor_uptodate: + + /* + * The viewport state object depends on the rasterizer's bypass_clipping + * flag as well as on the viewport itself, so revalidate on either change. + */ + if (nv50->dirty & (NV50_NEW_VIEWPORT | NV50_NEW_RASTERIZER)) { + unsigned bypass; + + if (!nv50->rasterizer->pipe.bypass_clipping) + bypass = 0; + else + bypass = 1; + + /* keep the existing object when bypass is unchanged and either bypass is on or the viewport itself did not change */ + if (nv50->state.viewport && + (bypass || !(nv50->dirty & NV50_NEW_VIEWPORT)) && + nv50->state.viewport_bypass == bypass) + goto viewport_uptodate; + nv50->state.viewport_bypass = bypass; + + so = so_new(12, 0); + if (!bypass) { + so_method(so, tesla, NV50TCL_VIEWPORT_UNK1(0), 3); + so_data (so, fui(nv50->viewport.translate[0])); + so_data (so, fui(nv50->viewport.translate[1])); + so_data (so, fui(nv50->viewport.translate[2])); + so_method(so, tesla, NV50TCL_VIEWPORT_UNK0(0), 3); + so_data (so, fui(nv50->viewport.scale[0])); + so_data (so, fui(-nv50->viewport.scale[1])); + so_data (so, fui(nv50->viewport.scale[2])); + so_method(so, tesla, 0x192c, 1); + so_data (so, 1); + so_method(so, tesla, 0x0f90, 1); + so_data (so, 0); + } else { + so_method(so, tesla, 0x192c, 1); + so_data (so, 0); + so_method(so, tesla, 0x0f90, 1); + so_data (so, 1); + } + + so_ref(so, &nv50->state.viewport); + /* make sure the rebuilt object is emitted even when only the rasterizer changed */ + nv50->state.dirty |= NV50_NEW_VIEWPORT; + } +viewport_uptodate: + + if (nv50->dirty & NV50_NEW_SAMPLER) { + int i; + + so = so_new(nv50->sampler_nr * 8 + 3, 0); + so_method(so, tesla, 0x0f00, 1); + so_data (so, NV50_CB_TSC); + so_method(so, tesla, 0x40000f04, nv50->sampler_nr * 8); + for (i = 0; i < nv50->sampler_nr; i++) + so_datap (so,
nv50->sampler[i], 8); + so_ref(so, &nv50->state.tsc_upload); + } + + if (nv50->dirty & NV50_NEW_TEXTURE) + nv50_tex_validate(nv50); + + if (nv50->dirty & NV50_NEW_ARRAYS) + nv50_vbo_validate(nv50); + + nv50->state.dirty |= nv50->dirty; + nv50->dirty = 0; + nv50_state_emit(nv50); + + return TRUE; +} + diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c new file mode 100644 index 0000000000..b0936518b0 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -0,0 +1,208 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#define __NOUVEAU_PUSH_H__ +#include <stdint.h> +#include "nouveau/nouveau_pushbuf.h" +#include "nv50_context.h" +#include "pipe/p_defines.h" +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" + +#include "util/u_tile.h" + +static INLINE int +nv50_format(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return NV50_2D_DST_FORMAT_32BPP; + case PIPE_FORMAT_X8R8G8B8_UNORM: + return NV50_2D_DST_FORMAT_24BPP; + case PIPE_FORMAT_R5G6B5_UNORM: + return NV50_2D_DST_FORMAT_16BPP; + case PIPE_FORMAT_A8_UNORM: + return NV50_2D_DST_FORMAT_8BPP; + default: + return -1; + } +} + +static int +nv50_surface_set(struct nv50_screen *screen, struct pipe_surface *ps, int dst) +{ + struct nv50_miptree *mt = nv50_miptree(ps->texture); + struct nouveau_channel *chan = screen->nvws->channel; + struct nouveau_grobj *eng2d = screen->eng2d; + struct nouveau_bo *bo; + int format, mthd = dst ? NV50_2D_DST_FORMAT : NV50_2D_SRC_FORMAT; + int flags = NOUVEAU_BO_VRAM | (dst ? 
NOUVEAU_BO_WR : NOUVEAU_BO_RD); + + bo = screen->nvws->get_bo(nv50_miptree(ps->texture)->buffer); + if (!bo) + return 1; + + format = nv50_format(ps->format); + if (format < 0) + return 1; + + if (!bo->tiled) { + BEGIN_RING(chan, eng2d, mthd, 2); + OUT_RING (chan, format); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, mthd + 0x14, 5); + OUT_RING (chan, mt->level[0].pitch); + OUT_RING (chan, ps->width); + OUT_RING (chan, ps->height); + OUT_RELOCh(chan, bo, ps->offset, flags); + OUT_RELOCl(chan, bo, ps->offset, flags); + } else { + BEGIN_RING(chan, eng2d, mthd, 5); + OUT_RING (chan, format); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, eng2d, mthd + 0x18, 4); + OUT_RING (chan, ps->width); + OUT_RING (chan, ps->height); + OUT_RELOCh(chan, bo, ps->offset, flags); + OUT_RELOCl(chan, bo, ps->offset, flags); + } + +#if 0 + if (dst) { + BEGIN_RING(chan, eng2d, NV50_2D_CLIP_X, 4); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, surf->width); + OUT_RING (chan, surf->height); + } +#endif + + return 0; +} + +int +nv50_surface_do_copy(struct nv50_screen *screen, struct pipe_surface *dst, + int dx, int dy, struct pipe_surface *src, int sx, int sy, + int w, int h) +{ + struct nouveau_channel *chan = screen->nvws->channel; + struct nouveau_grobj *eng2d = screen->eng2d; + int ret; + + WAIT_RING (chan, 32); + + ret = nv50_surface_set(screen, dst, 1); + if (ret) + return ret; + + ret = nv50_surface_set(screen, src, 0); + if (ret) + return ret; + + BEGIN_RING(chan, eng2d, 0x088c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, eng2d, NV50_2D_BLIT_DST_X, 4); + OUT_RING (chan, dx); + OUT_RING (chan, dy); + OUT_RING (chan, w); + OUT_RING (chan, h); + BEGIN_RING(chan, eng2d, 0x08c0, 4); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + OUT_RING (chan, 1); + BEGIN_RING(chan, eng2d, 0x08d0, 4); + OUT_RING (chan, 0); + OUT_RING (chan, sx); + OUT_RING (chan, 0); + OUT_RING (chan, sy); + + return 0; +} 
+ +static void +nv50_surface_copy(struct pipe_context *pipe, boolean flip, + struct pipe_surface *dest, unsigned destx, unsigned desty, + struct pipe_surface *src, unsigned srcx, unsigned srcy, + unsigned width, unsigned height) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + struct nv50_screen *screen = nv50->screen; + + assert(src->format == dest->format); + + if (flip) { + desty += height; + while (height--) { + nv50_surface_do_copy(screen, dest, destx, desty--, src, + srcx, srcy++, width, 1); + } + } else { + nv50_surface_do_copy(screen, dest, destx, desty, src, srcx, + srcy, width, height); + } +} + +static void +nv50_surface_fill(struct pipe_context *pipe, struct pipe_surface *dest, + unsigned destx, unsigned desty, unsigned width, + unsigned height, unsigned value) +{ + struct nv50_context *nv50 = (struct nv50_context *)pipe; + struct nv50_screen *screen = nv50->screen; + struct nouveau_channel *chan = screen->nvws->channel; + struct nouveau_grobj *eng2d = screen->eng2d; + int format, ret; + + format = nv50_format(dest->format); + if (format < 0) + return; + + WAIT_RING (chan, 32); + + ret = nv50_surface_set(screen, dest, 1); + if (ret) + return; + + BEGIN_RING(chan, eng2d, 0x0580, 3); + OUT_RING (chan, 4); + OUT_RING (chan, format); + OUT_RING (chan, value); + BEGIN_RING(chan, eng2d, NV50_2D_RECT_X1, 4); + OUT_RING (chan, destx); + OUT_RING (chan, desty); + OUT_RING (chan, width); + OUT_RING (chan, height); +} + +void +nv50_init_surface_functions(struct nv50_context *nv50) +{ + nv50->pipe.surface_copy = nv50_surface_copy; + nv50->pipe.surface_fill = nv50_surface_fill; +} + + diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c new file mode 100644 index 0000000000..31bf59675e --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_tex.c @@ -0,0 +1,156 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated 
documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "nv50_context.h" +#include "nv50_texture.h" + +#include "nouveau/nouveau_stateobj.h" + +static int +nv50_tex_construct(struct nouveau_stateobj *so, struct nv50_miptree *mt) +{ + switch (mt->base.format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8_8_8_8); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_1_5_5_5); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + 
NV50TIC_0_0_FMT_4_4_4_4); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_5_6_5); + break; + case PIPE_FORMAT_L8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_A8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_ZERO | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_ZERO | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_ZERO | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_I8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C0 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8); + break; + case PIPE_FORMAT_A8L8_UNORM: + so_data(so, NV50TIC_0_0_MAPA_C1 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C0 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C0 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_8_8); + break; + case PIPE_FORMAT_DXT1_RGB: + so_data(so, NV50TIC_0_0_MAPA_ONE | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_DXT1); + break; + case PIPE_FORMAT_DXT1_RGBA: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_DXT1); + break; + case PIPE_FORMAT_DXT3_RGBA: + 
so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_DXT3); + break; + case PIPE_FORMAT_DXT5_RGBA: + so_data(so, NV50TIC_0_0_MAPA_C3 | NV50TIC_0_0_TYPEA_UNORM | + NV50TIC_0_0_MAPR_C0 | NV50TIC_0_0_TYPER_UNORM | + NV50TIC_0_0_MAPG_C1 | NV50TIC_0_0_TYPEG_UNORM | + NV50TIC_0_0_MAPB_C2 | NV50TIC_0_0_TYPEB_UNORM | + NV50TIC_0_0_FMT_DXT5); + break; + default: + return 1; + } + + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW | + NOUVEAU_BO_RD, 0, 0); + so_data (so, 0xd0005000); + so_data (so, 0x00300000); + so_data (so, mt->base.width[0]); + so_data (so, (mt->base.depth[0] << 16) | mt->base.height[0]); + so_data (so, 0x03000000); + so_reloc(so, mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH | + NOUVEAU_BO_RD, 0, 0); + + return 0; +} + +void +nv50_tex_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *so; + int unit; + + so = so_new(nv50->miptree_nr * 8 + 3, nv50->miptree_nr * 2); + so_method(so, tesla, 0x0f00, 1); + so_data (so, NV50_CB_TIC); + so_method(so, tesla, 0x40000f04, nv50->miptree_nr * 8); + for (unit = 0; unit < nv50->miptree_nr; unit++) { + struct nv50_miptree *mt = nv50->miptree[unit]; + + if (nv50_tex_construct(so, mt)) { + NOUVEAU_ERR("failed tex validate\n"); + so_ref(NULL, &so); + return; + } + } + + so_ref(so, &nv50->state.tic_upload); +} + diff --git a/src/gallium/drivers/nv50/nv50_texture.h b/src/gallium/drivers/nv50/nv50_texture.h new file mode 100644 index 0000000000..aca622c73b --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_texture.h @@ -0,0 +1,129 @@ +#ifndef __NV50_TEXTURE_H__ +#define __NV50_TEXTURE_H__ + +/* It'd be really nice to have these in nouveau_class.h generated by + * renouveau like the rest of the object header - but not sure it can + * handle non-object stuff nicely - need to 
look into it. + */ + +/* Texture image control block */ +#define NV50TIC_0_0_MAPA_MASK 0x38000000 +#define NV50TIC_0_0_MAPA_ZERO 0x00000000 +#define NV50TIC_0_0_MAPA_C0 0x10000000 +#define NV50TIC_0_0_MAPA_C1 0x18000000 +#define NV50TIC_0_0_MAPA_C2 0x20000000 +#define NV50TIC_0_0_MAPA_C3 0x28000000 +#define NV50TIC_0_0_MAPA_ONE 0x38000000 +#define NV50TIC_0_0_MAPR_MASK 0x07000000 +#define NV50TIC_0_0_MAPR_ZERO 0x00000000 +#define NV50TIC_0_0_MAPR_C0 0x02000000 +#define NV50TIC_0_0_MAPR_C1 0x03000000 +#define NV50TIC_0_0_MAPR_C2 0x04000000 +#define NV50TIC_0_0_MAPR_C3 0x05000000 +#define NV50TIC_0_0_MAPR_ONE 0x07000000 +#define NV50TIC_0_0_MAPG_MASK 0x00e00000 +#define NV50TIC_0_0_MAPG_ZERO 0x00000000 +#define NV50TIC_0_0_MAPG_C0 0x00400000 +#define NV50TIC_0_0_MAPG_C1 0x00600000 +#define NV50TIC_0_0_MAPG_C2 0x00800000 +#define NV50TIC_0_0_MAPG_C3 0x00a00000 +#define NV50TIC_0_0_MAPG_ONE 0x00e00000 +#define NV50TIC_0_0_MAPB_MASK 0x001c0000 +#define NV50TIC_0_0_MAPB_ZERO 0x00000000 +#define NV50TIC_0_0_MAPB_C0 0x00080000 +#define NV50TIC_0_0_MAPB_C1 0x000c0000 +#define NV50TIC_0_0_MAPB_C2 0x00100000 +#define NV50TIC_0_0_MAPB_C3 0x00140000 +#define NV50TIC_0_0_MAPB_ONE 0x001c0000 +#define NV50TIC_0_0_TYPEA_MASK 0x00038000 +#define NV50TIC_0_0_TYPEA_UNORM 0x00010000 +#define NV50TIC_0_0_TYPER_MASK 0x00007000 +#define NV50TIC_0_0_TYPER_UNORM 0x00002000 +#define NV50TIC_0_0_TYPEG_MASK 0x00000e00 +#define NV50TIC_0_0_TYPEG_UNORM 0x00000400 +#define NV50TIC_0_0_TYPEB_MASK 0x000001c0 +#define NV50TIC_0_0_TYPEB_UNORM 0x00000080 +#define NV50TIC_0_0_FMT_MASK 0x0000003c +#define NV50TIC_0_0_FMT_8_8_8_8 0x00000008 +#define NV50TIC_0_0_FMT_4_4_4_4 0x00000012 +#define NV50TIC_0_0_FMT_1_5_5_5 0x00000013 +#define NV50TIC_0_0_FMT_5_6_5 0x00000015 +#define NV50TIC_0_0_FMT_8_8 0x00000018 +#define NV50TIC_0_0_FMT_8 0x0000001d +#define NV50TIC_0_0_FMT_DXT1 0x00000024 +#define NV50TIC_0_0_FMT_DXT3 0x00000025 +#define NV50TIC_0_0_FMT_DXT5 0x00000026 + +#define NV50TIC_0_1_OFFSET_LOW_MASK 
0xffffffff +#define NV50TIC_0_1_OFFSET_LOW_SHIFT 0 + +#define NV50TIC_0_2_UNKNOWN_MASK 0xffffffff + +#define NV50TIC_0_3_UNKNOWN_MASK 0xffffffff + +#define NV50TIC_0_4_WIDTH_MASK 0x0000ffff +#define NV50TIC_0_4_WIDTH_SHIFT 0 + +#define NV50TIC_0_5_DEPTH_MASK 0xffff0000 +#define NV50TIC_0_5_DEPTH_SHIFT 16 +#define NV50TIC_0_5_HEIGHT_MASK 0x0000ffff +#define NV50TIC_0_5_HEIGHT_SHIFT 0 + +#define NV50TIC_0_6_UNKNOWN_MASK 0xffffffff + +#define NV50TIC_0_7_OFFSET_HIGH_MASK 0xffffffff +#define NV50TIC_0_7_OFFSET_HIGH_SHIFT 0 + +/* Texture sampler control block */ +#define NV50TSC_1_0_WRAPS_MASK 0x00000007 +#define NV50TSC_1_0_WRAPS_REPEAT 0x00000000 +#define NV50TSC_1_0_WRAPS_MIRROR_REPEAT 0x00000001 +#define NV50TSC_1_0_WRAPS_CLAMP_TO_EDGE 0x00000002 +#define NV50TSC_1_0_WRAPS_CLAMP_TO_BORDER 0x00000003 +#define NV50TSC_1_0_WRAPS_CLAMP 0x00000004 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_EDGE 0x00000005 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP_TO_BORDER 0x00000006 +#define NV50TSC_1_0_WRAPS_MIRROR_CLAMP 0x00000007 +#define NV50TSC_1_0_WRAPT_MASK 0x00000038 +#define NV50TSC_1_0_WRAPT_REPEAT 0x00000000 +#define NV50TSC_1_0_WRAPT_MIRROR_REPEAT 0x00000008 +#define NV50TSC_1_0_WRAPT_CLAMP_TO_EDGE 0x00000010 +#define NV50TSC_1_0_WRAPT_CLAMP_TO_BORDER 0x00000018 +#define NV50TSC_1_0_WRAPT_CLAMP 0x00000020 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_EDGE 0x00000028 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP_TO_BORDER 0x00000030 +#define NV50TSC_1_0_WRAPT_MIRROR_CLAMP 0x00000038 +#define NV50TSC_1_0_WRAPR_MASK 0x000001c0 +#define NV50TSC_1_0_WRAPR_REPEAT 0x00000000 +#define NV50TSC_1_0_WRAPR_MIRROR_REPEAT 0x00000040 +#define NV50TSC_1_0_WRAPR_CLAMP_TO_EDGE 0x00000080 +#define NV50TSC_1_0_WRAPR_CLAMP_TO_BORDER 0x000000c0 +#define NV50TSC_1_0_WRAPR_CLAMP 0x00000100 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_EDGE 0x00000140 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP_TO_BORDER 0x00000180 +#define NV50TSC_1_0_WRAPR_MIRROR_CLAMP 0x000001c0 + +#define NV50TSC_1_1_MAGF_MASK 0x00000003 +#define 
NV50TSC_1_1_MAGF_NEAREST 0x00000001 +#define NV50TSC_1_1_MAGF_LINEAR 0x00000002 +#define NV50TSC_1_1_MINF_MASK 0x00000030 +#define NV50TSC_1_1_MINF_NEAREST 0x00000010 +#define NV50TSC_1_1_MINF_LINEAR 0x00000020 +#define NV50TSC_1_1_MIPF_MASK 0x000000c0 +#define NV50TSC_1_1_MIPF_NONE 0x00000040 +#define NV50TSC_1_1_MIPF_NEAREST 0x00000080 +#define NV50TSC_1_1_MIPF_LINEAR 0x000000c0 + +#define NV50TSC_1_2_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_3_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_4_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_5_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_6_UNKNOWN_MASK 0xffffffff + +#define NV50TSC_1_7_UNKNOWN_MASK 0xffffffff + +#endif diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c new file mode 100644 index 0000000000..a00c999510 --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_transfer.c @@ -0,0 +1,216 @@ + +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" + +#include "nv50_context.h" + +struct nv50_transfer { + struct pipe_transfer base; + struct pipe_buffer *buffer; + struct nv50_miptree_level *level; + int level_pitch; + int level_width; + int level_height; + int level_x; + int level_y; +}; + +static void +nv50_transfer_rect_m2mf(struct pipe_screen *pscreen, struct pipe_buffer *src, + int src_pitch, int sx, int sy, int sw, int sh, + struct pipe_buffer *dst, int dst_pitch, int dx, int dy, + int dw, int dh, int cpp, int width, int height, + unsigned src_reloc, unsigned dst_reloc) +{ + struct nv50_screen *screen = nv50_screen(pscreen); + struct nouveau_winsys *nvws = screen->nvws; + struct nouveau_channel *chan = nvws->channel; + struct nouveau_grobj *m2mf = screen->m2mf; + struct nouveau_bo *src_bo = nvws->get_bo(src); + struct nouveau_bo *dst_bo = nvws->get_bo(dst); + unsigned src_offset = 0, dst_offset = 0; + + src_reloc |= NOUVEAU_BO_RD; + dst_reloc |= NOUVEAU_BO_WR; + + WAIT_RING (chan, 14); + + if (!src_bo->tiled) { + BEGIN_RING(chan, m2mf, 0x0200, 1); + OUT_RING 
(chan, 1); + BEGIN_RING(chan, m2mf, 0x0314, 1); + OUT_RING (chan, src_pitch); + src_offset = (sy * src_pitch) + (sx * cpp); + } else { + BEGIN_RING(chan, m2mf, 0x0200, 6); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, sw * cpp); + OUT_RING (chan, sh); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + } + + if (!dst_bo->tiled) { + BEGIN_RING(chan, m2mf, 0x021c, 1); + OUT_RING (chan, 1); + BEGIN_RING(chan, m2mf, 0x0318, 1); + OUT_RING (chan, dst_pitch); + dst_offset = (dy * dst_pitch) + (dx * cpp); + } else { + BEGIN_RING(chan, m2mf, 0x021c, 6); + OUT_RING (chan, 0); + OUT_RING (chan, 0); + OUT_RING (chan, dw * cpp); + OUT_RING (chan, dh); + OUT_RING (chan, 1); + OUT_RING (chan, 0); + } + + /* copy in chunks of at most 2047 lines, firing the ring after each chunk */ + while (height) { + int line_count = height > 2047 ? 2047 : height; + + WAIT_RING (chan, 15); + BEGIN_RING(chan, m2mf, 0x0238, 2); + OUT_RELOCh(chan, src_bo, src_offset, src_reloc); + OUT_RELOCh(chan, dst_bo, dst_offset, dst_reloc); + BEGIN_RING(chan, m2mf, 0x030c, 2); + OUT_RELOCl(chan, src_bo, src_offset, src_reloc); + OUT_RELOCl(chan, dst_bo, dst_offset, dst_reloc); + if (src_bo->tiled) { + /* tiled source: the row comes from the source coordinate sy, not dy */ + BEGIN_RING(chan, m2mf, 0x0218, 1); + OUT_RING (chan, (sy << 16) | sx); + } else { + src_offset += (line_count * src_pitch); + } + if (dst_bo->tiled) { + /* tiled destination: the row comes from the destination coordinate dy, not sy */ + BEGIN_RING(chan, m2mf, 0x0234, 1); + OUT_RING (chan, (dy << 16) | dx); + } else { + dst_offset += (line_count * dst_pitch); + } + BEGIN_RING(chan, m2mf, 0x031c, 4); + OUT_RING (chan, width * cpp); + OUT_RING (chan, line_count); + OUT_RING (chan, 0x00000101); + OUT_RING (chan, 0); + FIRE_RING (chan); + + height -= line_count; + sy += line_count; + dy += line_count; + } +} + +static struct pipe_transfer * +nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + struct nv50_miptree *mt = nv50_miptree(pt); + struct nv50_miptree_level *lvl = &mt->level[level]; + struct nv50_transfer
*tx; + unsigned image = 0; + + if (pt->target == PIPE_TEXTURE_CUBE) + image = face; + else + if (pt->target == PIPE_TEXTURE_3D) + image = zslice; + + tx = CALLOC_STRUCT(nv50_transfer); + if (!tx) + return NULL; + + tx->base.refcount = 1; + pipe_texture_reference(&tx->base.texture, pt); + tx->base.format = pt->format; + tx->base.width = w; + tx->base.height = h; + tx->base.block = pt->block; + tx->base.nblocksx = pt->nblocksx[level]; + tx->base.nblocksy = pt->nblocksy[level]; + tx->base.stride = (w * pt->block.size); + tx->base.usage = usage; + + tx->level = lvl; + tx->level_pitch = lvl->pitch; + tx->level_width = mt->base.width[level]; + tx->level_height = mt->base.height[level]; + tx->level_x = x; + tx->level_y = y; + tx->buffer = + pipe_buffer_create(pscreen, 0, NOUVEAU_BUFFER_USAGE_TRANSFER, + w * tx->base.block.size * h); + + if (usage != PIPE_TRANSFER_WRITE) { + nv50_transfer_rect_m2mf(pscreen, mt->buffer, tx->level_pitch, + x, y, tx->level_width, tx->level_height, + tx->buffer, tx->base.stride, 0, 0, + tx->base.width, tx->base.height, + tx->base.block.size, w, h, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART, + NOUVEAU_BO_GART); + } + + return &tx->base; +} + +static void +nv50_transfer_del(struct pipe_screen *pscreen, struct pipe_transfer **pptx) +{ + struct pipe_transfer *ptx = *pptx; + struct nv50_transfer *tx = (struct nv50_transfer *)ptx; + struct nv50_miptree *mt = nv50_miptree(ptx->texture); + + *pptx = NULL; + if (--ptx->refcount) + return; + + if (ptx->usage != PIPE_TRANSFER_READ) { + nv50_transfer_rect_m2mf(pscreen, tx->buffer, tx->base.stride, + 0, 0, tx->base.width, tx->base.height, + mt->buffer, tx->level_pitch, + tx->level_x, tx->level_y, + tx->level_width, tx->level_height, + tx->base.block.size, tx->base.width, + tx->base.height, NOUVEAU_BO_GART, + NOUVEAU_BO_VRAM | NOUVEAU_BO_GART); + } + + pipe_buffer_reference(pscreen, &tx->buffer, NULL); + pipe_texture_reference(&ptx->texture, NULL); + FREE(ptx); +} + +static void * +nv50_transfer_map(struct 
pipe_screen *pscreen, struct pipe_transfer *ptx) +{ + struct nv50_transfer *tx = (struct nv50_transfer *)ptx; + unsigned flags = 0; + + if (ptx->usage & PIPE_TRANSFER_WRITE) + flags |= PIPE_BUFFER_USAGE_CPU_WRITE; + if (ptx->usage & PIPE_TRANSFER_READ) + flags |= PIPE_BUFFER_USAGE_CPU_READ; + + return pipe_buffer_map(pscreen, tx->buffer, flags); +} + +static void +nv50_transfer_unmap(struct pipe_screen *pscreen, struct pipe_transfer *ptx) +{ + struct nv50_transfer *tx = (struct nv50_transfer *)ptx; + + pipe_buffer_unmap(pscreen, tx->buffer); +} + +void +nv50_transfer_init_screen_functions(struct pipe_screen *pscreen) +{ + pscreen->get_tex_transfer = nv50_transfer_new; + pscreen->tex_transfer_release = nv50_transfer_del; + pscreen->transfer_map = nv50_transfer_map; + pscreen->transfer_unmap = nv50_transfer_unmap; +} diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c new file mode 100644 index 0000000000..08d751dddb --- /dev/null +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -0,0 +1,254 @@ +/* + * Copyright 2008 Ben Skeggs + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF + * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + +#include "nv50_context.h" + +static INLINE unsigned +nv50_prim(unsigned mode) +{ + switch (mode) { + case PIPE_PRIM_POINTS: return NV50TCL_VERTEX_BEGIN_POINTS; + case PIPE_PRIM_LINES: return NV50TCL_VERTEX_BEGIN_LINES; + case PIPE_PRIM_LINE_LOOP: return NV50TCL_VERTEX_BEGIN_LINE_LOOP; + case PIPE_PRIM_LINE_STRIP: return NV50TCL_VERTEX_BEGIN_LINE_STRIP; + case PIPE_PRIM_TRIANGLES: return NV50TCL_VERTEX_BEGIN_TRIANGLES; + case PIPE_PRIM_TRIANGLE_STRIP: + return NV50TCL_VERTEX_BEGIN_TRIANGLE_STRIP; + case PIPE_PRIM_TRIANGLE_FAN: return NV50TCL_VERTEX_BEGIN_TRIANGLE_FAN; + case PIPE_PRIM_QUADS: return NV50TCL_VERTEX_BEGIN_QUADS; + case PIPE_PRIM_QUAD_STRIP: return NV50TCL_VERTEX_BEGIN_QUAD_STRIP; + case PIPE_PRIM_POLYGON: return NV50TCL_VERTEX_BEGIN_POLYGON; + default: + break; + } + + NOUVEAU_ERR("invalid primitive type %d\n", mode); + return NV50TCL_VERTEX_BEGIN_POINTS; +} + +boolean +nv50_draw_arrays(struct pipe_context *pipe, unsigned mode, unsigned start, + unsigned count) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + + nv50_state_validate(nv50); + + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, 0x1440, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, 0x1334, 1); + OUT_RING (chan, 0); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode)); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BUFFER_FIRST, 2); + OUT_RING (chan, start); + OUT_RING (chan, count); + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 
1); + OUT_RING (chan, 0); + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +static INLINE void +nv50_draw_elements_inline_u08(struct nv50_context *nv50, uint8_t *map, + unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + + map += start; + + if (count & 1) { + BEGIN_RING(chan, tesla, 0x15e8, 1); + OUT_RING (chan, map[0]); + map++; + count--; + } + + while (count) { + unsigned nr = count > 2046 ? 2046 : count; + int i; + + BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + for (i = 0; i < nr; i += 2) + OUT_RING (chan, (map[1] << 16) | map[0]); + + count -= nr; + map += nr; + } +} + +static INLINE void +nv50_draw_elements_inline_u16(struct nv50_context *nv50, uint16_t *map, + unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + + map += start; + + if (count & 1) { + BEGIN_RING(chan, tesla, 0x15e8, 1); + OUT_RING (chan, map[0]); + map++; + count--; + } + + while (count) { + unsigned nr = count > 2046 ? 2046 : count; + int i; + + BEGIN_RING(chan, tesla, 0x400015f0, nr >> 1); + for (i = 0; i < nr; i += 2) + OUT_RING (chan, (map[1] << 16) | map[0]); + + count -= nr; + map += nr; + } +} + +static INLINE void +nv50_draw_elements_inline_u32(struct nv50_context *nv50, uint8_t *map, + unsigned start, unsigned count) +{ + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + + map += start; + + while (count) { + unsigned nr = count > 2047 ? 
2047 : count; + + BEGIN_RING(chan, tesla, 0x400015e8, nr); + OUT_RINGp (chan, map, nr); + + count -= nr; + map += nr; + } +} + +boolean +nv50_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + struct nv50_context *nv50 = nv50_context(pipe); + struct nouveau_channel *chan = nv50->screen->nvws->channel; + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct pipe_winsys *ws = pipe->winsys; + void *map = ws->buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); + + nv50_state_validate(nv50); + + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); + BEGIN_RING(chan, tesla, 0x142c, 1); + OUT_RING (chan, 0); + + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_BEGIN, 1); + OUT_RING (chan, nv50_prim(mode)); + switch (indexSize) { + case 1: + nv50_draw_elements_inline_u08(nv50, map, start, count); + break; + case 2: + nv50_draw_elements_inline_u16(nv50, map, start, count); + break; + case 4: + nv50_draw_elements_inline_u32(nv50, map, start, count); + break; + default: + assert(0); + } + BEGIN_RING(chan, tesla, NV50TCL_VERTEX_END, 1); + OUT_RING (chan, 0); + + pipe->flush(pipe, 0, NULL); + return TRUE; +} + +void +nv50_vbo_validate(struct nv50_context *nv50) +{ + struct nouveau_grobj *tesla = nv50->screen->tesla; + struct nouveau_stateobj *vtxbuf, *vtxfmt; + int i; + + vtxbuf = so_new(nv50->vtxelt_nr * 4, nv50->vtxelt_nr * 2); + vtxfmt = so_new(nv50->vtxelt_nr + 1, 0); + so_method(vtxfmt, tesla, 0x1ac0, nv50->vtxelt_nr); + + for (i = 0; i < nv50->vtxelt_nr; i++) { + struct pipe_vertex_element *ve = &nv50->vtxelt[i]; + struct pipe_vertex_buffer *vb = + &nv50->vtxbuf[ve->vertex_buffer_index]; + + switch (ve->src_format) { + case PIPE_FORMAT_R32G32B32A32_FLOAT: + so_data(vtxfmt, 0x7e080000 | i); + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + so_data(vtxfmt, 0x7e100000 | i); + break; + case PIPE_FORMAT_R32G32_FLOAT: + so_data(vtxfmt, 0x7e200000 | i); + break; + case 
Import('*')

env = env.Clone()

# Source list mirrors C_SOURCES in this directory's Makefile so that the
# SCons and make builds produce the same library.  The previous list named
# r300_blit.c, which the Makefile does not build, and left out most of the
# files it does.
r300 = env.ConvenienceLibrary(
    target = 'r300',
    source = [
        'r300_chipset.c',
        'r300_clear.c',
        'r300_context.c',
        'r300_emit.c',
        'r300_flush.c',
        'r300_screen.c',
        'r300_state.c',
        'r300_state_derived.c',
        'r300_state_shader.c',
        'r300_surface.c',
        'r300_swtcl_emit.c',
        'r300_texture.c',
    ])

Export('r300')
Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_chipset.h" +#include "util/u_debug.h" + +/* r300_chipset: A file all to itself for deducing the various properties of + * Radeons. */ + +/* Parse a PCI ID and fill an r300_capabilities struct with information. */ +void r300_parse_chipset(struct r300_capabilities* caps) +{ + /* Reasonable defaults */ + caps->has_tcl = TRUE; + caps->is_r500 = FALSE; + caps->num_vert_fpus = 4; + + + /* Note: These are not ordered by PCI ID. I leave that task to GCC, + * which will perform the ordering while collating jump tables. Instead, + * I've tried to group them according to capabilities and age. 
*/ + switch (caps->pci_id) { + case 0x4144: + caps->family = CHIP_FAMILY_R300; + break; + + case 0x4145: + case 0x4146: + case 0x4147: + case 0x4E44: + case 0x4E45: + case 0x4E46: + case 0x4E47: + caps->family = CHIP_FAMILY_R300; + break; + + case 0x4150: + case 0x4151: + case 0x4152: + case 0x4153: + case 0x4154: + case 0x4155: + case 0x4156: + case 0x4E50: + case 0x4E51: + case 0x4E52: + case 0x4E53: + case 0x4E54: + case 0x4E56: + caps->family = CHIP_FAMILY_RV350; + break; + + case 0x4148: + case 0x4149: + case 0x414A: + case 0x414B: + case 0x4E48: + case 0x4E49: + case 0x4E4B: + caps->family = CHIP_FAMILY_R350; + break; + + case 0x4E4A: + caps->family = CHIP_FAMILY_R360; + break; + + case 0x5460: + case 0x5462: + case 0x5464: + case 0x5B60: + case 0x5B62: + case 0x5B63: + case 0x5B64: + case 0x5B65: + caps->family = CHIP_FAMILY_RV370; + break; + + case 0x3150: + case 0x3152: + case 0x3154: + case 0x3E50: + case 0x3E54: + caps->family = CHIP_FAMILY_RV380; + break; + + case 0x4A48: + case 0x4A49: + case 0x4A4A: + case 0x4A4B: + case 0x4A4C: + case 0x4A4D: + case 0x4A4E: + case 0x4A4F: + case 0x4A50: + case 0x4A54: + caps->family = CHIP_FAMILY_R420; + caps->num_vert_fpus = 6; + break; + + case 0x5548: + case 0x5549: + case 0x554A: + case 0x554B: + case 0x5550: + case 0x5551: + case 0x5552: + case 0x5554: + case 0x5D57: + caps->family = CHIP_FAMILY_R423; + caps->num_vert_fpus = 6; + break; + + case 0x554C: + case 0x554D: + case 0x554E: + case 0x554F: + case 0x5D48: + case 0x5D49: + case 0x5D4A: + caps->family = CHIP_FAMILY_R430; + caps->num_vert_fpus = 6; + break; + + case 0x5D4C: + case 0x5D4D: + case 0x5D4E: + case 0x5D4F: + case 0x5D50: + case 0x5D52: + caps->family = CHIP_FAMILY_R480; + caps->num_vert_fpus = 6; + break; + + case 0x4B49: + case 0x4B4A: + case 0x4B4B: + case 0x4B4C: + caps->family = CHIP_FAMILY_R481; + caps->num_vert_fpus = 6; + break; + + case 0x5E4C: + case 0x5E4F: + case 0x564A: + case 0x564B: + case 0x564F: + case 0x5652: + case 0x5653: + 
case 0x5657: + case 0x5E48: + case 0x5E4A: + case 0x5E4B: + case 0x5E4D: + caps->family = CHIP_FAMILY_RV410; + caps->num_vert_fpus = 6; + break; + + case 0x5954: + case 0x5955: + caps->family = CHIP_FAMILY_RS480; + caps->has_tcl = FALSE; + break; + + case 0x5974: + case 0x5975: + caps->family = CHIP_FAMILY_RS482; + caps->has_tcl = FALSE; + break; + + case 0x5A41: + case 0x5A42: + caps->family = CHIP_FAMILY_RS400; + caps->has_tcl = FALSE; + break; + + case 0x5A61: + case 0x5A62: + caps->family = CHIP_FAMILY_RC410; + caps->has_tcl = FALSE; + break; + + case 0x791E: + case 0x791F: + caps->family = CHIP_FAMILY_RS690; + caps->has_tcl = FALSE; + break; + + case 0x796C: + case 0x796D: + case 0x796E: + case 0x796F: + caps->family = CHIP_FAMILY_RS740; + caps->has_tcl = FALSE; + break; + + case 0x7100: + case 0x7101: + case 0x7102: + case 0x7103: + case 0x7104: + case 0x7105: + case 0x7106: + case 0x7108: + case 0x7109: + case 0x710A: + case 0x710B: + case 0x710C: + case 0x710E: + case 0x710F: + caps->family = CHIP_FAMILY_R520; + caps->num_vert_fpus = 8; + caps->is_r500 = TRUE; + break; + + case 0x7140: + case 0x7141: + case 0x7142: + case 0x7143: + case 0x7144: + case 0x7145: + case 0x7146: + case 0x7147: + case 0x7149: + case 0x714A: + case 0x714B: + case 0x714C: + case 0x714D: + case 0x714E: + case 0x714F: + case 0x7151: + case 0x7152: + case 0x7153: + case 0x715E: + case 0x715F: + case 0x7180: + case 0x7181: + case 0x7183: + case 0x7186: + case 0x7187: + case 0x7188: + case 0x718A: + case 0x718B: + case 0x718C: + case 0x718D: + case 0x718F: + case 0x7193: + case 0x7196: + case 0x719B: + case 0x719F: + case 0x7200: + case 0x7210: + case 0x7211: + caps->family = CHIP_FAMILY_RV515; + caps->num_vert_fpus = 2; + caps->is_r500 = TRUE; + break; + + case 0x71C0: + case 0x71C1: + case 0x71C2: + case 0x71C3: + case 0x71C4: + case 0x71C5: + case 0x71C6: + case 0x71C7: + case 0x71CD: + case 0x71CE: + case 0x71D2: + case 0x71D4: + case 0x71D5: + case 0x71D6: + case 0x71DA: + case 
0x71DE: + caps->family = CHIP_FAMILY_RV530; + caps->num_vert_fpus = 5; + caps->is_r500 = TRUE; + break; + + case 0x7240: + case 0x7243: + case 0x7244: + case 0x7245: + case 0x7246: + case 0x7247: + case 0x7248: + case 0x7249: + case 0x724A: + case 0x724B: + case 0x724C: + case 0x724D: + case 0x724E: + case 0x724F: + case 0x7284: + caps->family = CHIP_FAMILY_R580; + caps->num_vert_fpus = 8; + caps->is_r500 = TRUE; + break; + + case 0x7280: + caps->family = CHIP_FAMILY_RV570; + caps->num_vert_fpus = 5; + caps->is_r500 = TRUE; + break; + + case 0x7281: + case 0x7283: + case 0x7287: + case 0x7288: + case 0x7289: + case 0x728B: + case 0x728C: + case 0x7290: + case 0x7291: + case 0x7293: + case 0x7297: + caps->family = CHIP_FAMILY_RV560; + caps->num_vert_fpus = 5; + caps->is_r500 = TRUE; + break; + + default: + debug_printf("r300: Warning: Unknown chipset 0x%x\n", + caps->pci_id); + break; + } + + /* XXX SW TCL is broken so no forcing it off right now + caps->has_tcl = FALSE; */ +} diff --git a/src/gallium/drivers/r300/r300_chipset.h b/src/gallium/drivers/r300/r300_chipset.h new file mode 100644 index 0000000000..a9cd372ec5 --- /dev/null +++ b/src/gallium/drivers/r300/r300_chipset.h @@ -0,0 +1,79 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_CHIPSET_H +#define R300_CHIPSET_H + +#include "pipe/p_compiler.h" + +/* Structure containing all the possible information about a specific Radeon + * in the R3xx, R4xx, and R5xx families. */ +struct r300_capabilities { + /* PCI ID */ + uint32_t pci_id; + /* Chipset family */ + int family; + /* The number of vertex floating-point units */ + int num_vert_fpus; + /* The number of fragment pipes */ + int num_frag_pipes; + /* Whether or not TCL is physically present */ + boolean has_tcl; + /* Whether or not this is an RV515 or newer; R500s have many differences + * that require extra consideration, compared to their R3xx cousins: + * - Extra bit of width and height on texture sizes + * - Blend color is split across two registers + * - Universal Shader (US) block used for fragment shaders */ + boolean is_r500; +}; + +/* Enumerations for legibility and telling which card we're running on. 
*/ +enum { + CHIP_FAMILY_R300 = 0, + CHIP_FAMILY_R350, + CHIP_FAMILY_R360, + CHIP_FAMILY_RV350, + CHIP_FAMILY_RV370, + CHIP_FAMILY_RV380, + CHIP_FAMILY_R420, + CHIP_FAMILY_R423, + CHIP_FAMILY_R430, + CHIP_FAMILY_R480, + CHIP_FAMILY_R481, + CHIP_FAMILY_RV410, + CHIP_FAMILY_RS400, + CHIP_FAMILY_RC410, + CHIP_FAMILY_RS480, + CHIP_FAMILY_RS482, + CHIP_FAMILY_RS690, + CHIP_FAMILY_RS740, + CHIP_FAMILY_RV515, + CHIP_FAMILY_R520, + CHIP_FAMILY_RV530, + CHIP_FAMILY_R580, + CHIP_FAMILY_RV560, + CHIP_FAMILY_RV570 +}; + +void r300_parse_chipset(struct r300_capabilities* caps); + +#endif /* R300_CHIPSET_H */ diff --git a/src/gallium/drivers/r300/r300_clear.c b/src/gallium/drivers/r300/r300_clear.c new file mode 100644 index 0000000000..fd28437aaa --- /dev/null +++ b/src/gallium/drivers/r300/r300_clear.c @@ -0,0 +1,33 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_clear.h" + +/* This gets its own file because Intel's is in its own file. + * I assume there's a good reason. */ +void r300_clear(struct pipe_context* pipe, + struct pipe_surface* ps, + unsigned color) +{ + pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, color); + ps->status = PIPE_SURFACE_STATUS_DEFINED; +}
\ No newline at end of file diff --git a/src/gallium/drivers/r300/r300_clear.h b/src/gallium/drivers/r300/r300_clear.h new file mode 100644 index 0000000000..e24a0690c9 --- /dev/null +++ b/src/gallium/drivers/r300/r300_clear.h @@ -0,0 +1,27 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "pipe/p_context.h" + +void r300_clear(struct pipe_context* pipe, + struct pipe_surface* ps, + unsigned color); diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c new file mode 100644 index 0000000000..15a8751549 --- /dev/null +++ b/src/gallium/drivers/r300/r300_context.c @@ -0,0 +1,141 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#include "r300_context.h" + +static boolean r300_draw_range_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count) +{ + struct r300_context* r300 = r300_context(pipe); + int i; + + if (r300->dirty_state) { + r300_emit_dirty_state(r300); + } + + for (i = 0; i < r300->vertex_buffer_count; i++) { + void* buf = pipe_buffer_map(pipe->screen, + r300->vertex_buffers[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_vertex_buffer(r300->draw, i, buf); + } + + if (indexBuffer) { + void* indices = pipe_buffer_map(pipe->screen, indexBuffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_element_buffer_range(r300->draw, indexSize, + minIndex, maxIndex, indices); + } else { + draw_set_mapped_element_buffer(r300->draw, 0, NULL); + } + + draw_set_mapped_constant_buffer(r300->draw, + r300->shader_constants[PIPE_SHADER_VERTEX].constants, + r300->shader_constants[PIPE_SHADER_VERTEX].user_count * + (sizeof(float) * 4)); + + /* Abandon all hope, ye who enter here. 
*/ + draw_arrays(r300->draw, mode, start, count); + + for (i = 0; i < r300->vertex_buffer_count; i++) { + pipe_buffer_unmap(pipe->screen, r300->vertex_buffers[i].buffer); + draw_set_mapped_vertex_buffer(r300->draw, i, NULL); + } + + if (indexBuffer) { + pipe_buffer_unmap(pipe->screen, indexBuffer); + draw_set_mapped_element_buffer_range(r300->draw, 0, start, + start + count - 1, NULL); + } + + return true; +} + +static boolean r300_draw_elements(struct pipe_context* pipe, + struct pipe_buffer* indexBuffer, + unsigned indexSize, unsigned mode, + unsigned start, unsigned count) +{ + return r300_draw_range_elements(pipe, indexBuffer, indexSize, 0, ~0, + mode, start, count); +} + +static boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode, + unsigned start, unsigned count) +{ + return r300_draw_elements(pipe, NULL, 0, mode, start, count); +} + +static void r300_destroy_context(struct pipe_context* context) { + struct r300_context* r300 = r300_context(context); + + draw_destroy(r300->draw); + + FREE(r300->blend_color_state); + FREE(r300->scissor_state); + FREE(r300); +} + +struct pipe_context* r300_create_context(struct pipe_screen* screen, + struct pipe_winsys* winsys, + struct r300_winsys* r300_winsys) +{ + struct r300_context* r300 = CALLOC_STRUCT(r300_context); + + if (!r300) + return NULL; + + r300->winsys = r300_winsys; + r300->context.winsys = winsys; + r300->context.screen = r300_create_screen(winsys, r300_winsys); + + r300->context.destroy = r300_destroy_context; + + r300->context.clear = r300_clear; + + r300->context.draw_arrays = r300_draw_arrays; + r300->context.draw_elements = r300_draw_elements; + r300->context.draw_range_elements = r300_draw_range_elements; + + r300->draw = draw_create(); + draw_set_rasterize_stage(r300->draw, r300_draw_swtcl_stage(r300)); + + r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state); + r300->scissor_state = CALLOC_STRUCT(r300_scissor_state); + + r300_init_flush_functions(r300); + + 
r300_init_surface_functions(r300); + + r300_init_state_functions(r300); + + r300->dirty_state = R300_NEW_KITCHEN_SINK; + r300->dirty_hw++; + + return &r300->context; +} diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h new file mode 100644 index 0000000000..aaab1dd2bc --- /dev/null +++ b/src/gallium/drivers/r300/r300_context.h @@ -0,0 +1,265 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/ + +#ifndef R300_CONTEXT_H +#define R300_CONTEXT_H + +#include "draw/draw_context.h" +#include "draw/draw_vertex.h" +#include "pipe/p_context.h" +#include "tgsi/tgsi_scan.h" +#include "util/u_memory.h" + +#include "r300_clear.h" +#include "r300_screen.h" +#include "r300_winsys.h" + +struct r300_blend_state { + uint32_t blend_control; /* R300_RB3D_CBLEND: 0x4e04 */ + uint32_t alpha_blend_control; /* R300_RB3D_ABLEND: 0x4e08 */ + uint32_t rop; /* R300_RB3D_ROPCNTL: 0x4e18 */ + uint32_t dither; /* R300_RB3D_DITHER_CTL: 0x4e50 */ +}; + +struct r300_blend_color_state { + /* RV515 and earlier */ + uint32_t blend_color; /* R300_RB3D_BLEND_COLOR: 0x4e10 */ + /* R520 and newer */ + uint32_t blend_color_red_alpha; /* R500_RB3D_CONSTANT_COLOR_AR: 0x4ef8 */ + uint32_t blend_color_green_blue; /* R500_RB3D_CONSTANT_COLOR_GB: 0x4efc */ +}; + +struct r300_dsa_state { + uint32_t alpha_function; /* R300_FG_ALPHA_FUNC: 0x4bd4 */ + uint32_t alpha_reference; /* R500_FG_ALPHA_VALUE: 0x4be0 */ + uint32_t z_buffer_control; /* R300_ZB_CNTL: 0x4f00 */ + uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */ + uint32_t stencil_ref_mask; /* R300_ZB_STENCILREFMASK: 0x4f08 */ + uint32_t z_buffer_top; /* R300_ZB_ZTOP: 0x4f14 */ + uint32_t stencil_ref_bf; /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */ +}; + +struct r300_rs_state { + /* XXX icky as fucking hell */ + struct pipe_rasterizer_state rs; + + uint32_t vap_control_status; /* R300_VAP_CNTL_STATUS: 0x2140 */ + uint32_t point_size; /* R300_GA_POINT_SIZE: 0x421c */ + uint32_t line_control; /* R300_GA_LINE_CNTL: 0x4234 */ + uint32_t depth_scale_front; /* R300_SU_POLY_OFFSET_FRONT_SCALE: 0x42a4 */ + uint32_t depth_offset_front;/* R300_SU_POLY_OFFSET_FRONT_OFFSET: 0x42a8 */ + uint32_t depth_scale_back; /* R300_SU_POLY_OFFSET_BACK_SCALE: 0x42ac */ + uint32_t depth_offset_back; /* R300_SU_POLY_OFFSET_BACK_OFFSET: 0x42b0 */ + uint32_t polygon_offset_enable; /* R300_SU_POLY_OFFSET_ENABLE: 0x42b4 */ + uint32_t cull_mode; /* R300_SU_CULL_MODE: 
0x42b8 */
+    uint32_t line_stipple_config;   /* R300_GA_LINE_STIPPLE_CONFIG: 0x4328 */
+    uint32_t line_stipple_value;    /* R300_GA_LINE_STIPPLE_VALUE: 0x4260 */
+};
+
+/* Sampler state, stored as the raw values of the registers named in the
+ * field comments. */
+struct r300_sampler_state {
+    uint32_t filter0;      /* R300_TX_FILTER0: 0x4400 */
+    uint32_t filter1;      /* R300_TX_FILTER1: 0x4440 */
+    uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */
+};
+
+/* Scissor rectangle, stored as raw register values. */
+struct r300_scissor_state {
+    uint32_t scissor_top_left;     /* R300_SC_SCISSORS_TL: 0x43e0 */
+    uint32_t scissor_bottom_right; /* R300_SC_SCISSORS_BR: 0x43e4 */
+};
+
+/* Placeholder: no texture state is tracked yet. */
+struct r300_texture_state {
+};
+
+/* Bits for r300_context::dirty_state, one per state object.
+ * R300_NEW_KITCHEN_SINK sets every bit up to and including
+ * R300_NEW_VERTEX_SHADER.
+ * NOTE(review): the unused ranges after SAMPLER and TEXTURE look reserved
+ * (per-unit bits?) -- confirm before assigning new flags there. */
+#define R300_NEW_BLEND           0x0000001
+#define R300_NEW_BLEND_COLOR     0x0000002
+#define R300_NEW_CONSTANTS       0x0000004
+#define R300_NEW_DSA             0x0000008
+#define R300_NEW_FRAMEBUFFERS    0x0000010
+#define R300_NEW_FRAGMENT_SHADER 0x0000020
+#define R300_NEW_RASTERIZER      0x0000040
+#define R300_NEW_SAMPLER         0x0000080
+#define R300_NEW_SCISSOR         0x0008000
+#define R300_NEW_TEXTURE         0x0010000
+#define R300_NEW_VERTEX_FORMAT   0x1000000
+#define R300_NEW_VERTEX_SHADER   0x2000000
+#define R300_NEW_KITCHEN_SINK    0x3ffffff
+
+/* The next several objects are not pure Radeon state; they inherit from
+ * various Gallium classes. */
+
+struct r300_constant_buffer {
+    /* Buffer of constants */
+    /* XXX first number should be raised */
+    float constants[8][4];
+    /* Number of user-defined constants */
+    int user_count;
+    /* Total number of constants */
+    int count;
+};
+
+/* Common base of the R300 and R500 fragment shader objects. */
+struct r3xx_fragment_shader {
+    /* Parent class */
+    struct pipe_shader_state state;
+    struct tgsi_shader_info info;
+
+    /* Has this shader been translated yet? */
+    boolean translated;
+
+    /* Pixel stack size */
+    int stack_size;
+};
+
+struct r300_fragment_shader {
+    /* Parent class. Must stay the first member: r300_emit_dirty_state()
+     * casts r300_context::fs to this type. */
+    struct r3xx_fragment_shader shader;
+
+    /* Number of ALU instructions */
+    int alu_instruction_count;
+
+    /* Number of texture instructions */
+    int tex_instruction_count;
+
+    /* Number of texture indirections */
+    int indirections;
+
+    /* Indirection node offsets */
+    int offset0;
+    int offset1;
+    int offset2;
+    int offset3;
+
+    /* Machine instructions */
+    struct {
+        uint32_t alu_rgb_inst;
+        uint32_t alu_rgb_addr;
+        uint32_t alu_alpha_inst;
+        uint32_t alu_alpha_addr;
+    } instructions[64]; /* XXX magic num */
+};
+
+struct r500_fragment_shader {
+    /* Parent class. Must stay the first member: r300_emit_dirty_state()
+     * casts r300_context::fs to this type. */
+    struct r3xx_fragment_shader shader;
+
+    /* Number of used instructions */
+    int instruction_count;
+
+    /* Machine instructions; each one is six dwords. */
+    struct {
+        uint32_t inst0;
+        uint32_t inst1;
+        uint32_t inst2;
+        uint32_t inst3;
+        uint32_t inst4;
+        uint32_t inst5;
+    } instructions[256]; /* XXX magic number */
+};
+
+struct r300_texture {
+    /* Parent class */
+    struct pipe_texture tex;
+
+    /* Offsets into the buffer. */
+    unsigned offset[PIPE_MAX_TEXTURE_LEVELS];
+
+    /* Stride (pitch?) of this texture in bytes */
+    unsigned stride;
+
+    /* Total size of this texture, in bytes. */
+    unsigned size;
+
+    /* Pipe buffer backing this texture. */
+    struct pipe_buffer* buffer;
+};
+
+struct r300_vertex_format {
+    /* Parent class */
+    struct vertex_info vinfo;
+    /* R300_VAP_PROG_STREAM_CNTL_[0-7] */
+    uint32_t vap_prog_stream_cntl[8];
+    /* R300_VAP_PROG_STREAM_CNTL_EXT_[0-7] */
+    uint32_t vap_prog_stream_cntl_ext[8];
+};
+
+struct r300_context {
+    /* Parent class */
+    struct pipe_context context;
+
+    /* The interface to the windowing system, etc. */
+    struct r300_winsys* winsys;
+    /* Draw module. Used mostly for SW TCL. */
+    struct draw_context* draw;
+
+    /* Various CSO state objects. */
+    /* Blend state. */
+    struct r300_blend_state* blend_state;
+    /* Blend color state. */
+    struct r300_blend_color_state* blend_color_state;
+    /* Shader constants. */
+    struct r300_constant_buffer shader_constants[PIPE_SHADER_TYPES];
+    /* Depth, stencil, and alpha state. */
+    struct r300_dsa_state* dsa_state;
+    /* Fragment shader. */
+    struct r3xx_fragment_shader* fs;
+    /* Framebuffer state. We currently don't need our own version of this. */
+    struct pipe_framebuffer_state framebuffer_state;
+    /* Rasterizer state. */
+    struct r300_rs_state* rs_state;
+    /* Sampler states. */
+    struct r300_sampler_state* sampler_states[8];
+    int sampler_count;
+    /* Scissor state. */
+    struct r300_scissor_state* scissor_state;
+    /* Texture states. */
+    struct r300_texture* textures[8];
+    struct r300_texture_state* texture_states[8];
+    int texture_count;
+    /* Vertex buffers. */
+    struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS];
+    int vertex_buffer_count;
+    /* Vertex information. */
+    struct r300_vertex_format vertex_info;
+    /* Bitmask of dirty state objects (R300_NEW_* flags). */
+    uint32_t dirty_state;
+    /* Flag indicating whether or not the HW is dirty. */
+    uint32_t dirty_hw;
+};
+
+/* Convenience cast wrapper. The cast relies on `context` being the first
+ * member of struct r300_context. */
+static struct r300_context* r300_context(struct pipe_context* context) {
+    return (struct r300_context*)context;
+}
+
+/* Context initialization. */
+struct draw_stage* r300_draw_swtcl_stage(struct r300_context* r300);
+void r300_init_state_functions(struct r300_context* r300);
+void r300_init_surface_functions(struct r300_context* r300);
+
+/* Fun with includes: r300_winsys also declares this prototype.
+ * We'll just step out in that case...
*/
+#ifndef R300_WINSYS_H
+struct pipe_context* r300_create_context(struct pipe_screen* screen,
+                                         struct pipe_winsys* winsys,
+                                         struct r300_winsys* r300_winsys);
+#endif
+
+#endif /* R300_CONTEXT_H */
diff --git a/src/gallium/drivers/r300/r300_cs.h b/src/gallium/drivers/r300/r300_cs.h
new file mode 100644
index 0000000000..d8038ff1e1
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_cs.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_CS_H
+#define R300_CS_H
+
+#include "util/u_math.h"
+
+#include "r300_reg.h"
+#include "r300_winsys.h"
+
+/* Yes, I know macros are ugly. However, they are much prettier than the code
+ * that they neatly hide away, and don't have the cost of function setup, so
+ * we're going to use them.
+ *
+ * Usage: CS_LOCALS(ctx) declares cs_winsys, cs and cs_count in the current
+ * scope; every other macro below expects those three names to exist.
+ * Macro arguments may be evaluated more than once, so do not pass
+ * expressions with side effects. */
+
+/* Parenthesized so the value survives being embedded in a larger
+ * expression (e.g. `x % MAX_CS_SIZE`). */
+#define MAX_CS_SIZE (64 * 1024 / 4)
+
+#define VERY_VERBOSE_REGISTERS 0
+
+/* XXX stolen from radeon_drm.h */
+#define RADEON_GEM_DOMAIN_CPU 0x1
+#define RADEON_GEM_DOMAIN_GTT 0x2
+#define RADEON_GEM_DOMAIN_VRAM 0x4
+
+/* XXX stolen from radeon_reg.h */
+#define RADEON_CP_PACKET0 0x0
+/* CP_PACKET3() below needs this; guarded in case an included header
+ * already provides it. */
+#ifndef RADEON_CP_PACKET3
+#define RADEON_CP_PACKET3 0xC0000000
+#endif
+
+#define CP_PACKET0(register, count) \
+    (RADEON_CP_PACKET0 | ((count) << 16) | ((register) >> 2))
+
+#define CP_PACKET3(op, count) \
+    (RADEON_CP_PACKET3 | (op) | ((count) << 16))
+
+#define CS_LOCALS(context) \
+    struct r300_winsys* cs_winsys = (context)->winsys; \
+    struct radeon_cs* cs = cs_winsys->cs; \
+    int cs_count = 0;
+
+#define CHECK_CS(size) \
+    cs_winsys->check_cs(cs, (size))
+
+/* Open a section of `size` dwords. cs_count tracks how many dwords are
+ * still owed; END_CS warns if it is not zero. */
+#define BEGIN_CS(size) do { \
+    CHECK_CS(size); \
+    debug_printf("r300: BEGIN_CS, count %d, in %s (%s:%d)\n", \
+        (size), __FUNCTION__, __FILE__, __LINE__); \
+    cs_winsys->begin_cs(cs, (size), __FILE__, __FUNCTION__, __LINE__); \
+    cs_count = (size); \
+} while (0)
+
+#define OUT_CS(value) do { \
+    cs_winsys->write_cs_dword(cs, (value)); \
+    cs_count--; \
+} while (0)
+
+#define OUT_CS_32F(value) do { \
+    cs_winsys->write_cs_dword(cs, fui(value)); \
+    cs_count--; \
+} while (0)
+
+/* Single register write: packet0 header plus one value (2 dwords). */
+#define OUT_CS_REG(register, value) do { \
+    if (VERY_VERBOSE_REGISTERS) \
+        debug_printf("r300: writing 0x%08X to register 0x%04X\n", \
+            (value), (register)); \
+    assert(register); \
+    OUT_CS(CP_PACKET0(register, 0)); \
+    OUT_CS(value); \
+} while (0)
+
+/* Note: This expects count to be the number of registers,
+ * not the actual packet0 count! */
+#define OUT_CS_REG_SEQ(register, count) do { \
+    if (VERY_VERBOSE_REGISTERS) \
+        debug_printf("r300: writing register sequence of %d to 0x%04X\n", \
+            (count), (register)); \
+    assert(register); \
+    OUT_CS(CP_PACKET0(register, ((count) - 1))); \
+} while (0)
+
+/* Relocation: the offset dword plus 2 dwords accounted to the reloc
+ * itself, so 3 dwords of the section budget in total. */
+#define OUT_CS_RELOC(bo, offset, rd, wd, flags) do { \
+    debug_printf("r300: writing relocation for buffer %p, offset %d\n", \
+        (bo), (offset)); \
+    assert(bo); \
+    OUT_CS(offset); \
+    cs_winsys->write_cs_reloc(cs, (bo), (rd), (wd), (flags)); \
+    cs_count -= 2; \
+} while (0)
+
+#define END_CS do { \
+    debug_printf("r300: END_CS in %s (%s:%d)\n", __FUNCTION__, __FILE__, \
+        __LINE__); \
+    if (cs_count != 0) \
+        debug_printf("r300: Warning: cs_count off by %d\n", cs_count); \
+    cs_winsys->end_cs(cs, __FILE__, __FUNCTION__, __LINE__); \
+} while (0)
+
+#define FLUSH_CS do { \
+    debug_printf("r300: FLUSH_CS in %s (%s:%d)\n\n", __FUNCTION__, __FILE__, \
+        __LINE__); \
+    cs_winsys->flush_cs(cs); \
+} while (0)
+
+#include "r300_cs_inlines.h"
+
+#endif /* R300_CS_H */
diff --git a/src/gallium/drivers/r300/r300_cs_inlines.h b/src/gallium/drivers/r300/r300_cs_inlines.h
new file mode 100644
index 0000000000..03bb608eb9
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_cs_inlines.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* r300_cs_inlines: This is just a handful of useful inlines for sending
+ * (very) common instructions to the CS buffer. Should only be included from
+ * r300_cs.h, probably. */
+
+/* Everything below compiles away unless r300_cs.h has been included first. */
+#ifdef R300_CS_H
+
+#define RADEON_ONE_REG_WR (1 << 15)
+
+/* Packet0 header with RADEON_ONE_REG_WR set. As with OUT_CS_REG_SEQ, count
+ * is the number of data dwords that follow; the caller emits them with
+ * OUT_CS. This macro writes one dword (the header) itself. */
+#define OUT_CS_ONE_REG(register, count) do { \
+    if (VERY_VERBOSE_REGISTERS) \
+        debug_printf("r300: writing data sequence of %d to 0x%04X\n", \
+            count, register); \
+    assert(register); \
+    OUT_CS(CP_PACKET0(register, ((count) - 1)) | RADEON_ONE_REG_WR); \
+} while (0)
+
+/* Writes bits 14-18 to RADEON_WAIT_UNTIL (2 dwords).
+ * NOTE(review): presumably these are the engine idle/clean wait bits --
+ * confirm against the register documentation. */
+#define R300_PACIFY do { \
+    OUT_CS_REG(RADEON_WAIT_UNTIL, (1 << 14) | (1 << 15) | (1 << 16) | (1 << 17) | \
+        (1 << 18)); \
+} while (0)
+
+/* Writes 0 to R300_SC_SCREENDOOR, pacifies, then writes 0xffffff back
+ * (6 dwords in total). */
+#define R300_SCREENDOOR do { \
+    OUT_CS_REG(R300_SC_SCREENDOOR, 0x0); \
+    R300_PACIFY; \
+    OUT_CS_REG(R300_SC_SCREENDOOR, 0xffffff); \
+} while (0)
+
+#endif /* R300_CS_H */
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
new file mode 100644
index 0000000000..960f45f651
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -0,0 +1,303 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to
whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+/* r300_emit: Functions for emitting state.
+ *
+ * Dword accounting used for every BEGIN_CS() below: OUT_CS_REG is 2 dwords,
+ * OUT_CS_REG_SEQ and OUT_CS_ONE_REG are 1 (header only), OUT_CS is 1 and
+ * OUT_CS_RELOC is 3. */
+
+#include "r300_emit.h"
+
+/* Emit blend control, ROP and dither registers. */
+void r300_emit_blend_state(struct r300_context* r300,
+                           struct r300_blend_state* blend)
+{
+    CS_LOCALS(r300);
+    BEGIN_CS(7);
+    OUT_CS_REG_SEQ(R300_RB3D_CBLEND, 2);
+    OUT_CS(blend->blend_control);
+    OUT_CS(blend->alpha_blend_control);
+    OUT_CS_REG(R300_RB3D_ROPCNTL, blend->rop);
+    OUT_CS_REG(R300_RB3D_DITHER_CTL, blend->dither);
+    END_CS;
+}
+
+/* Emit the constant blend color; R500 uses a two-register layout. */
+void r300_emit_blend_color_state(struct r300_context* r300,
+                                 struct r300_blend_color_state* bc)
+{
+    struct r300_screen* r300screen =
+        (struct r300_screen*)r300->context.screen;
+    CS_LOCALS(r300);
+    if (r300screen->caps->is_r500) {
+        BEGIN_CS(3);
+        OUT_CS_REG_SEQ(R500_RB3D_CONSTANT_COLOR_AR, 2);
+        OUT_CS(bc->blend_color_red_alpha);
+        OUT_CS(bc->blend_color_green_blue);
+        END_CS;
+    } else {
+        BEGIN_CS(2);
+        OUT_CS_REG(R300_RB3D_BLEND_COLOR, bc->blend_color);
+        END_CS;
+    }
+}
+
+/* Emit depth, stencil and alpha-test state. */
+void r300_emit_dsa_state(struct r300_context* r300,
+                           struct r300_dsa_state* dsa)
+{
+    struct r300_screen* r300screen =
+        (struct r300_screen*)r300->context.screen;
+    CS_LOCALS(r300);
+    /* Both sizes are 8 while the R500-only writes below stay commented out;
+     * the R500 size must grow by 2 for each one that is enabled. */
+    BEGIN_CS(r300screen->caps->is_r500 ? 8 : 8);
+    OUT_CS_REG(R300_FG_ALPHA_FUNC, dsa->alpha_function);
+    /* XXX figure out the r300 counterpart for this */
+    if (r300screen->caps->is_r500) {
+        /* OUT_CS_REG(R500_FG_ALPHA_VALUE, dsa->alpha_reference); */
+    }
+    OUT_CS_REG_SEQ(R300_ZB_CNTL, 3);
+    OUT_CS(dsa->z_buffer_control);
+    OUT_CS(dsa->z_stencil_control);
+    OUT_CS(dsa->stencil_ref_mask);
+    OUT_CS_REG(R300_ZB_ZTOP, dsa->z_buffer_top);
+    if (r300screen->caps->is_r500) {
+        /* OUT_CS_REG(R500_ZB_STENCILREFMASK_BF, dsa->stencil_ref_bf); */
+    }
+    END_CS;
+}
+
+/* Emit an R300-class fragment shader: seven setup registers followed by
+ * four registers per ALU instruction. */
+void r300_emit_fragment_shader(struct r300_context* r300,
+                               struct r300_fragment_shader* fs)
+{
+    CS_LOCALS(r300);
+    int i;
+    /* 7 setup register writes (14 dwords) plus 4 register writes (8 dwords)
+     * per ALU instruction. The previous fixed size of 22 only matched a
+     * shader with exactly one instruction. */
+    BEGIN_CS(14 + (fs->alu_instruction_count * 8));
+
+    OUT_CS_REG(R300_US_CONFIG, MAX2(fs->indirections - 1, 0));
+    OUT_CS_REG(R300_US_PIXSIZE, fs->shader.stack_size);
+    /* XXX figure out exactly how big the sizes are on this reg */
+    OUT_CS_REG(R300_US_CODE_OFFSET, 0x0);
+    /* XXX figure these ones out a bit better kthnx */
+    OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0);
+    OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0);
+    OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0);
+    OUT_CS_REG(R300_US_CODE_ADDR_3, R300_RGBA_OUT);
+
+    for (i = 0; i < fs->alu_instruction_count; i++) {
+        OUT_CS_REG(R300_US_ALU_RGB_INST_0 + (4 * i),
+            fs->instructions[i].alu_rgb_inst);
+        OUT_CS_REG(R300_US_ALU_RGB_ADDR_0 + (4 * i),
+            fs->instructions[i].alu_rgb_addr);
+        OUT_CS_REG(R300_US_ALU_ALPHA_INST_0 + (4 * i),
+            fs->instructions[i].alu_alpha_inst);
+        OUT_CS_REG(R300_US_ALU_ALPHA_ADDR_0 + (4 * i),
+            fs->instructions[i].alu_alpha_addr);
+    }
+
+    END_CS;
+}
+
+/* Emit an R500-class fragment shader: four setup registers (8 dwords), one
+ * vector-data header, then six dwords per instruction. */
+void r500_emit_fragment_shader(struct r300_context* r300,
+                               struct r500_fragment_shader* fs)
+{
+    CS_LOCALS(r300);
+    int i = 0;
+    BEGIN_CS(9 + (fs->instruction_count * 6));
+    OUT_CS_REG(R500_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
+    OUT_CS_REG(R500_US_PIXSIZE, fs->shader.stack_size);
+    OUT_CS_REG(R500_US_CODE_ADDR, R500_US_CODE_START_ADDR(0) |
+        R500_US_CODE_END_ADDR(fs->instruction_count));
+
+    OUT_CS_REG(R500_GA_US_VECTOR_INDEX, R500_GA_US_VECTOR_INDEX_TYPE_INSTR);
+    OUT_CS_ONE_REG(R500_GA_US_VECTOR_DATA,
+        fs->instruction_count * 6);
+    for (i = 0; i < fs->instruction_count; i++) {
+        OUT_CS(fs->instructions[i].inst0);
+        OUT_CS(fs->instructions[i].inst1);
+        OUT_CS(fs->instructions[i].inst2);
+        OUT_CS(fs->instructions[i].inst3);
+        OUT_CS(fs->instructions[i].inst4);
+        OUT_CS(fs->instructions[i].inst5);
+    }
+    END_CS;
+}
+
+/* Translate pipe_format into US_OUT_FMT. Note that formats are stored from
+ * C3 to C0. Formats other than A8R8G8B8_UNORM map to
+ * R300_US_OUT_FMT_UNUSED. */
+uint32_t translate_out_fmt(enum pipe_format format)
+{
+    switch (format) {
+        case PIPE_FORMAT_A8R8G8B8_UNORM:
+            return R300_US_OUT_FMT_C4_8 |
+                R300_C0_SEL_B | R300_C1_SEL_G |
+                R300_C2_SEL_R | R300_C3_SEL_A;
+        default:
+            return R300_US_OUT_FMT_UNUSED;
+    }
+    return 0;
+}
+
+/* Emit color/depth buffer offsets and formats, then flush the destination
+ * and Z caches.
+ * XXX add pitch, stride */
+void r300_emit_fb_state(struct r300_context* r300,
+                        struct pipe_framebuffer_state* fb)
+{
+    CS_LOCALS(r300);
+    struct r300_texture* tex;
+    int i;
+
+    /* Each buffer costs 6 dwords: OUT_CS_REG_SEQ (1) + OUT_CS_RELOC (3) +
+     * OUT_CS_REG (2). The two cache flushes add 4. The previous size used
+     * 5 per buffer, which left cs_count nonzero at END_CS. */
+    BEGIN_CS((6 * fb->nr_cbufs) + (fb->zsbuf ? 6 : 0) + 4);
+    for (i = 0; i < fb->nr_cbufs; i++) {
+        tex = (struct r300_texture*)fb->cbufs[i]->texture;
+        OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0 + (4 * i), 1);
+        OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+
+        OUT_CS_REG(R300_US_OUT_FMT_0 + (4 * i),
+            translate_out_fmt(fb->cbufs[i]->format));
+    }
+
+    if (fb->zsbuf) {
+        tex = (struct r300_texture*)fb->zsbuf->texture;
+        OUT_CS_REG_SEQ(R300_ZB_DEPTHOFFSET, 1);
+        OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+        if (fb->zsbuf->format == PIPE_FORMAT_Z24S8_UNORM) {
+            OUT_CS_REG(R300_ZB_FORMAT,
+                R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL);
+        } else {
+            OUT_CS_REG(R300_ZB_FORMAT, 0x0);
+        }
+    }
+
+    OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
+        R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS |
+        R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D);
+    OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+    END_CS;
+}
+
+/* Emit rasterizer state: VAP control, polygon offset/cull block and line
+ * stipple registers. */
+void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs)
+{
+    CS_LOCALS(r300);
+    BEGIN_CS(13);
+    OUT_CS_REG(R300_VAP_CNTL_STATUS, rs->vap_control_status);
+    OUT_CS_REG_SEQ(R300_SU_POLY_OFFSET_FRONT_SCALE, 6);
+    OUT_CS(rs->depth_scale_front);
+    OUT_CS(rs->depth_offset_front);
+    OUT_CS(rs->depth_scale_back);
+    OUT_CS(rs->depth_offset_back);
+    OUT_CS(rs->polygon_offset_enable);
+    OUT_CS(rs->cull_mode);
+    OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, rs->line_stipple_config);
+    OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, rs->line_stipple_value);
+    END_CS;
+}
+
+/* Emit the scissor rectangle. */
+void r300_emit_scissor_state(struct r300_context* r300,
+                             struct r300_scissor_state* scissor)
+{
+    CS_LOCALS(r300);
+    BEGIN_CS(3);
+    OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
+    OUT_CS(scissor->scissor_top_left);
+    OUT_CS(scissor->scissor_bottom_right);
+    END_CS;
+}
+
+/* Emit the vertex format held in r300->vertex_info: the four hwfmt words,
+ * then the eight stream-control and eight extended stream-control words. */
+void r300_emit_vertex_format_state(struct r300_context* r300)
+{
+    CS_LOCALS(r300);
+    int i;
+
+    BEGIN_CS(6);
+    OUT_CS_REG_SEQ(R300_VAP_VTX_STATE_CNTL, 2);
+    OUT_CS(r300->vertex_info.vinfo.hwfmt[0]);
+    OUT_CS(r300->vertex_info.vinfo.hwfmt[1]);
+    OUT_CS_REG_SEQ(R300_VAP_OUTPUT_VTX_FMT_0, 2);
+    OUT_CS(r300->vertex_info.vinfo.hwfmt[2]);
+    OUT_CS(r300->vertex_info.vinfo.hwfmt[3]);
+    END_CS;
+
+    BEGIN_CS(18);
+    OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_0, 8);
+    for (i = 0; i < 8; i++) {
+        OUT_CS(r300->vertex_info.vap_prog_stream_cntl[i]);
+    }
+    OUT_CS_REG_SEQ(R300_VAP_PROG_STREAM_CNTL_EXT_0, 8);
+    for (i = 0; i < 8; i++) {
+        OUT_CS(r300->vertex_info.vap_prog_stream_cntl_ext[i]);
+    }
+    END_CS;
+}
+
+/* Emit all dirty state.
+ * Each handled R300_NEW_* bit is cleared after its emitter runs.
+ * NOTE(review): R300_NEW_FRAMEBUFFERS, R300_NEW_CONSTANTS, R300_NEW_SAMPLER,
+ * R300_NEW_TEXTURE and R300_NEW_VERTEX_SHADER are not handled here and stay
+ * set -- confirm whether that is intentional. */
+void r300_emit_dirty_state(struct r300_context* r300)
+{
+    struct r300_screen* r300screen =
+        (struct r300_screen*)r300->context.screen;
+    CS_LOCALS(r300);
+
+    if (!(r300->dirty_state) && !(r300->dirty_hw)) {
+        return;
+    }
+
+    r300_update_derived_state(r300);
+
+    /* XXX check size */
+
+    if (r300->dirty_state & R300_NEW_BLEND) {
+        r300_emit_blend_state(r300, r300->blend_state);
+        r300->dirty_state &= ~R300_NEW_BLEND;
+    }
+
+    if (r300->dirty_state & R300_NEW_BLEND_COLOR) {
+        r300_emit_blend_color_state(r300, r300->blend_color_state);
+        r300->dirty_state &= ~R300_NEW_BLEND_COLOR;
+    }
+
+    if (r300->dirty_state & R300_NEW_DSA) {
+        r300_emit_dsa_state(r300, r300->dsa_state);
+        r300->dirty_state &= ~R300_NEW_DSA;
+    }
+
+    if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) {
+        if (r300screen->caps->is_r500) {
+            r500_emit_fragment_shader(r300,
+                (struct r500_fragment_shader*)r300->fs);
+        } else {
+            r300_emit_fragment_shader(r300,
+                (struct r300_fragment_shader*)r300->fs);
+        }
+        r300->dirty_state &= ~R300_NEW_FRAGMENT_SHADER;
+    }
+
+    if (r300->dirty_state & R300_NEW_RASTERIZER) {
+        r300_emit_rs_state(r300, r300->rs_state);
+        r300->dirty_state &= ~R300_NEW_RASTERIZER;
+    }
+
+    if (r300->dirty_state & R300_NEW_SCISSOR) {
+        r300_emit_scissor_state(r300, r300->scissor_state);
+        r300->dirty_state &= ~R300_NEW_SCISSOR;
+    }
+
+    if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) {
+        r300_emit_vertex_format_state(r300);
+        r300->dirty_state &= ~R300_NEW_VERTEX_FORMAT;
+    }
+}
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
new file mode 100644
index 0000000000..f21ca33171
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ *
on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_EMIT_H
+#define R300_EMIT_H
+
+#include "util/u_math.h"
+
+#include "r300_context.h"
+#include "r300_cs.h"
+#include "r300_screen.h"
+
+/* Per-object emitters: each writes one state object to the command stream
+ * of the given context. */
+
+void r300_emit_blend_state(struct r300_context* r300,
+                           struct r300_blend_state* blend);
+
+void r300_emit_blend_color_state(struct r300_context* r300,
+                                 struct r300_blend_color_state* bc);
+
+void r300_emit_dsa_state(struct r300_context* r300,
+                         struct r300_dsa_state* dsa);
+
+void r300_emit_fragment_shader(struct r300_context* r300,
+                               struct r300_fragment_shader* fs);
+
+void r500_emit_fragment_shader(struct r300_context* r300,
+                               struct r500_fragment_shader* fs);
+
+void r300_emit_fb_state(struct r300_context* r300,
+                        struct pipe_framebuffer_state* fb);
+
+void r300_emit_rs_state(struct r300_context* r300, struct r300_rs_state* rs);
+
+void r300_emit_scissor_state(struct r300_context* r300,
+                             struct r300_scissor_state* scissor);
+
+/* Defined with external linkage in r300_emit.c; declared here so it has a
+ * prototype like the other emitters. Reads r300->vertex_info. */
+void r300_emit_vertex_format_state(struct r300_context* r300);
+
+/* Emit all dirty state. */
+void r300_emit_dirty_state(struct r300_context* r300);
+
+#endif /* R300_EMIT_H */
diff --git a/src/gallium/drivers/r300/r300_flush.c b/src/gallium/drivers/r300/r300_flush.c
new file mode 100644
index 0000000000..3766f0a0a7
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_flush.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
+
+#include "r300_flush.h"
+
+/* pipe_context::flush hook.
+ * When the hardware is marked dirty, submit the command stream, mark every
+ * state object dirty so it is re-emitted, and clear the HW-dirty flag.
+ * Does nothing otherwise.
+ * `flags` and `fence` are currently ignored; *fence is never written.
+ * NOTE(review): callers that pass a non-NULL fence get nothing back --
+ * confirm that is acceptable. */
+static void r300_flush(struct pipe_context* pipe,
+                       unsigned flags,
+                       struct pipe_fence_handle** fence)
+{
+    struct r300_context* r300 = r300_context(pipe);
+    CS_LOCALS(r300);
+
+    if (r300->dirty_hw) {
+        FLUSH_CS;
+        r300->dirty_state = R300_NEW_KITCHEN_SINK;
+        r300->dirty_hw = 0;
+    }
+}
+
+/* Install the flush hook on the context. */
+void r300_init_flush_functions(struct r300_context* r300)
+{
+    r300->context.flush = r300_flush;
+}
diff --git a/src/gallium/drivers/r300/r300_flush.h b/src/gallium/drivers/r300/r300_flush.h
new file mode 100644
index 0000000000..a1b224b39c
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_flush.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_FLUSH_H
+#define R300_FLUSH_H
+
+#include "pipe/p_context.h"
+
+#include "r300_context.h"
+#include "r300_cs.h"
+
+/* Install the flush hook on the context. */
+void r300_init_flush_functions(struct r300_context* r300);
+
+#endif /* R300_FLUSH_H */
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
new file mode 100644
index 0000000000..8888b39a2f
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -0,0 +1,3263 @@
+/**************************************************************************
+
+Copyright (C) 2004-2005 Nicolai Haehnle et al.
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the "Software"),
+to deal in the Software without restriction, including without limitation
+on the rights to use, copy, modify, merge, publish, distribute, sub
+license, and/or sell copies of the Software, and to permit persons to whom
+the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+USE OR OTHER DEALINGS IN THE SOFTWARE.
+ +**************************************************************************/ + +/* *INDENT-OFF* */ + +#ifndef _R300_REG_H +#define _R300_REG_H + +#define R300_MC_INIT_MISC_LAT_TIMER 0x180 +# define R300_MC_MISC__MC_CPR_INIT_LAT_SHIFT 0 +# define R300_MC_MISC__MC_VF_INIT_LAT_SHIFT 4 +# define R300_MC_MISC__MC_DISP0R_INIT_LAT_SHIFT 8 +# define R300_MC_MISC__MC_DISP1R_INIT_LAT_SHIFT 12 +# define R300_MC_MISC__MC_FIXED_INIT_LAT_SHIFT 16 +# define R300_MC_MISC__MC_E2R_INIT_LAT_SHIFT 20 +# define R300_MC_MISC__MC_SAME_PAGE_PRIO_SHIFT 24 +# define R300_MC_MISC__MC_GLOBW_INIT_LAT_SHIFT 28 + + +#define R300_MC_INIT_GFX_LAT_TIMER 0x154 +# define R300_MC_MISC__MC_G3D0R_INIT_LAT_SHIFT 0 +# define R300_MC_MISC__MC_G3D1R_INIT_LAT_SHIFT 4 +# define R300_MC_MISC__MC_G3D2R_INIT_LAT_SHIFT 8 +# define R300_MC_MISC__MC_G3D3R_INIT_LAT_SHIFT 12 +# define R300_MC_MISC__MC_TX0R_INIT_LAT_SHIFT 16 +# define R300_MC_MISC__MC_TX1R_INIT_LAT_SHIFT 20 +# define R300_MC_MISC__MC_GLOBR_INIT_LAT_SHIFT 24 +# define R300_MC_MISC__MC_GLOBW_FULL_LAT_SHIFT 28 + +/* + * This file contains registers and constants for the R300. They have been + * found mostly by examining command buffers captured using glxtest, as well + * as by extrapolating some known registers and constants from the R200. + * I am fairly certain that they are correct unless stated otherwise + * in comments. 
+ */ + +#define R300_SE_VPORT_XSCALE 0x1D98 +#define R300_SE_VPORT_XOFFSET 0x1D9C +#define R300_SE_VPORT_YSCALE 0x1DA0 +#define R300_SE_VPORT_YOFFSET 0x1DA4 +#define R300_SE_VPORT_ZSCALE 0x1DA8 +#define R300_SE_VPORT_ZOFFSET 0x1DAC + +#define R300_VAP_PORT_IDX0 0x2040 +/* + * Vertex Array Processing (VAP) Control + */ +#define R300_VAP_CNTL 0x2080 +# define R300_PVS_NUM_SLOTS_SHIFT 0 +# define R300_PVS_NUM_CNTLRS_SHIFT 4 +# define R300_PVS_NUM_FPUS_SHIFT 8 +# define R300_VF_MAX_VTX_NUM_SHIFT 18 +# define R300_GL_CLIP_SPACE_DEF (0 << 22) +# define R300_DX_CLIP_SPACE_DEF (1 << 22) +# define R500_TCL_STATE_OPTIMIZATION (1 << 23) + +/* This register is written directly and also starts data section + * in many 3d CP_PACKET3's + */ +#define R300_VAP_VF_CNTL 0x2084 +# define R300_VAP_VF_CNTL__PRIM_TYPE__SHIFT 0 +# define R300_VAP_VF_CNTL__PRIM_NONE (0<<0) +# define R300_VAP_VF_CNTL__PRIM_POINTS (1<<0) +# define R300_VAP_VF_CNTL__PRIM_LINES (2<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_STRIP (3<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLES (4<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN (5<<0) +# define R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP (6<<0) +# define R300_VAP_VF_CNTL__PRIM_LINE_LOOP (12<<0) +# define R300_VAP_VF_CNTL__PRIM_QUADS (13<<0) +# define R300_VAP_VF_CNTL__PRIM_QUAD_STRIP (14<<0) +# define R300_VAP_VF_CNTL__PRIM_POLYGON (15<<0) + +# define R300_VAP_VF_CNTL__PRIM_WALK__SHIFT 4 + /* State based - direct writes to registers trigger vertex + generation */ +# define R300_VAP_VF_CNTL__PRIM_WALK_STATE_BASED (0<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_INDICES (1<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST (2<<4) +# define R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_EMBEDDED (3<<4) + + /* I don't think I saw these three used.. 
*/ +# define R300_VAP_VF_CNTL__COLOR_ORDER__SHIFT 6 +# define R300_VAP_VF_CNTL__TCL_OUTPUT_CTL_ENA__SHIFT 9 +# define R300_VAP_VF_CNTL__PROG_STREAM_ENA__SHIFT 10 + + /* index size - when not set the indices are assumed to be 16 bit */ +# define R300_VAP_VF_CNTL__INDEX_SIZE_32bit (1<<11) + /* number of vertices */ +# define R300_VAP_VF_CNTL__NUM_VERTICES__SHIFT 16 + +#define R500_VAP_INDEX_OFFSET 0x208c + +#define R300_VAP_OUTPUT_VTX_FMT_0 0x2090 +# define R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT (1<<0) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT (1<<1) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_1_PRESENT (1<<2) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT (1<<3) +# define R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT (1<<4) +# define R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT (1<<16) + +#define R300_VAP_OUTPUT_VTX_FMT_1 0x2094 + /* each of the following is 3 bits wide, specifies number + of components */ +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 +# define R300_VAP_OUTPUT_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 +# define R300_VAP_OUTPUT_VTX_FMT_1__NOT_PRESENT 0 +# define R300_VAP_OUTPUT_VTX_FMT_1__1_COMPONENT 1 +# define R300_VAP_OUTPUT_VTX_FMT_1__2_COMPONENTS 2 +# define R300_VAP_OUTPUT_VTX_FMT_1__3_COMPONENTS 3 +# define R300_VAP_OUTPUT_VTX_FMT_1__4_COMPONENTS 4 + +#define R300_SE_VTE_CNTL 0x20b0 +# define R300_VPORT_X_SCALE_ENA (1 << 0) +# define R300_VPORT_X_OFFSET_ENA (1 << 1) +# define R300_VPORT_Y_SCALE_ENA (1 << 2) +# define R300_VPORT_Y_OFFSET_ENA (1 << 3) +# define R300_VPORT_Z_SCALE_ENA (1 << 4) +# define R300_VPORT_Z_OFFSET_ENA (1 << 5) +# define R300_VTX_XY_FMT (1 << 8) +# 
define R300_VTX_Z_FMT (1 << 9) +# define R300_VTX_W0_FMT (1 << 10) +# define R300_SERIAL_PROC_ENA (1 << 11) + +#define R300_VAP_VTX_SIZE 0x20b4 + +/* BEGIN: Vertex data assembly - lots of uncertainties */ + +/* gap */ + +/* Maximum Vertex Indx Clamp */ +#define R300_VAP_VF_MAX_VTX_INDX 0x2134 +/* Minimum Vertex Indx Clamp */ +#define R300_VAP_VF_MIN_VTX_INDX 0x2138 + +/** Vertex assembler/processor control status */ +#define R300_VAP_CNTL_STATUS 0x2140 +/* No swap at all (default) */ +# define R300_VC_NO_SWAP (0 << 0) +/* 16-bit swap: 0xAABBCCDD becomes 0xBBAADDCC */ +# define R300_VC_16BIT_SWAP (1 << 0) +/* 32-bit swap: 0xAABBCCDD becomes 0xDDCCBBAA */ +# define R300_VC_32BIT_SWAP (2 << 0) +/* Half-dword swap: 0xAABBCCDD becomes 0xCCDDAABB */ +# define R300_VC_HALF_DWORD_SWAP (3 << 0) +/* The TCL engine will not be used (as it is logically or even physically removed) */ +# define R300_VAP_TCL_BYPASS (1 << 8) +/* Read only flag if TCL engine is busy. */ +# define R300_VAP_PVS_BUSY (1 << 11) +/* TODO: gap for MAX_MPS */ +/* Read only flag if the vertex store is busy. */ +# define R300_VAP_VS_BUSY (1 << 24) +/* Read only flag if the reciprocal engine is busy. */ +# define R300_VAP_RCP_BUSY (1 << 25) +/* Read only flag if the viewport transform engine is busy. */ +# define R300_VAP_VTE_BUSY (1 << 26) +/* Read only flag if the memory interface unit is busy. */ +# define R300_VAP_MUI_BUSY (1 << 27) +/* Read only flag if the vertex cache is busy. */ +# define R300_VAP_VC_BUSY (1 << 28) +/* Read only flag if the vertex fetcher is busy. */ +# define R300_VAP_VF_BUSY (1 << 29) +/* Read only flag if the register pipeline is busy. */ +# define R300_VAP_REGPIPE_BUSY (1 << 30) +/* Read only flag if the VAP engine is busy. */ +# define R300_VAP_VAP_BUSY (1 << 31) + +/* gap */ + +/* Where do we get our vertex data? + * + * Vertex data either comes either from immediate mode registers or from + * vertex arrays. 
+ * There appears to be no mixed mode (though we can force the pitch of + * vertex arrays to 0, effectively reusing the same element over and over + * again). + * + * Immediate mode is controlled by the INPUT_CNTL registers. I am not sure + * if these registers influence vertex array processing. + * + * Vertex arrays are controlled via the 3D_LOAD_VBPNTR packet3. + * + * In both cases, vertex attributes are then passed through INPUT_ROUTE. + * + * Beginning with INPUT_ROUTE_0_0 is a list of WORDs that route vertex data + * into the vertex processor's input registers. + * The first word routes the first input, the second word the second, etc. + * The corresponding input is routed into the register with the given index. + * The list is ended by a word with INPUT_ROUTE_END set. + * + * Always set COMPONENTS_4 in immediate mode. + */ + +#define R300_VAP_PROG_STREAM_CNTL_0 0x2150 +# define R300_DATA_TYPE_0_SHIFT 0 +# define R300_DATA_TYPE_FLOAT_1 0 +# define R300_DATA_TYPE_FLOAT_2 1 +# define R300_DATA_TYPE_FLOAT_3 2 +# define R300_DATA_TYPE_FLOAT_4 3 +# define R300_DATA_TYPE_BYTE 4 +# define R300_DATA_TYPE_D3DCOLOR 5 +# define R300_DATA_TYPE_SHORT_2 6 +# define R300_DATA_TYPE_SHORT_4 7 +# define R300_DATA_TYPE_VECTOR_3_TTT 8 +# define R300_DATA_TYPE_VECTOR_3_EET 9 +# define R300_SKIP_DWORDS_SHIFT 4 +# define R300_DST_VEC_LOC_SHIFT 8 +# define R300_LAST_VEC (1 << 13) +# define R300_SIGNED (1 << 14) +# define R300_NORMALIZE (1 << 15) +# define R300_DATA_TYPE_1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_1 0x2154 +#define R300_VAP_PROG_STREAM_CNTL_2 0x2158 +#define R300_VAP_PROG_STREAM_CNTL_3 0x215C +#define R300_VAP_PROG_STREAM_CNTL_4 0x2160 +#define R300_VAP_PROG_STREAM_CNTL_5 0x2164 +#define R300_VAP_PROG_STREAM_CNTL_6 0x2168 +#define R300_VAP_PROG_STREAM_CNTL_7 0x216C +/* gap */ + +/* Notes: + * - always set up to produce at least two attributes: + * if vertex program uses only position, fglrx will set normal, too + * - INPUT_CNTL_0_COLOR and INPUT_CNTL_COLOR bits are 
always equal. + */ +#define R300_VAP_VTX_STATE_CNTL 0x2180 +# define R300_COLOR_0_ASSEMBLY_SHIFT 0 +# define R300_SEL_COLOR 0 +# define R300_SEL_USER_COLOR_0 1 +# define R300_SEL_USER_COLOR_1 2 +# define R300_COLOR_1_ASSEMBLY_SHIFT 2 +# define R300_COLOR_2_ASSEMBLY_SHIFT 4 +# define R300_COLOR_3_ASSEMBLY_SHIFT 6 +# define R300_COLOR_4_ASSEMBLY_SHIFT 8 +# define R300_COLOR_5_ASSEMBLY_SHIFT 10 +# define R300_COLOR_6_ASSEMBLY_SHIFT 12 +# define R300_COLOR_7_ASSEMBLY_SHIFT 14 +# define R300_UPDATE_USER_COLOR_0_ENA (1 << 16) + +/* + * Each bit in this field applies to the corresponding vector in the VSM + * memory (i.e. Bit 0 applies to VECTOR_0 (POSITION), etc.). If the bit + * is set, then the corresponding 4-Dword Vector is output into the Vertex Stream. + */ +#define R300_VAP_VSM_VTX_ASSM 0x2184 +# define R300_INPUT_CNTL_POS 0x00000001 +# define R300_INPUT_CNTL_NORMAL 0x00000002 +# define R300_INPUT_CNTL_COLOR 0x00000004 +# define R300_INPUT_CNTL_TC0 0x00000400 +# define R300_INPUT_CNTL_TC1 0x00000800 +# define R300_INPUT_CNTL_TC2 0x00001000 /* GUESS */ +# define R300_INPUT_CNTL_TC3 0x00002000 /* GUESS */ +# define R300_INPUT_CNTL_TC4 0x00004000 /* GUESS */ +# define R300_INPUT_CNTL_TC5 0x00008000 /* GUESS */ +# define R300_INPUT_CNTL_TC6 0x00010000 /* GUESS */ +# define R300_INPUT_CNTL_TC7 0x00020000 /* GUESS */ + +/* Programmable Stream Control Signed Normalize Control */ +#define R300_VAP_PSC_SGN_NORM_CNTL 0x21dc +# define SGN_NORM_ZERO 0 +# define SGN_NORM_ZERO_CLAMP_MINUS_ONE 1 +# define SGN_NORM_NO_ZERO 2 + +/* gap */ + +/* Words parallel to INPUT_ROUTE_0; All words that are active in INPUT_ROUTE_0 + * are set to a swizzling bit pattern, other words are 0. + * + * In immediate mode, the pattern is always set to xyzw. In vertex array + * mode, the swizzling pattern is e.g. used to set zw components in texture + * coordinates with only two components. 
+ */ +#define R300_VAP_PROG_STREAM_CNTL_EXT_0 0x21e0 +# define R300_SWIZZLE0_SHIFT 0 +# define R300_SWIZZLE_SELECT_X_SHIFT 0 +# define R300_SWIZZLE_SELECT_Y_SHIFT 3 +# define R300_SWIZZLE_SELECT_Z_SHIFT 6 +# define R300_SWIZZLE_SELECT_W_SHIFT 9 + +# define R300_SWIZZLE_SELECT_X 0 +# define R300_SWIZZLE_SELECT_Y 1 +# define R300_SWIZZLE_SELECT_Z 2 +# define R300_SWIZZLE_SELECT_W 3 +# define R300_SWIZZLE_SELECT_FP_ZERO 4 +# define R300_SWIZZLE_SELECT_FP_ONE 5 +/* alternate forms for r300_emit.c */ +# define R300_INPUT_ROUTE_SELECT_X 0 +# define R300_INPUT_ROUTE_SELECT_Y 1 +# define R300_INPUT_ROUTE_SELECT_Z 2 +# define R300_INPUT_ROUTE_SELECT_W 3 +# define R300_INPUT_ROUTE_SELECT_ZERO 4 +# define R300_INPUT_ROUTE_SELECT_ONE 5 + +# define R300_WRITE_ENA_SHIFT 12 +# define R300_WRITE_ENA_X 1 +# define R300_WRITE_ENA_Y 2 +# define R300_WRITE_ENA_Z 4 +# define R300_WRITE_ENA_W 8 +# define R300_SWIZZLE1_SHIFT 16 +#define R300_VAP_PROG_STREAM_CNTL_EXT_1 0x21e4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_2 0x21e8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_3 0x21ec +#define R300_VAP_PROG_STREAM_CNTL_EXT_4 0x21f0 +#define R300_VAP_PROG_STREAM_CNTL_EXT_5 0x21f4 +#define R300_VAP_PROG_STREAM_CNTL_EXT_6 0x21f8 +#define R300_VAP_PROG_STREAM_CNTL_EXT_7 0x21fc + +/* END: Vertex data assembly */ + +/* gap */ + +/* BEGIN: Upload vertex program and data */ + +/* + * The programmable vertex shader unit has a memory bank of unknown size + * that can be written to in 16 byte units by writing the address into + * UPLOAD_ADDRESS, followed by data in UPLOAD_DATA (multiples of 4 DWORDs). + * + * Pointers into the memory bank are always in multiples of 16 bytes. + * + * The memory bank is divided into areas with fixed meaning. + * + * Starting at address UPLOAD_PROGRAM: Vertex program instructions. + * Native limits reported by drivers from ATI suggest size 256 (i.e. 4KB), + * whereas the difference between known addresses suggests size 512. 
+ * + * Starting at address UPLOAD_PARAMETERS: Vertex program parameters. + * Native reported limits and the VPI layout suggest size 256, whereas + * difference between known addresses suggests size 512. + * + * At address UPLOAD_POINTSIZE is a vector (0, 0, ps, 0), where ps is the + * floating point pointsize. The exact purpose of this state is uncertain, + * as there is also the R300_RE_POINTSIZE register. + * + * Multiple vertex programs and parameter sets can be loaded at once, + * which could explain the size discrepancy. + */ +#define R300_VAP_PVS_VECTOR_INDX_REG 0x2200 +# define R300_PVS_CODE_START 0 +# define R300_MAX_PVS_CODE_LINES 256 +# define R500_MAX_PVS_CODE_LINES 1024 +# define R300_PVS_CONST_START 512 +# define R500_PVS_CONST_START 1024 +# define R300_MAX_PVS_CONST_VECS 256 +# define R500_MAX_PVS_CONST_VECS 1024 +# define R300_PVS_UCP_START 1024 +# define R500_PVS_UCP_START 1536 +# define R300_POINT_VPORT_SCALE_OFFSET 1030 +# define R500_POINT_VPORT_SCALE_OFFSET 1542 +# define R300_POINT_GEN_TEX_OFFSET 1031 +# define R500_POINT_GEN_TEX_OFFSET 1543 + +/* + * These are obsolete defines form r300_context.h, but they might give some + * clues when investigating the addresses further... + */ +#if 0 +#define VSF_DEST_PROGRAM 0x0 +#define VSF_DEST_MATRIX0 0x200 +#define VSF_DEST_MATRIX1 0x204 +#define VSF_DEST_MATRIX2 0x208 +#define VSF_DEST_VECTOR0 0x20c +#define VSF_DEST_VECTOR1 0x20d +#define VSF_DEST_UNKNOWN1 0x400 +#define VSF_DEST_UNKNOWN2 0x406 +#endif + +/* gap */ + +#define R300_VAP_PVS_UPLOAD_DATA 0x2208 + +/* END: Upload vertex program and data */ + +/* gap */ + +/* I do not know the purpose of this register. However, I do know that + * it is set to 221C_CLEAR for clear operations and to 221C_NORMAL + * for normal rendering. + * + * 2007-11-05: This register is the user clip plane control register, but there + * also seems to be a rendering mode control; the NORMAL/CLEAR defines. + * + * See bug #9871. 
http://bugs.freedesktop.org/attachment.cgi?id=10672&action=view + */ +#define R300_VAP_CLIP_CNTL 0x221C +# define R300_VAP_UCP_ENABLE_0 (1 << 0) +# define R300_VAP_UCP_ENABLE_1 (1 << 1) +# define R300_VAP_UCP_ENABLE_2 (1 << 2) +# define R300_VAP_UCP_ENABLE_3 (1 << 3) +# define R300_VAP_UCP_ENABLE_4 (1 << 4) +# define R300_VAP_UCP_ENABLE_5 (1 << 5) +# define R300_PS_UCP_MODE_DIST_COP (0 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP (1 << 14) +# define R300_PS_UCP_MODE_RADIUS_COP_CLIP (2 << 14) +# define R300_PS_UCP_MODE_CLIP_AS_TRIFAN (3 << 14) +# define R300_CLIP_DISABLE (1 << 16) +# define R300_UCP_CULL_ONLY_ENABLE (1 << 17) +# define R300_BOUNDARY_EDGE_FLAG_ENABLE (1 << 18) +# define R500_COLOR2_IS_TEXTURE (1 << 20) +# define R500_COLOR3_IS_TEXTURE (1 << 21) + +/* These seem to be per-pixel and per-vertex X and Y clipping planes. The first + * plane is per-pixel and the second plane is per-vertex. + * + * This was determined by experimentation alone but I believe it is correct. + * + * These registers are called X_QUAD0_1_FL to X_QUAD0_4_FL by glxtest. + */ +#define R300_VAP_GB_VERT_CLIP_ADJ 0x2220 +#define R300_VAP_GB_VERT_DISC_ADJ 0x2224 +#define R300_VAP_GB_HORZ_CLIP_ADJ 0x2228 +#define R300_VAP_GB_HORZ_DISC_ADJ 0x222c + +/* gap */ + +/* Sometimes, END_OF_PKT and 0x2284=0 are the only commands sent between + * rendering commands and overwriting vertex program parameters. + * Therefore, I suspect writing zero to 0x2284 synchronizes the engine and + * avoids bugs caused by still running shaders reading bad data from memory. + */ +#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284 + +/* This register is used to define the number of core clocks to wait for a + * vertex to be received by the VAP input controller (while the primitive + * path is backed up) before forcing any accumulated vertices to be submitted + * to the vertex processing path. 
+ */ +#define VAP_PVS_VTX_TIMEOUT_REG 0x2288 +# define R300_2288_R300 0x00750000 /* -- nh */ +# define R300_2288_RV350 0x0000FFFF /* -- Vladimir */ + +/* gap */ + +/* Addresses are relative to the vertex program instruction area of the + * memory bank. PROGRAM_END points to the last instruction of the active + * program + * + * The meaning of the two UNKNOWN fields is obviously not known. However, + * experiments so far have shown that both *must* point to an instruction + * inside the vertex program, otherwise the GPU locks up. + * + * fglrx usually sets CNTL_3_UNKNOWN to the end of the program and + * R300_PVS_CNTL_1_POS_END_SHIFT points to instruction where last write to + * position takes place. + * + * Most likely this is used to ignore rest of the program in cases + * where group of verts aren't visible. For some reason this "section" + * is sometimes accepted other instruction that have no relationship with + * position calculations. + */ +#define R300_VAP_PVS_CODE_CNTL_0 0x22D0 +# define R300_PVS_FIRST_INST_SHIFT 0 +# define R300_PVS_XYZW_VALID_INST_SHIFT 10 +# define R300_PVS_LAST_INST_SHIFT 20 +/* Addresses are relative to the vertex program parameters area. 
*/ +#define R300_VAP_PVS_CONST_CNTL 0x22D4 +# define R300_PVS_CONST_BASE_OFFSET_SHIFT 0 +# define R300_PVS_MAX_CONST_ADDR_SHIFT 16 +#define R300_VAP_PVS_CODE_CNTL_1 0x22D8 +# define R300_PVS_LAST_VTX_SRC_INST_SHIFT 0 +#define R300_VAP_PVS_FLOW_CNTL_OPC 0x22DC + +/* The entire range from 0x2300 to 0x2AC inclusive seems to be used for + * immediate vertices + */ +#define R300_VAP_VTX_COLOR_R 0x2464 +#define R300_VAP_VTX_COLOR_G 0x2468 +#define R300_VAP_VTX_COLOR_B 0x246C +#define R300_VAP_VTX_POS_0_X_1 0x2490 /* used for glVertex2*() */ +#define R300_VAP_VTX_POS_0_Y_1 0x2494 +#define R300_VAP_VTX_COLOR_PKD 0x249C /* RGBA */ +#define R300_VAP_VTX_POS_0_X_2 0x24A0 /* used for glVertex3*() */ +#define R300_VAP_VTX_POS_0_Y_2 0x24A4 +#define R300_VAP_VTX_POS_0_Z_2 0x24A8 +/* write 0 to indicate end of packet? */ +#define R300_VAP_VTX_END_OF_PKT 0x24AC + +/* gap */ + +/* These are values from r300_reg/r300_reg.h - they are known to be correct + * and are here so we can use one register file instead of several + * - Vladimir + */ +#define R300_GB_VAP_RASTER_VTX_FMT_0 0x4000 +# define R300_GB_VAP_RASTER_VTX_FMT_0__POS_PRESENT (1<<0) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_0_PRESENT (1<<1) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_1_PRESENT (1<<2) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_2_PRESENT (1<<3) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_3_PRESENT (1<<4) +# define R300_GB_VAP_RASTER_VTX_FMT_0__COLOR_SPACE (0xf<<5) +# define R300_GB_VAP_RASTER_VTX_FMT_0__PT_SIZE_PRESENT (0x1<<16) + +#define R300_GB_VAP_RASTER_VTX_FMT_1 0x4004 + /* each of the following is 3 bits wide, specifies number + of components */ +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_0_COMP_CNT_SHIFT 0 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_1_COMP_CNT_SHIFT 3 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_2_COMP_CNT_SHIFT 6 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_3_COMP_CNT_SHIFT 9 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_4_COMP_CNT_SHIFT 12 +# define 
R300_GB_VAP_RASTER_VTX_FMT_1__TEX_5_COMP_CNT_SHIFT 15 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_6_COMP_CNT_SHIFT 18 +# define R300_GB_VAP_RASTER_VTX_FMT_1__TEX_7_COMP_CNT_SHIFT 21 + +/* UNK30 seems to enables point to quad transformation on textures + * (or something closely related to that). + * This bit is rather fatal at the time being due to lackings at pixel + * shader side + * Specifies top of Raster pipe specific enable controls. + */ +#define R300_GB_ENABLE 0x4008 +# define R300_GB_POINT_STUFF_DISABLE (0 << 0) +# define R300_GB_POINT_STUFF_ENABLE (1 << 0) /* Specifies if points will have stuffed texture coordinates. */ +# define R300_GB_LINE_STUFF_DISABLE (0 << 1) +# define R300_GB_LINE_STUFF_ENABLE (1 << 1) /* Specifies if lines will have stuffed texture coordinates. */ +# define R300_GB_TRIANGLE_STUFF_DISABLE (0 << 2) +# define R300_GB_TRIANGLE_STUFF_ENABLE (1 << 2) /* Specifies if triangles will have stuffed texture coordinates. */ +# define R300_GB_STENCIL_AUTO_DISABLE (0 << 4) +# define R300_GB_STENCIL_AUTO_ENABLE (1 << 4) /* Enable stencil auto inc/dec based on triangle cw/ccw, force into dzy low bit. */ +# define R300_GB_STENCIL_AUTO_FORCE (2 << 4) /* Force 0 into dzy low bit. */ + + /* each of the following is 2 bits wide */ +#define R300_GB_TEX_REPLICATE 0 /* Replicate VAP source texture coordinates (S,T,[R,Q]). */ +#define R300_GB_TEX_ST 1 /* Stuff with source texture coordinates (S,T). */ +#define R300_GB_TEX_STR 2 /* Stuff with source texture coordinates (S,T,R). */ +# define R300_GB_TEX0_SOURCE_SHIFT 16 +# define R300_GB_TEX1_SOURCE_SHIFT 18 +# define R300_GB_TEX2_SOURCE_SHIFT 20 +# define R300_GB_TEX3_SOURCE_SHIFT 22 +# define R300_GB_TEX4_SOURCE_SHIFT 24 +# define R300_GB_TEX5_SOURCE_SHIFT 26 +# define R300_GB_TEX6_SOURCE_SHIFT 28 +# define R300_GB_TEX7_SOURCE_SHIFT 30 + +/* MSPOS - positions for multisample antialiasing (?) 
*/ +#define R300_GB_MSPOS0 0x4010 + /* shifts - each of the fields is 4 bits */ +# define R300_GB_MSPOS0__MS_X0_SHIFT 0 +# define R300_GB_MSPOS0__MS_Y0_SHIFT 4 +# define R300_GB_MSPOS0__MS_X1_SHIFT 8 +# define R300_GB_MSPOS0__MS_Y1_SHIFT 12 +# define R300_GB_MSPOS0__MS_X2_SHIFT 16 +# define R300_GB_MSPOS0__MS_Y2_SHIFT 20 +# define R300_GB_MSPOS0__MSBD0_Y 24 +# define R300_GB_MSPOS0__MSBD0_X 28 + +#define R300_GB_MSPOS1 0x4014 +# define R300_GB_MSPOS1__MS_X3_SHIFT 0 +# define R300_GB_MSPOS1__MS_Y3_SHIFT 4 +# define R300_GB_MSPOS1__MS_X4_SHIFT 8 +# define R300_GB_MSPOS1__MS_Y4_SHIFT 12 +# define R300_GB_MSPOS1__MS_X5_SHIFT 16 +# define R300_GB_MSPOS1__MS_Y5_SHIFT 20 +# define R300_GB_MSPOS1__MSBD1 24 + +/* Specifies the graphics pipeline configuration for rasterization. */ +#define R300_GB_TILE_CONFIG 0x4018 +# define R300_GB_TILE_DISABLE (0 << 0) +# define R300_GB_TILE_ENABLE (1 << 0) +# define R300_GB_TILE_PIPE_COUNT_RV300 (0 << 1) /* RV350 (1 pipe, 1 ctx) */ +# define R300_GB_TILE_PIPE_COUNT_R300 (3 << 1) /* R300 (2 pipes, 1 ctx) */ +# define R300_GB_TILE_PIPE_COUNT_R420_3P (6 << 1) /* R420-3P (3 pipes, 1 ctx) */ +# define R300_GB_TILE_PIPE_COUNT_R420 (7 << 1) /* R420 (4 pipes, 1 ctx) */ +# define R300_GB_TILE_SIZE_8 (0 << 4) +# define R300_GB_TILE_SIZE_16 (1 << 4) +# define R300_GB_TILE_SIZE_32 (2 << 4) +# define R300_GB_SUPER_SIZE_1 (0 << 6) +# define R300_GB_SUPER_SIZE_2 (1 << 6) +# define R300_GB_SUPER_SIZE_4 (2 << 6) +# define R300_GB_SUPER_SIZE_8 (3 << 6) +# define R300_GB_SUPER_SIZE_16 (4 << 6) +# define R300_GB_SUPER_SIZE_32 (5 << 6) +# define R300_GB_SUPER_SIZE_64 (6 << 6) +# define R300_GB_SUPER_SIZE_128 (7 << 6) +# define R300_GB_SUPER_X_SHIFT 9 /* 3 bits wide */ +# define R300_GB_SUPER_Y_SHIFT 12 /* 3 bits wide */ +# define R300_GB_SUPER_TILE_A (0 << 15) +# define R300_GB_SUPER_TILE_B (1 << 15) +# define R300_GB_SUBPIXEL_1_12 (0 << 16) +# define R300_GB_SUBPIXEL_1_16 (1 << 16) +# define GB_TILE_CONFIG_QUADS_PER_RAS_4 (0 << 17) +# define 
GB_TILE_CONFIG_QUADS_PER_RAS_8 (1 << 17) +# define GB_TILE_CONFIG_QUADS_PER_RAS_16 (2 << 17) +# define GB_TILE_CONFIG_QUADS_PER_RAS_32 (3 << 17) +# define GB_TILE_CONFIG_BB_SCAN_INTERCEPT (0 << 19) +# define GB_TILE_CONFIG_BB_SCAN_BOUND_BOX (1 << 19) +# define GB_TILE_CONFIG_ALT_SCAN_EN_LR (0 << 20) +# define GB_TILE_CONFIG_ALT_SCAN_EN_LRL (1 << 20) +# define GB_TILE_CONFIG_ALT_OFFSET (0 << 21) +# define GB_TILE_CONFIG_SUBPRECISION (0 << 22) +# define GB_TILE_CONFIG_ALT_TILING_DEF (0 << 23) +# define GB_TILE_CONFIG_ALT_TILING_3_2 (1 << 23) +# define GB_TILE_CONFIG_Z_EXTENDED_24_1 (0 << 24) +# define GB_TILE_CONFIG_Z_EXTENDED_S25_1 (1 << 24) + +/* Specifies the sizes of the various FIFO`s in the sc/rs/us. This register must be the first one written */ +#define R300_GB_FIFO_SIZE 0x4024 + /* each of the following is 2 bits wide */ +#define R300_GB_FIFO_SIZE_32 0 +#define R300_GB_FIFO_SIZE_64 1 +#define R300_GB_FIFO_SIZE_128 2 +#define R300_GB_FIFO_SIZE_256 3 +# define R300_SC_IFIFO_SIZE_SHIFT 0 +# define R300_SC_TZFIFO_SIZE_SHIFT 2 +# define R300_SC_BFIFO_SIZE_SHIFT 4 + +# define R300_US_OFIFO_SIZE_SHIFT 12 +# define R300_US_WFIFO_SIZE_SHIFT 14 + /* the following use the same constants as above, but meaning is + is times 2 (i.e. instead of 32 words it means 64 */ +# define R300_RS_TFIFO_SIZE_SHIFT 6 +# define R300_RS_CFIFO_SIZE_SHIFT 8 +# define R300_US_RAM_SIZE_SHIFT 10 + /* watermarks, 3 bits wide */ +# define R300_RS_HIGHWATER_COL_SHIFT 16 +# define R300_RS_HIGHWATER_TEX_SHIFT 19 +# define R300_OFIFO_HIGHWATER_SHIFT 22 /* two bits only */ +# define R300_CUBE_FIFO_HIGHWATER_COL_SHIFT 24 + +#define GB_Z_PEQ_CONFIG 0x4028 +# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_4_4 (0 << 0) +# define GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8 (1 << 0) + +/* Specifies various polygon specific selects (fog, depth, perspective). 
*/ +#define R300_GB_SELECT 0x401c +# define R300_GB_FOG_SELECT_C0A (0 << 0) +# define R300_GB_FOG_SELECT_C1A (1 << 0) +# define R300_GB_FOG_SELECT_C2A (2 << 0) +# define R300_GB_FOG_SELECT_C3A (3 << 0) +# define R300_GB_FOG_SELECT_1_1_W (4 << 0) +# define R300_GB_FOG_SELECT_Z (5 << 0) +# define R300_GB_DEPTH_SELECT_Z (0 << 3) +# define R300_GB_DEPTH_SELECT_1_1_W (1 << 3) +# define R300_GB_W_SELECT_1_W (0 << 4) +# define R300_GB_W_SELECT_1 (1 << 4) +# define R300_GB_FOG_STUFF_DISABLE (0 << 5) +# define R300_GB_FOG_STUFF_ENABLE (1 << 5) +# define R300_GB_FOG_STUFF_TEX_SHIFT 6 +# define R300_GB_FOG_STUFF_TEX_MASK 0x000003c0 +# define R300_GB_FOG_STUFF_COMP_SHIFT 10 +# define R300_GB_FOG_STUFF_COMP_MASK 0x00000c00 + +/* Specifies the graphics pipeline configuration for antialiasing. */ +#define R300_GB_AA_CONFIG 0x4020 +# define GB_AA_CONFIG_AA_DISABLE (0 << 0) +# define GB_AA_CONFIG_AA_ENABLE (1 << 0) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_2 (0 << 1) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_3 (1 << 1) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_4 (2 << 1) +# define GB_AA_CONFIG_NUM_AA_SUBSAMPLES_6 (3 << 1) + +/* Selects which of 4 pipes are active. */ +#define GB_PIPE_SELECT 0x402c +# define GB_PIPE_SELECT_PIPE0_ID_SHIFT 0 +# define GB_PIPE_SELECT_PIPE1_ID_SHIFT 2 +# define GB_PIPE_SELECT_PIPE2_ID_SHIFT 4 +# define GB_PIPE_SELECT_PIPE3_ID_SHIFT 6 +# define GB_PIPE_SELECT_PIPE_MASK_SHIFT 8 +# define GB_PIPE_SELECT_MAX_PIPE 12 +# define GB_PIPE_SELECT_BAD_PIPES 14 +# define GB_PIPE_SELECT_CONFIG_PIPES 18 + + +/* Specifies the sizes of the various FIFO`s in the sc/rs. 
*/ +#define GB_FIFO_SIZE1 0x4070 +/* High water mark for SC input fifo */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_SHIFT 0 +# define GB_FIFO_SIZE1_SC_HIGHWATER_IFIFO_MASK 0x0000003f +/* High water mark for SC input fifo (B) */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_SHIFT 6 +# define GB_FIFO_SIZE1_SC_HIGHWATER_BFIFO_MASK 0x00000fc0 +/* High water mark for RS colors' fifo */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_SHIFT 12 +# define GB_FIFO_SIZE1_SC_HIGHWATER_COL_MASK 0x0003f000 +/* High water mark for RS textures' fifo */ +# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_SHIFT 18 +# define GB_FIFO_SIZE1_SC_HIGHWATER_TEX_MASK 0x00fc0000 + +/* This table specifies the source location and format for up to 16 texture + * addresses (i[0]:i[15]) and four colors (c[0]:c[3]) + */ +#define R500_RS_IP_0 0x4074 +#define R500_RS_IP_1 0x4078 +#define R500_RS_IP_2 0x407C +#define R500_RS_IP_3 0x4080 +#define R500_RS_IP_4 0x4084 +#define R500_RS_IP_5 0x4088 +#define R500_RS_IP_6 0x408C +#define R500_RS_IP_7 0x4090 +#define R500_RS_IP_8 0x4094 +#define R500_RS_IP_9 0x4098 +#define R500_RS_IP_10 0x409C +#define R500_RS_IP_11 0x40A0 +#define R500_RS_IP_12 0x40A4 +#define R500_RS_IP_13 0x40A8 +#define R500_RS_IP_14 0x40AC +#define R500_RS_IP_15 0x40B0 +#define R500_RS_IP_PTR_K0 62 +#define R500_RS_IP_PTR_K1 63 +#define R500_RS_IP_TEX_PTR_S_SHIFT 0 +#define R500_RS_IP_TEX_PTR_T_SHIFT 6 +#define R500_RS_IP_TEX_PTR_R_SHIFT 12 +#define R500_RS_IP_TEX_PTR_Q_SHIFT 18 +#define R500_RS_IP_COL_PTR_SHIFT 24 +#define R500_RS_IP_COL_FMT_SHIFT 27 +# define R500_RS_COL_PTR(x) ((x) << 24) +# define R500_RS_COL_FMT(x) ((x) << 27) +/* gap */ +#define R500_RS_IP_OFFSET_DIS (0 << 31) +#define R500_RS_IP_OFFSET_EN (1 << 31) + +/* gap */ + +/* Zero to flush caches. */ +#define R300_TX_INVALTAGS 0x4100 +#define R300_TX_FLUSH 0x0 + +/* The upper enable bits are guessed, based on fglrx reported limits. 
*/ +#define R300_TX_ENABLE 0x4104 +# define R300_TX_ENABLE_0 (1 << 0) +# define R300_TX_ENABLE_1 (1 << 1) +# define R300_TX_ENABLE_2 (1 << 2) +# define R300_TX_ENABLE_3 (1 << 3) +# define R300_TX_ENABLE_4 (1 << 4) +# define R300_TX_ENABLE_5 (1 << 5) +# define R300_TX_ENABLE_6 (1 << 6) +# define R300_TX_ENABLE_7 (1 << 7) +# define R300_TX_ENABLE_8 (1 << 8) +# define R300_TX_ENABLE_9 (1 << 9) +# define R300_TX_ENABLE_10 (1 << 10) +# define R300_TX_ENABLE_11 (1 << 11) +# define R300_TX_ENABLE_12 (1 << 12) +# define R300_TX_ENABLE_13 (1 << 13) +# define R300_TX_ENABLE_14 (1 << 14) +# define R300_TX_ENABLE_15 (1 << 15) + +#define R500_TX_FILTER_4 0x4110 +# define R500_TX_WEIGHT_1_SHIFT (0) +# define R500_TX_WEIGHT_0_SHIFT (11) +# define R500_TX_WEIGHT_PAIR (1<<22) +# define R500_TX_PHASE_SHIFT (23) +# define R500_TX_DIRECTION_HORIZONTAL (0<<27) +# define R500_TX_DIRECTION_VERITCAL (1<<27) + +/* S Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ +#define R300_GA_POINT_S0 0x4200 + +/* T Texture Coordinate of Vertex 0 for Point texture stuffing (LLC) */ +#define R300_GA_POINT_T0 0x4204 + +/* S Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ +#define R300_GA_POINT_S1 0x4208 + +/* T Texture Coordinate of Vertex 2 for Point texture stuffing (URC) */ +#define R300_GA_POINT_T1 0x420c + +/* Specifies amount to shift integer position of vertex (screen space) before + * converting to float for triangle stipple. + */ +#define R300_GA_TRIANGLE_STIPPLE 0x4214 +# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_SHIFT 0 +# define R300_GA_TRIANGLE_STIPPLE_X_SHIFT_MASK 0x0000000f +# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT 16 +# define R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_MASK 0x000f0000 + +/* The pointsize is given in multiples of 6. The pointsize can be enormous: + * Clear() renders a single point that fills the entire framebuffer. + * 1/2 Height of point; fixed (16.0), subpixel format (1/12 or 1/16, even if in + * 8b precision). 
+ */ +#define R300_GA_POINT_SIZE 0x421C +# define R300_POINTSIZE_Y_SHIFT 0 +# define R300_POINTSIZE_Y_MASK 0x0000ffff +# define R300_POINTSIZE_X_SHIFT 16 +# define R300_POINTSIZE_X_MASK 0xffff0000 +# define R300_POINTSIZE_MAX (R300_POINTSIZE_Y_MASK / 6) + +/* Red fill color */ +#define R500_GA_FILL_R 0x4220 + +/* Green fill color */ +#define R500_GA_FILL_G 0x4224 + +/* Blue fill color */ +#define R500_GA_FILL_B 0x4228 + +/* Alpha fill color */ +#define R500_GA_FILL_A 0x422c + + +/* Specifies maximum and minimum point & sprite sizes for per vertex size + * specification. The lower part (15:0) is MIN and (31:16) is max. + */ +#define R300_GA_POINT_MINMAX 0x4230 +# define R300_GA_POINT_MINMAX_MIN_SHIFT 0 +# define R300_GA_POINT_MINMAX_MIN_MASK (0xFFFF << 0) +# define R300_GA_POINT_MINMAX_MAX_SHIFT 16 +# define R300_GA_POINT_MINMAX_MAX_MASK (0xFFFF << 16) + +/* 1/2 width of line, in subpixels (1/12 or 1/16 only, even in 8b + * subprecision); (16.0) fixed format. + * + * The line width is given in multiples of 6. + * In default mode lines are classified as vertical lines. + * HO: horizontal + * VE: vertical or horizontal + * HO & VE: no classification + */ +#define R300_GA_LINE_CNTL 0x4234 +# define R300_GA_LINE_CNTL_WIDTH_SHIFT 0 +# define R300_GA_LINE_CNTL_WIDTH_MASK 0x0000ffff +# define R300_GA_LINE_CNTL_END_TYPE_HOR (0 << 16) +# define R300_GA_LINE_CNTL_END_TYPE_VER (1 << 16) +# define R300_GA_LINE_CNTL_END_TYPE_SQR (2 << 16) /* horizontal or vertical depending upon slope */ +# define R300_GA_LINE_CNTL_END_TYPE_COMP (3 << 16) /* Computed (perpendicular to slope) */ +# define R500_GA_LINE_CNTL_SORT_NO (0 << 18) +# define R500_GA_LINE_CNTL_SORT_MINX_MINY (1 << 18) +/** TODO: looks wrong */ +# define R300_LINESIZE_MAX (R300_GA_LINE_CNTL_WIDTH_MASK / 6) +/** TODO: looks wrong */ +# define R300_LINE_CNT_HO (1 << 16) +/** TODO: looks wrong */ +# define R300_LINE_CNT_VE (1 << 17) + +/* Line Stipple configuration information. 
*/ +#define R300_GA_LINE_STIPPLE_CONFIG 0x4238 +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_NO (0 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE (1 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_PACKET (2 << 0) +# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_SHIFT 2 +# define R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK 0xfffffffc + +/* Used to load US instructions and constants */ +#define R500_GA_US_VECTOR_INDEX 0x4250 +# define R500_GA_US_VECTOR_INDEX_SHIFT 0 +# define R500_GA_US_VECTOR_INDEX_MASK 0x000000ff +# define R500_GA_US_VECTOR_INDEX_TYPE_INSTR (0 << 16) +# define R500_GA_US_VECTOR_INDEX_TYPE_CONST (1 << 16) +# define R500_GA_US_VECTOR_INDEX_CLAMP_NO (0 << 17) +# define R500_GA_US_VECTOR_INDEX_CLAMP_CONST (1 << 17) + +/* Data register for loading US instructions and constants */ +#define R500_GA_US_VECTOR_DATA 0x4254 + +/* Specifies color properties and mappings of textures. */ +#define R500_GA_COLOR_CONTROL_PS3 0x4258 +# define R500_TEX0_SHADING_PS3_SOLID (0 << 0) +# define R500_TEX0_SHADING_PS3_FLAT (1 << 0) +# define R500_TEX0_SHADING_PS3_GOURAUD (2 << 0) +# define R500_TEX1_SHADING_PS3_SOLID (0 << 2) +# define R500_TEX1_SHADING_PS3_FLAT (1 << 2) +# define R500_TEX1_SHADING_PS3_GOURAUD (2 << 2) +# define R500_TEX2_SHADING_PS3_SOLID (0 << 4) +# define R500_TEX2_SHADING_PS3_FLAT (1 << 4) +# define R500_TEX2_SHADING_PS3_GOURAUD (2 << 4) +# define R500_TEX3_SHADING_PS3_SOLID (0 << 6) +# define R500_TEX3_SHADING_PS3_FLAT (1 << 6) +# define R500_TEX3_SHADING_PS3_GOURAUD (2 << 6) +# define R500_TEX4_SHADING_PS3_SOLID (0 << 8) +# define R500_TEX4_SHADING_PS3_FLAT (1 << 8) +# define R500_TEX4_SHADING_PS3_GOURAUD (2 << 8) +# define R500_TEX5_SHADING_PS3_SOLID (0 << 10) +# define R500_TEX5_SHADING_PS3_FLAT (1 << 10) +# define R500_TEX5_SHADING_PS3_GOURAUD (2 << 10) +# define R500_TEX6_SHADING_PS3_SOLID (0 << 12) +# define R500_TEX6_SHADING_PS3_FLAT (1 << 12) +# define R500_TEX6_SHADING_PS3_GOURAUD (2 << 12) +# define 
R500_TEX7_SHADING_PS3_SOLID (0 << 14) +# define R500_TEX7_SHADING_PS3_FLAT (1 << 14) +# define R500_TEX7_SHADING_PS3_GOURAUD (2 << 14) +# define R500_TEX8_SHADING_PS3_SOLID (0 << 16) +# define R500_TEX8_SHADING_PS3_FLAT (1 << 16) +# define R500_TEX8_SHADING_PS3_GOURAUD (2 << 16) +# define R500_TEX9_SHADING_PS3_SOLID (0 << 18) +# define R500_TEX9_SHADING_PS3_FLAT (1 << 18) +# define R500_TEX9_SHADING_PS3_GOURAUD (2 << 18) +# define R500_TEX10_SHADING_PS3_SOLID (0 << 20) +# define R500_TEX10_SHADING_PS3_FLAT (1 << 20) +# define R500_TEX10_SHADING_PS3_GOURAUD (2 << 20) +# define R500_COLOR0_TEX_OVERRIDE_NO (0 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_0 (1 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_1 (2 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_2 (3 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_3 (4 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_4 (5 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_5 (6 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_6 (7 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_7 (8 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_8_C2 (9 << 22) +# define R500_COLOR0_TEX_OVERRIDE_TEX_9_C3 (10 << 22) +# define R500_COLOR1_TEX_OVERRIDE_NO (0 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_0 (1 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_1 (2 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_2 (3 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_3 (4 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_4 (5 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_5 (6 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_6 (7 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_7 (8 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_8_C2 (9 << 26) +# define R500_COLOR1_TEX_OVERRIDE_TEX_9_C3 (10 << 26) + +/* Returns idle status of various G3D block, captured when GA_IDLE written or + * when hard or soft reset asserted. 
+ */ +#define R500_GA_IDLE 0x425c +# define R500_GA_IDLE_PIPE3_Z_IDLE (0 << 0) +# define R500_GA_IDLE_PIPE2_Z_IDLE (0 << 1) +# define R500_GA_IDLE_PIPE3_CD_IDLE (0 << 2) +# define R500_GA_IDLE_PIPE2_CD_IDLE (0 << 3) +# define R500_GA_IDLE_PIPE3_FG_IDLE (0 << 4) +# define R500_GA_IDLE_PIPE2_FG_IDLE (0 << 5) +# define R500_GA_IDLE_PIPE3_US_IDLE (0 << 6) +# define R500_GA_IDLE_PIPE2_US_IDLE (0 << 7) +# define R500_GA_IDLE_PIPE3_SC_IDLE (0 << 8) +# define R500_GA_IDLE_PIPE2_SC_IDLE (0 << 9) +# define R500_GA_IDLE_PIPE3_RS_IDLE (0 << 10) +# define R500_GA_IDLE_PIPE2_RS_IDLE (0 << 11) +# define R500_GA_IDLE_PIPE1_Z_IDLE (0 << 12) +# define R500_GA_IDLE_PIPE0_Z_IDLE (0 << 13) +# define R500_GA_IDLE_PIPE1_CD_IDLE (0 << 14) +# define R500_GA_IDLE_PIPE0_CD_IDLE (0 << 15) +# define R500_GA_IDLE_PIPE1_FG_IDLE (0 << 16) +# define R500_GA_IDLE_PIPE0_FG_IDLE (0 << 17) +# define R500_GA_IDLE_PIPE1_US_IDLE (0 << 18) +# define R500_GA_IDLE_PIPE0_US_IDLE (0 << 19) +# define R500_GA_IDLE_PIPE1_SC_IDLE (0 << 20) +# define R500_GA_IDLE_PIPE0_SC_IDLE (0 << 21) +# define R500_GA_IDLE_PIPE1_RS_IDLE (0 << 22) +# define R500_GA_IDLE_PIPE0_RS_IDLE (0 << 23) +# define R500_GA_IDLE_SU_IDLE (0 << 24) +# define R500_GA_IDLE_GA_IDLE (0 << 25) +# define R500_GA_IDLE_GA_UNIT2_IDLE (0 << 26) + +/* Current value of stipple accumulator. */ +#define R300_GA_LINE_STIPPLE_VALUE 0x4260 + +/* S Texture Coordinate Value for Vertex 0 of Line (stuff textures -- i.e. AA) */ +#define R300_GA_LINE_S0 0x4264 +/* S Texture Coordinate Value for Vertex 1 of Lines (V2 of parallelogram -- stuff textures -- i.e. 
AA) */ +#define R300_GA_LINE_S1 0x4268 + +/* GA Input fifo high water marks */ +#define R500_GA_FIFO_CNTL 0x4270 +# define R500_GA_FIFO_CNTL_VERTEX_FIFO_MASK 0x00000007 +# define R500_GA_FIFO_CNTL_VERTEX_FIFO_SHIFT 0 +# define R500_GA_FIFO_CNTL_VERTEX_INDEX_MASK 0x00000038 +# define R500_GA_FIFO_CNTL_VERTEX_INDEX_SHIFT 3 +# define R500_GA_FIFO_CNTL_VERTEX_REG_MASK 0x00003fc0 +# define R500_GA_FIFO_CNTL_VERTEX_REG_SHIFT 6 + +/* GA enhance/tweaks */ +#define R300_GA_ENHANCE 0x4274 +# define R300_GA_ENHANCE_DEADLOCK_CNTL_NO_EFFECT (0 << 0) +# define R300_GA_ENHANCE_DEADLOCK_CNTL_PREVENT_TCL (1 << 0) /* Prevents TCL interface from deadlocking on GA side. */ +# define R300_GA_ENHANCE_FASTSYNC_CNTL_NO_EFFECT (0 << 1) +# define R300_GA_ENHANCE_FASTSYNC_CNTL_ENABLE (1 << 1) /* Enables high-performance register/primitive switching. */ +# define R500_GA_ENHANCE_REG_READWRITE_NO_EFFECT (0 << 2) /* R520+ only */ +# define R500_GA_ENHANCE_REG_READWRITE_ENABLE (1 << 2) /* R520+ only, Enables GA support of simultaneous register reads and writes. */ +# define R500_GA_ENHANCE_REG_NOSTALL_NO_EFFECT (0 << 3) +# define R500_GA_ENHANCE_REG_NOSTALL_ENABLE (1 << 3) /* Enables GA support of no-stall reads for register read back. 
*/ + +#define R300_GA_COLOR_CONTROL 0x4278 +# define R300_GA_COLOR_CONTROL_RGB0_SHADING_SOLID (0 << 0) +# define R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT (1 << 0) +# define R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD (2 << 0) +# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_SOLID (0 << 2) +# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT (1 << 2) +# define R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD (2 << 2) +# define R300_GA_COLOR_CONTROL_RGB1_SHADING_SOLID (0 << 4) +# define R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT (1 << 4) +# define R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD (2 << 4) +# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_SOLID (0 << 6) +# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_FLAT (1 << 6) +# define R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD (2 << 6) +# define R300_GA_COLOR_CONTROL_RGB2_SHADING_SOLID (0 << 8) +# define R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT (1 << 8) +# define R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD (2 << 8) +# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_SOLID (0 << 10) +# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT (1 << 10) +# define R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD (2 << 10) +# define R300_GA_COLOR_CONTROL_RGB3_SHADING_SOLID (0 << 12) +# define R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT (1 << 12) +# define R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD (2 << 12) +# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_SOLID (0 << 14) +# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_FLAT (1 << 14) +# define R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD (2 << 14) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_FIRST (0 << 16) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_SECOND (1 << 16) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_THIRD (2 << 16) +# define R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST (3 << 16) + +/** TODO: might be candidate for removal */ +# define R300_RE_SHADE_MODEL_SMOOTH ( \ + R300_GA_COLOR_CONTROL_RGB0_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_GOURAUD | \ + 
R300_GA_COLOR_CONTROL_RGB1_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB2_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB3_SHADING_GOURAUD | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) +/** TODO: might be candidate for removal, the GOURAUD stuff also looks buggy to me */ +# define R300_RE_SHADE_MODEL_FLAT ( \ + R300_GA_COLOR_CONTROL_RGB0_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA0_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_RGB1_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA1_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_RGB2_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA2_SHADING_FLAT | \ + R300_GA_COLOR_CONTROL_RGB3_SHADING_FLAT | R300_GA_COLOR_CONTROL_ALPHA3_SHADING_GOURAUD | \ + R300_GA_COLOR_CONTROL_PROVOKING_VERTEX_LAST ) + +/* Specifies red & green components of fill color -- S312 format -- Backwards comp. */ +#define R300_GA_SOLID_RG 0x427c +# define GA_SOLID_RG_COLOR_GREEN_SHIFT 0 +# define GA_SOLID_RG_COLOR_GREEN_MASK 0x0000ffff +# define GA_SOLID_RG_COLOR_RED_SHIFT 16 +# define GA_SOLID_RG_COLOR_RED_MASK 0xffff0000 +/* Specifies blue & alpha components of fill color -- S312 format -- Backwards comp. 
*/ +#define R300_GA_SOLID_BA 0x4280 +# define GA_SOLID_BA_COLOR_ALPHA_SHIFT 0 +# define GA_SOLID_BA_COLOR_ALPHA_MASK 0x0000ffff +# define GA_SOLID_BA_COLOR_BLUE_SHIFT 16 +# define GA_SOLID_BA_COLOR_BLUE_MASK 0xffff0000 + +/* Polygon Mode + * Dangerous + */ +#define R300_GA_POLY_MODE 0x4288 +# define R300_GA_POLY_MODE_DISABLE (0 << 0) +# define R300_GA_POLY_MODE_DUAL (1 << 0) /* send 2 sets of 3 polys with specified poly type */ +/* reserved */ +# define R300_GA_POLY_MODE_FRONT_PTYPE_POINT (0 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_LINE (1 << 4) +# define R300_GA_POLY_MODE_FRONT_PTYPE_TRI (2 << 4) +/* reserved */ +# define R300_GA_POLY_MODE_BACK_PTYPE_POINT (0 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_LINE (1 << 7) +# define R300_GA_POLY_MODE_BACK_PTYPE_TRI (2 << 7) +/* reserved */ + +/* Specifies the rounding mode for geometry & color SPFP to FP conversions. */ +#define R300_GA_ROUND_MODE 0x428c +# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_TRUNC (0 << 0) +# define R300_GA_ROUND_MODE_GEOMETRY_ROUND_NEAREST (1 << 0) +# define R300_GA_ROUND_MODE_COLOR_ROUND_TRUNC (0 << 2) +# define R300_GA_ROUND_MODE_COLOR_ROUND_NEAREST (1 << 2) +# define R300_GA_ROUND_MODE_RGB_CLAMP_RGB (0 << 4) +# define R300_GA_ROUND_MODE_RGB_CLAMP_FP20 (1 << 4) +# define R300_GA_ROUND_MODE_ALPHA_CLAMP_RGB (0 << 5) +# define R300_GA_ROUND_MODE_ALPHA_CLAMP_FP20 (1 << 5) +# define R500_GA_ROUND_MODE_GEOMETRY_MASK_SHIFT 6 +# define R500_GA_ROUND_MODE_GEOMETRY_MASK_MASK 0x000003c0 + +/* Specifies x & y offsets for vertex data after conversion to FP. + * Offsets are in S15 format (subpixels -- 1/12 or 1/16, even in 8b + * subprecision). + */ +#define R300_GA_OFFSET 0x4290 +# define R300_GA_OFFSET_X_OFFSET_SHIFT 0 +# define R300_GA_OFFSET_X_OFFSET_MASK 0x0000ffff +# define R300_GA_OFFSET_Y_OFFSET_SHIFT 16 +# define R300_GA_OFFSET_Y_OFFSET_MASK 0xffff0000 + +/* Specifies the scale to apply to fog. */ +#define R300_GA_FOG_SCALE 0x4294 +/* Specifies the offset to apply to fog.
*/ +#define R300_GA_FOG_OFFSET 0x4298 +/* Specifies number of cycles to assert reset, and also causes RB3D soft reset to assert. */ +#define R300_GA_SOFT_RESET 0x429c + +/* Not sure why there are duplicates of factor and constant values. + * My best guess so far is that there are separate zbiases for test and write. + * Ordering might be wrong. + * Some of the tests indicate that fgl has a fallback implementation of zbias + * via pixel shaders. + */ +#define R300_SU_TEX_WRAP 0x42A0 +#define R300_SU_POLY_OFFSET_FRONT_SCALE 0x42A4 +#define R300_SU_POLY_OFFSET_FRONT_OFFSET 0x42A8 +#define R300_SU_POLY_OFFSET_BACK_SCALE 0x42AC +#define R300_SU_POLY_OFFSET_BACK_OFFSET 0x42B0 + +/* This register needs to be set to (1<<1) for RV350 to correctly + * perform depth test (see --vb-triangles in r300_demo) + * Don't know about other chips. - Vladimir + * This is set to 3 when GL_POLYGON_OFFSET_FILL is on. + * My guess is that there are two bits for each zbias primitive + * (FILL, LINE, POINT). + * One to enable depth test and one for depth write. + * Yet this doesn't explain why depth writes work ... + */ +#define R300_SU_POLY_OFFSET_ENABLE 0x42B4 +# define R300_FRONT_ENABLE (1 << 0) +# define R300_BACK_ENABLE (1 << 1) +# define R300_PARA_ENABLE (1 << 2) + +#define R300_SU_CULL_MODE 0x42B8 +# define R300_CULL_FRONT (1 << 0) +# define R300_CULL_BACK (1 << 1) +# define R300_FRONT_FACE_CCW (0 << 2) +# define R300_FRONT_FACE_CW (1 << 2) + +/* SU Depth Scale value */ +#define R300_SU_DEPTH_SCALE 0x42c0 +/* SU Depth Offset value */ +#define R300_SU_DEPTH_OFFSET 0x42c4 + + +/* BEGIN: Rasterization / Interpolators - many guesses */ + +/* + * TC_CNT is the number of incoming texture coordinate sets (i.e.
it depends + * on the vertex program, *not* the fragment program) + */ +#define R300_RS_COUNT 0x4300 +# define R300_IT_COUNT_SHIFT 0 +# define R300_IT_COUNT_MASK 0x0000007f +# define R300_IC_COUNT_SHIFT 7 +# define R300_IC_COUNT_MASK 0x00000780 +# define R300_W_ADDR_SHIFT 12 +# define R300_W_ADDR_MASK 0x0003f000 +# define R300_HIRES_DIS (0 << 18) +# define R300_HIRES_EN (1 << 18) + +#define R300_RS_INST_COUNT 0x4304 +# define R300_RS_INST_COUNT_SHIFT 0 +# define R300_RS_INST_COUNT_MASK 0x0000000f +# define R300_RS_TX_OFFSET_SHIFT 5 +# define R300_RS_TX_OFFSET_MASK 0x000000e0 + +/* gap */ + +/* Only used for texture coordinates. + * Use the source field to route texture coordinate input from the + * vertex program to the desired interpolator. Note that the source + * field is relative to the outputs the vertex program *actually* + * writes. If a vertex program only writes texcoord[1], this will + * be source index 0. + * Set INTERP_USED on all interpolators that produce data used by + * the fragment program. INTERP_USED looks like a swizzling mask, + * but I haven't seen it used that way. + * + * Note: The _UNKNOWN constants are always set in their respective + * register. I don't know if this is necessary. 
+ */ +#define R300_RS_IP_0 0x4310 +#define R300_RS_IP_1 0x4314 +#define R300_RS_IP_2 0x4318 +#define R300_RS_IP_3 0x431C +# define R300_RS_INTERP_SRC_SHIFT 2 /* TODO: check for removal */ +# define R300_RS_INTERP_SRC_MASK (7 << 2) /* TODO: check for removal */ +# define R300_RS_TEX_PTR(x) (x << 0) +# define R300_RS_COL_PTR(x) ((x) << 6) +# define R300_RS_COL_FMT(x) ((x) << 9) +# define R300_RS_COL_FMT_RGBA 0 +# define R300_RS_COL_FMT_RGB0 1 +# define R300_RS_COL_FMT_RGB1 2 +# define R300_RS_COL_FMT_000A 4 +# define R300_RS_COL_FMT_0000 5 +# define R300_RS_COL_FMT_0001 6 +# define R300_RS_COL_FMT_111A 8 +# define R300_RS_COL_FMT_1110 9 +# define R300_RS_COL_FMT_1111 10 +# define R300_RS_SEL_S(x) ((x) << 13) +# define R300_RS_SEL_T(x) ((x) << 16) +# define R300_RS_SEL_R(x) ((x) << 19) +# define R300_RS_SEL_Q(x) ((x) << 22) +# define R300_RS_SEL_C0 0 +# define R300_RS_SEL_C1 1 +# define R300_RS_SEL_C2 2 +# define R300_RS_SEL_C3 3 +# define R300_RS_SEL_K0 4 +# define R300_RS_SEL_K1 5 + + +/* */ +#define R500_RS_INST_0 0x4320 +#define R500_RS_INST_1 0x4324 +#define R500_RS_INST_2 0x4328 +#define R500_RS_INST_3 0x432c +#define R500_RS_INST_4 0x4330 +#define R500_RS_INST_5 0x4334 +#define R500_RS_INST_6 0x4338 +#define R500_RS_INST_7 0x433c +#define R500_RS_INST_8 0x4340 +#define R500_RS_INST_9 0x4344 +#define R500_RS_INST_10 0x4348 +#define R500_RS_INST_11 0x434c +#define R500_RS_INST_12 0x4350 +#define R500_RS_INST_13 0x4354 +#define R500_RS_INST_14 0x4358 +#define R500_RS_INST_15 0x435c +#define R500_RS_INST_TEX_ID_SHIFT 0 +#define R500_RS_INST_TEX_CN_WRITE (1 << 4) +#define R500_RS_INST_TEX_ADDR_SHIFT 5 +#define R500_RS_INST_COL_ID_SHIFT 12 +#define R500_RS_INST_COL_CN_NO_WRITE (0 << 16) +#define R500_RS_INST_COL_CN_WRITE (1 << 16) +#define R500_RS_INST_COL_CN_WRITE_FBUFFER (2 << 16) +#define R500_RS_INST_COL_CN_WRITE_BACKFACE (3 << 16) +#define R500_RS_INST_COL_ADDR_SHIFT 18 +#define R500_RS_INST_TEX_ADJ (1 << 25) +#define R500_RS_INST_W_CN (1 << 26) + +/* These 
DWORDs control how vertex data is routed into fragment program + * registers, after interpolators. + */ +#define R300_RS_INST_0 0x4330 +#define R300_RS_INST_1 0x4334 +#define R300_RS_INST_2 0x4338 +#define R300_RS_INST_3 0x433C +#define R300_RS_INST_4 0x4340 +#define R300_RS_INST_5 0x4344 +#define R300_RS_INST_6 0x4348 +#define R300_RS_INST_7 0x434C +# define R300_RS_INST_TEX_ID(x) ((x) << 0) +# define R300_RS_INST_TEX_CN_WRITE (1 << 3) +# define R300_RS_INST_TEX_ADDR_SHIFT 6 +# define R300_RS_INST_COL_ID(x) ((x) << 11) +# define R300_RS_INST_COL_CN_WRITE (1 << 14) +# define R300_RS_INST_COL_ADDR_SHIFT 17 +# define R300_RS_INST_TEX_ADJ (1 << 22) +# define R300_RS_COL_BIAS_UNUSED_SHIFT 23 + +/* END: Rasterization / Interpolators - many guesses */ + +/* Hierarchical Z Enable */ +#define R300_SC_HYPERZ 0x43a4 +# define R300_SC_HYPERZ_DISABLE (0 << 0) +# define R300_SC_HYPERZ_ENABLE (1 << 0) +# define R300_SC_HYPERZ_MIN (0 << 1) +# define R300_SC_HYPERZ_MAX (1 << 1) +# define R300_SC_HYPERZ_ADJ_256 (0 << 2) +# define R300_SC_HYPERZ_ADJ_128 (1 << 2) +# define R300_SC_HYPERZ_ADJ_64 (2 << 2) +# define R300_SC_HYPERZ_ADJ_32 (3 << 2) +# define R300_SC_HYPERZ_ADJ_16 (4 << 2) +# define R300_SC_HYPERZ_ADJ_8 (5 << 2) +# define R300_SC_HYPERZ_ADJ_4 (6 << 2) +# define R300_SC_HYPERZ_ADJ_2 (7 << 2) +# define R300_SC_HYPERZ_HZ_Z0MIN_NO (0 << 5) +# define R300_SC_HYPERZ_HZ_Z0MIN (1 << 5) +# define R300_SC_HYPERZ_HZ_Z0MAX_NO (0 << 6) +# define R300_SC_HYPERZ_HZ_Z0MAX (1 << 6) + +#define R300_SC_EDGERULE 0x43a8 + +/* BEGIN: Scissors and cliprects */ + +/* There are four clipping rectangles. Their corner coordinates are inclusive. + * Every pixel is assigned a number from 0 and 15 by setting bits 0-3 depending + * on whether the pixel is inside cliprects 0-3, respectively. For example, + * if a pixel is inside cliprects 0 and 1, but outside 2 and 3, it is assigned + * the number 3 (binary 0011). 
+ * Iff the bit corresponding to the pixel's number in RE_CLIPRECT_CNTL is set, + * the pixel is rasterized. + * + * In addition to this, there is a scissors rectangle. Only pixels inside the + * scissors rectangle are drawn. (coordinates are inclusive) + * + * For some reason, the top-left corner of the framebuffer is at (1440, 1440) + * for the purpose of clipping and scissors. + */ +#define R300_SC_CLIPRECT_TL_0 0x43B0 +#define R300_SC_CLIPRECT_BR_0 0x43B4 +#define R300_SC_CLIPRECT_TL_1 0x43B8 +#define R300_SC_CLIPRECT_BR_1 0x43BC +#define R300_SC_CLIPRECT_TL_2 0x43C0 +#define R300_SC_CLIPRECT_BR_2 0x43C4 +#define R300_SC_CLIPRECT_TL_3 0x43C8 +#define R300_SC_CLIPRECT_BR_3 0x43CC +# define R300_CLIPRECT_OFFSET 1440 +# define R300_CLIPRECT_MASK 0x1FFF +# define R300_CLIPRECT_X_SHIFT 0 +# define R300_CLIPRECT_X_MASK (0x1FFF << 0) +# define R300_CLIPRECT_Y_SHIFT 13 +# define R300_CLIPRECT_Y_MASK (0x1FFF << 13) +#define R300_SC_CLIP_RULE 0x43D0 +# define R300_CLIP_OUT (1 << 0) +# define R300_CLIP_0 (1 << 1) +# define R300_CLIP_1 (1 << 2) +# define R300_CLIP_10 (1 << 3) +# define R300_CLIP_2 (1 << 4) +# define R300_CLIP_20 (1 << 5) +# define R300_CLIP_21 (1 << 6) +# define R300_CLIP_210 (1 << 7) +# define R300_CLIP_3 (1 << 8) +# define R300_CLIP_30 (1 << 9) +# define R300_CLIP_31 (1 << 10) +# define R300_CLIP_310 (1 << 11) +# define R300_CLIP_32 (1 << 12) +# define R300_CLIP_320 (1 << 13) +# define R300_CLIP_321 (1 << 14) +# define R300_CLIP_3210 (1 << 15) + +/* gap */ + +#define R300_SC_SCISSORS_TL 0x43E0 +#define R300_SC_SCISSORS_BR 0x43E4 +# define R300_SCISSORS_OFFSET 1440 +# define R300_SCISSORS_X_SHIFT 0 +# define R300_SCISSORS_X_MASK (0x1FFF << 0) +# define R300_SCISSORS_Y_SHIFT 13 +# define R300_SCISSORS_Y_MASK (0x1FFF << 13) + +/* Screen door sample mask */ +#define R300_SC_SCREENDOOR 0x43e8 + +/* END: Scissors and cliprects */ + +/* BEGIN: Texture specification */ + +/* + * The texture specification dwords are grouped by meaning and not by texture + * unit. 
This means that e.g. the offset for texture image unit N is found in + * register TX_OFFSET_0 + (4*N) + */ +#define R300_TX_FILTER0_0 0x4400 +#define R300_TX_FILTER0_1 0x4404 +#define R300_TX_FILTER0_2 0x4408 +#define R300_TX_FILTER0_3 0x440c +#define R300_TX_FILTER0_4 0x4410 +#define R300_TX_FILTER0_5 0x4414 +#define R300_TX_FILTER0_6 0x4418 +#define R300_TX_FILTER0_7 0x441c +#define R300_TX_FILTER0_8 0x4420 +#define R300_TX_FILTER0_9 0x4424 +#define R300_TX_FILTER0_10 0x4428 +#define R300_TX_FILTER0_11 0x442c +#define R300_TX_FILTER0_12 0x4430 +#define R300_TX_FILTER0_13 0x4434 +#define R300_TX_FILTER0_14 0x4438 +#define R300_TX_FILTER0_15 0x443c +# define R300_TX_REPEAT 0 +# define R300_TX_MIRRORED 1 +# define R300_TX_CLAMP_TO_EDGE 2 +# define R300_TX_MIRROR_ONCE_TO_EDGE 3 +# define R300_TX_CLAMP 4 +# define R300_TX_MIRROR_ONCE 5 +# define R300_TX_CLAMP_TO_BORDER 6 +# define R300_TX_MIRROR_ONCE_TO_BORDER 7 +# define R300_TX_WRAP_S_SHIFT 0 +# define R300_TX_WRAP_S_MASK (7 << 0) +# define R300_TX_WRAP_T_SHIFT 3 +# define R300_TX_WRAP_T_MASK (7 << 3) +# define R300_TX_WRAP_R_SHIFT 6 +# define R300_TX_WRAP_R_MASK (7 << 6) +# define R300_TX_MAG_FILTER_4 (0 << 9) +# define R300_TX_MAG_FILTER_NEAREST (1 << 9) +# define R300_TX_MAG_FILTER_LINEAR (2 << 9) +# define R300_TX_MAG_FILTER_ANISO (3 << 9) +# define R300_TX_MAG_FILTER_MASK (3 << 9) +# define R300_TX_MIN_FILTER_NEAREST (1 << 11) +# define R300_TX_MIN_FILTER_LINEAR (2 << 11) +# define R300_TX_MIN_FILTER_ANISO (3 << 11) +# define R300_TX_MIN_FILTER_MASK (3 << 11) +# define R300_TX_MIN_FILTER_MIP_NONE (0 << 13) +# define R300_TX_MIN_FILTER_MIP_NEAREST (1 << 13) +# define R300_TX_MIN_FILTER_MIP_LINEAR (2 << 13) +# define R300_TX_MIN_FILTER_MIP_MASK (3 << 13) +# define R300_TX_MAX_ANISO_1_TO_1 (0 << 21) +# define R300_TX_MAX_ANISO_2_TO_1 (1 << 21) +# define R300_TX_MAX_ANISO_4_TO_1 (2 << 21) +# define R300_TX_MAX_ANISO_8_TO_1 (3 << 21) +# define R300_TX_MAX_ANISO_16_TO_1 (4 << 21) +# define R300_TX_MAX_ANISO_MASK (7 
<< 21) + +#define R300_TX_FILTER1_0 0x4440 +# define R300_CHROMA_KEY_MODE_DISABLE 0 +# define R300_CHROMA_KEY_FORCE 1 +# define R300_CHROMA_KEY_BLEND 2 +# define R300_MC_ROUND_NORMAL (0<<2) +# define R300_MC_ROUND_MPEG4 (1<<2) +# define R300_LOD_BIAS_SHIFT 3 +# define R300_LOD_BIAS_MASK 0x1ff8 +# define R300_EDGE_ANISO_EDGE_DIAG (0<<13) +# define R300_EDGE_ANISO_EDGE_ONLY (1<<13) +# define R300_MC_COORD_TRUNCATE_DISABLE (0<<14) +# define R300_MC_COORD_TRUNCATE_MPEG (1<<14) +# define R300_TX_TRI_PERF_0_8 (0<<15) +# define R300_TX_TRI_PERF_1_8 (1<<15) +# define R300_TX_TRI_PERF_1_4 (2<<15) +# define R300_TX_TRI_PERF_3_8 (3<<15) +# define R300_ANISO_THRESHOLD_MASK (7<<17) + +# define R500_MACRO_SWITCH (1<<22) +# define R500_BORDER_FIX (1<<31) + +#define R300_TX_SIZE_0 0x4480 +# define R300_TX_WIDTHMASK_SHIFT 0 +# define R300_TX_WIDTHMASK_MASK (2047 << 0) +# define R300_TX_HEIGHTMASK_SHIFT 11 +# define R300_TX_HEIGHTMASK_MASK (2047 << 11) +# define R300_TX_DEPTHMASK_SHIFT 22 +# define R300_TX_DEPTHMASK_MASK (0xf << 22) +# define R300_TX_MAX_MIP_LEVEL_SHIFT 26 +# define R300_TX_MAX_MIP_LEVEL_MASK (0xf << 26) +# define R300_TX_SIZE_PROJECTED (1<<30) +# define R300_TX_SIZE_TXPITCH_EN (1<<31) +#define R300_TX_FORMAT_0 0x44C0 + /* The interpretation of the format word by Wladimir van der Laan */ + /* The X, Y, Z and W refer to the layout of the components. 
+ They are given meanings as R, G, B and Alpha by the swizzle + specification */ +# define R300_TX_FORMAT_X8 0x0 +# define R500_TX_FORMAT_X1 0x0 // bit set in format 2 +# define R300_TX_FORMAT_X16 0x1 +# define R500_TX_FORMAT_X1_REV 0x0 // bit set in format 2 +# define R300_TX_FORMAT_Y4X4 0x2 +# define R300_TX_FORMAT_Y8X8 0x3 +# define R300_TX_FORMAT_Y16X16 0x4 +# define R300_TX_FORMAT_Z3Y3X2 0x5 +# define R300_TX_FORMAT_Z5Y6X5 0x6 +# define R300_TX_FORMAT_Z6Y5X5 0x7 +# define R300_TX_FORMAT_Z11Y11X10 0x8 +# define R300_TX_FORMAT_Z10Y11X11 0x9 +# define R300_TX_FORMAT_W4Z4Y4X4 0xA +# define R300_TX_FORMAT_W1Z5Y5X5 0xB +# define R300_TX_FORMAT_W8Z8Y8X8 0xC +# define R300_TX_FORMAT_W2Z10Y10X10 0xD +# define R300_TX_FORMAT_W16Z16Y16X16 0xE +# define R300_TX_FORMAT_DXT1 0xF +# define R300_TX_FORMAT_DXT3 0x10 +# define R300_TX_FORMAT_DXT5 0x11 +# define R300_TX_FORMAT_D3DMFT_CxV8U8 0x12 /* no swizzle */ +# define R300_TX_FORMAT_A8R8G8B8 0x13 /* no swizzle */ +# define R300_TX_FORMAT_B8G8_B8G8 0x14 /* no swizzle */ +# define R300_TX_FORMAT_G8R8_G8B8 0x15 /* no swizzle */ + + /* These two values are wrong, but they're the only values that + * produce any even vaguely correct results. Can r300 only do 16-bit + * depth textures? + */ +# define R300_TX_FORMAT_X24_Y8 0x1e +# define R300_TX_FORMAT_X32 0x1e + + /* 0x16 - some 16 bit green format.. ?? 
*/ +# define R300_TX_FORMAT_3D (1 << 25) +# define R300_TX_FORMAT_CUBIC_MAP (2 << 25) + + /* gap */ + /* Floating point formats */ + /* Note - hardware supports both 16 and 32 bit floating point */ +# define R300_TX_FORMAT_FL_I16 0x18 +# define R300_TX_FORMAT_FL_I16A16 0x19 +# define R300_TX_FORMAT_FL_R16G16B16A16 0x1A +# define R300_TX_FORMAT_FL_I32 0x1B +# define R300_TX_FORMAT_FL_I32A32 0x1C +# define R300_TX_FORMAT_FL_R32G32B32A32 0x1D + /* alpha modes, convenience mostly */ + /* if you have alpha, pick constant appropriate to the + number of channels (1 for I8, 2 for I8A8, 4 for R8G8B8A8, etc */ +# define R300_TX_FORMAT_ALPHA_1CH 0x000 +# define R300_TX_FORMAT_ALPHA_2CH 0x200 +# define R300_TX_FORMAT_ALPHA_4CH 0x600 +# define R300_TX_FORMAT_ALPHA_NONE 0xA00 + /* Swizzling */ + /* constants */ +# define R300_TX_FORMAT_X 0 +# define R300_TX_FORMAT_Y 1 +# define R300_TX_FORMAT_Z 2 +# define R300_TX_FORMAT_W 3 +# define R300_TX_FORMAT_ZERO 4 +# define R300_TX_FORMAT_ONE 5 + /* 2.0*Z, everything above 1.0 is set to 0.0 */ +# define R300_TX_FORMAT_CUT_Z 6 + /* 2.0*W, everything above 1.0 is set to 0.0 */ +# define R300_TX_FORMAT_CUT_W 7 + +# define R300_TX_FORMAT_B_SHIFT 18 +# define R300_TX_FORMAT_G_SHIFT 15 +# define R300_TX_FORMAT_R_SHIFT 12 +# define R300_TX_FORMAT_A_SHIFT 9 + /* Convenience macro to take care of layout and swizzling */ +# define R300_EASY_TX_FORMAT(B, G, R, A, FMT) ( \ + ((R300_TX_FORMAT_##B)<<R300_TX_FORMAT_B_SHIFT) \ + | ((R300_TX_FORMAT_##G)<<R300_TX_FORMAT_G_SHIFT) \ + | ((R300_TX_FORMAT_##R)<<R300_TX_FORMAT_R_SHIFT) \ + | ((R300_TX_FORMAT_##A)<<R300_TX_FORMAT_A_SHIFT) \ + | (R300_TX_FORMAT_##FMT) \ + ) + /* These can be ORed with result of R300_EASY_TX_FORMAT() + We don't really know what they do. Take values from a + constant color ? 
*/ +# define R300_TX_FORMAT_CONST_X (1<<5) +# define R300_TX_FORMAT_CONST_Y (2<<5) +# define R300_TX_FORMAT_CONST_Z (4<<5) +# define R300_TX_FORMAT_CONST_W (8<<5) + +# define R300_TX_FORMAT_YUV_MODE 0x00800000 + +#define R300_TX_FORMAT2_0 0x4500 /* obvious missing in gap */ +# define R300_TX_PITCHMASK_SHIFT 0 +# define R300_TX_PITCHMASK_MASK (2047 << 0) +# define R500_TXFORMAT_MSB (1 << 14) +# define R500_TXWIDTH_BIT11 (1 << 15) +# define R500_TXHEIGHT_BIT11 (1 << 16) +# define R500_POW2FIX2FLT (1 << 17) +# define R500_SEL_FILTER4_TC0 (0 << 18) +# define R500_SEL_FILTER4_TC1 (1 << 18) +# define R500_SEL_FILTER4_TC2 (2 << 18) +# define R500_SEL_FILTER4_TC3 (3 << 18) + +#define R300_TX_OFFSET_0 0x4540 +#define R300_TX_OFFSET_1 0x4544 +#define R300_TX_OFFSET_2 0x4548 +#define R300_TX_OFFSET_3 0x454C +#define R300_TX_OFFSET_4 0x4550 +#define R300_TX_OFFSET_5 0x4554 +#define R300_TX_OFFSET_6 0x4558 +#define R300_TX_OFFSET_7 0x455C + /* BEGIN: Guess from R200 */ +# define R300_TXO_ENDIAN_NO_SWAP (0 << 0) +# define R300_TXO_ENDIAN_BYTE_SWAP (1 << 0) +# define R300_TXO_ENDIAN_WORD_SWAP (2 << 0) +# define R300_TXO_ENDIAN_HALFDW_SWAP (3 << 0) +# define R300_TXO_MACRO_TILE (1 << 2) +# define R300_TXO_MICRO_TILE_LINEAR (0 << 3) +# define R300_TXO_MICRO_TILE (1 << 3) +# define R300_TXO_MICRO_TILE_SQUARE (2 << 3) +# define R300_TXO_OFFSET_MASK 0xffffffe0 +# define R300_TXO_OFFSET_SHIFT 5 + /* END: Guess from R200 */ + +/* 32 bit chroma key */ +#define R300_TX_CHROMA_KEY_0 0x4580 +#define R300_TX_CHROMA_KEY_1 0x4584 +#define R300_TX_CHROMA_KEY_2 0x4588 +#define R300_TX_CHROMA_KEY_3 0x458c +#define R300_TX_CHROMA_KEY_4 0x4590 +#define R300_TX_CHROMA_KEY_5 0x4594 +#define R300_TX_CHROMA_KEY_6 0x4598 +#define R300_TX_CHROMA_KEY_7 0x459c +#define R300_TX_CHROMA_KEY_8 0x45a0 +#define R300_TX_CHROMA_KEY_9 0x45a4 +#define R300_TX_CHROMA_KEY_10 0x45a8 +#define R300_TX_CHROMA_KEY_11 0x45ac +#define R300_TX_CHROMA_KEY_12 0x45b0 +#define R300_TX_CHROMA_KEY_13 0x45b4 +#define 
R300_TX_CHROMA_KEY_14 0x45b8 +#define R300_TX_CHROMA_KEY_15 0x45bc +/* ff00ff00 == { 0, 1.0, 0, 1.0 } */ + +/* Border Color */ +#define R300_TX_BORDER_COLOR_0 0x45c0 +#define R300_TX_BORDER_COLOR_1 0x45c4 +#define R300_TX_BORDER_COLOR_2 0x45c8 +#define R300_TX_BORDER_COLOR_3 0x45cc +#define R300_TX_BORDER_COLOR_4 0x45d0 +#define R300_TX_BORDER_COLOR_5 0x45d4 +#define R300_TX_BORDER_COLOR_6 0x45d8 +#define R300_TX_BORDER_COLOR_7 0x45dc +#define R300_TX_BORDER_COLOR_8 0x45e0 +#define R300_TX_BORDER_COLOR_9 0x45e4 +#define R300_TX_BORDER_COLOR_10 0x45e8 +#define R300_TX_BORDER_COLOR_11 0x45ec +#define R300_TX_BORDER_COLOR_12 0x45f0 +#define R300_TX_BORDER_COLOR_13 0x45f4 +#define R300_TX_BORDER_COLOR_14 0x45f8 +#define R300_TX_BORDER_COLOR_15 0x45fc + + +/* END: Texture specification */ + +/* BEGIN: Fragment program instruction set */ + +/* Fragment programs are written directly into register space. + * There are separate instruction streams for texture instructions and ALU + * instructions. + * In order to synchronize these streams, the program is divided into up + * to 4 nodes. Each node begins with a number of TEX operations, followed + * by a number of ALU operations. + * The first node can have zero TEX ops, all subsequent nodes must have at + * least + * one TEX ops. + * All nodes must have at least one ALU op. + * + * The index of the last node is stored in PFS_CNTL_0: A value of 0 means + * 1 node, a value of 3 means 4 nodes. + * The total amount of instructions is defined in PFS_CNTL_2. The offsets are + * offsets into the respective instruction streams, while *_END points to the + * last instruction relative to this offset. + */ +#define R300_US_CONFIG 0x4600 +# define R300_PFS_CNTL_LAST_NODES_SHIFT 0 +# define R300_PFS_CNTL_LAST_NODES_MASK (3 << 0) +# define R300_PFS_CNTL_FIRST_NODE_HAS_TEX (1 << 3) +#define R300_US_PIXSIZE 0x4604 +/* There is an unshifted value here which has so far always been equal to the + * index of the highest used temporary register. 
+ */ +#define R300_US_CODE_OFFSET 0x4608 +# define R300_PFS_CNTL_ALU_OFFSET_SHIFT 0 +# define R300_PFS_CNTL_ALU_OFFSET_MASK (63 << 0) +# define R300_PFS_CNTL_ALU_END_SHIFT 6 +# define R300_PFS_CNTL_ALU_END_MASK (63 << 6) +# define R300_PFS_CNTL_TEX_OFFSET_SHIFT 13 +# define R300_PFS_CNTL_TEX_OFFSET_MASK (31 << 13) +# define R300_PFS_CNTL_TEX_END_SHIFT 18 +# define R300_PFS_CNTL_TEX_END_MASK (31 << 18) + +/* gap */ + +/* Nodes are stored backwards. The last active node is always stored in + * PFS_NODE_3. + * Example: In a 2-node program, NODE_0 and NODE_1 are set to 0. The + * first node is stored in NODE_2, the second node is stored in NODE_3. + * + * Offsets are relative to the master offset from PFS_CNTL_2. + */ +#define R300_US_CODE_ADDR_0 0x4610 +#define R300_US_CODE_ADDR_1 0x4614 +#define R300_US_CODE_ADDR_2 0x4618 +#define R300_US_CODE_ADDR_3 0x461C +# define R300_ALU_START_SHIFT 0 +# define R300_ALU_START_MASK (63 << 0) +# define R300_ALU_SIZE_SHIFT 6 +# define R300_ALU_SIZE_MASK (63 << 6) +# define R300_TEX_START_SHIFT 12 +# define R300_TEX_START_MASK (31 << 12) +# define R300_TEX_SIZE_SHIFT 17 +# define R300_TEX_SIZE_MASK (31 << 17) +# define R300_RGBA_OUT (1 << 22) +# define R300_W_OUT (1 << 23) + +/* TEX + * As far as I can tell, texture instructions cannot write into output + * registers directly. 
A subsequent ALU instruction is always necessary, + * even if it's just MAD o0, r0, 1, 0 + */ +#define R300_US_TEX_INST_0 0x4620 +# define R300_SRC_ADDR_SHIFT 0 +# define R300_SRC_ADDR_MASK (31 << 0) +# define R300_DST_ADDR_SHIFT 6 +# define R300_DST_ADDR_MASK (31 << 6) +# define R300_TEX_ID_SHIFT 11 +# define R300_TEX_ID_MASK (15 << 11) +# define R300_TEX_INST_SHIFT 15 +# define R300_TEX_OP_NOP 0 +# define R300_TEX_OP_LD 1 +# define R300_TEX_OP_KIL 2 +# define R300_TEX_OP_TXP 3 +# define R300_TEX_OP_TXB 4 +# define R300_TEX_INST_MASK (7 << 15) + +/* Output format from the unified shader */ +#define R300_US_OUT_FMT_0 0x46A4 +# define R300_US_OUT_FMT_C4_8 (0 << 0) +# define R300_US_OUT_FMT_C4_10 (1 << 0) +# define R300_US_OUT_FMT_C4_10_GAMMA (2 << 0) +# define R300_US_OUT_FMT_C_16 (3 << 0) +# define R300_US_OUT_FMT_C2_16 (4 << 0) +# define R300_US_OUT_FMT_C4_16 (5 << 0) +# define R300_US_OUT_FMT_C_16_MPEG (6 << 0) +# define R300_US_OUT_FMT_C2_16_MPEG (7 << 0) +# define R300_US_OUT_FMT_C2_4 (8 << 0) +# define R300_US_OUT_FMT_C_3_3_2 (9 << 0) +# define R300_US_OUT_FMT_C_6_5_6 (10 << 0) +# define R300_US_OUT_FMT_C_11_11_10 (11 << 0) +# define R300_US_OUT_FMT_C_10_11_11 (12 << 0) +# define R300_US_OUT_FMT_C_2_10_10_10 (13 << 0) +/* reserved */ +# define R300_US_OUT_FMT_UNUSED (15 << 0) +# define R300_US_OUT_FMT_C_16_FP (16 << 0) +# define R300_US_OUT_FMT_C2_16_FP (17 << 0) +# define R300_US_OUT_FMT_C4_16_FP (18 << 0) +# define R300_US_OUT_FMT_C_32_FP (19 << 0) +# define R300_US_OUT_FMT_C2_32_FP (20 << 0) +# define R300_US_OUT_FMT_C4_32_FP (21 << 0) +# define R300_C0_SEL_A (0 << 8) +# define R300_C0_SEL_R (1 << 8) +# define R300_C0_SEL_G (2 << 8) +# define R300_C0_SEL_B (3 << 8) +# define R300_C1_SEL_A (0 << 10) +# define R300_C1_SEL_R (1 << 10) +# define R300_C1_SEL_G (2 << 10) +# define R300_C1_SEL_B (3 << 10) +# define R300_C2_SEL_A (0 << 12) +# define R300_C2_SEL_R (1 << 12) +# define R300_C2_SEL_G (2 << 12) +# define R300_C2_SEL_B (3 << 12) +# define R300_C3_SEL_A (0
<< 14) +# define R300_C3_SEL_R (1 << 14) +# define R300_C3_SEL_G (2 << 14) +# define R300_C3_SEL_B (3 << 14) +# define R300_OUT_SIGN(x) ((x) << 16) +# define R500_ROUND_ADJ (1 << 20) + +/* ALU + * The ALU instructions register blocks are enumerated according to the order + * in which fglrx. I assume there is space for 64 instructions, since + * each block has space for a maximum of 64 DWORDs, and this matches reported + * native limits. + * + * The basic functional block seems to be one MAD for each color and alpha, + * and an adder that adds all components after the MUL. + * - ADD, MUL, MAD etc.: use MAD with appropriate neutral operands + * - DP4: Use OUTC_DP4, OUTA_DP4 + * - DP3: Use OUTC_DP3, OUTA_DP4, appropriate alpha operands + * - DPH: Use OUTC_DP4, OUTA_DP4, appropriate alpha operands + * - CMPH: If ARG2 > 0.5, return ARG0, else return ARG1 + * - CMP: If ARG2 < 0, return ARG1, else return ARG0 + * - FLR: use FRC+MAD + * - XPD: use MAD+MAD + * - SGE, SLT: use MAD+CMP + * - RSQ: use ABS modifier for argument + * - Use OUTC_REPL_ALPHA to write results of an alpha-only operation + * (e.g. RCP) into color register + * - apparently, there's no quick DST operation + * - fglrx set FPI2_UNKNOWN_31 on a "MAD fragment.color, tmp0, tmp1, tmp2" + * - fglrx set FPI2_UNKNOWN_31 on a "MAX r2, r1, c0" + * - fglrx once set FPI0_UNKNOWN_31 on a "FRC r1, r1" + * + * Operand selection + * First stage selects three sources from the available registers and + * constant parameters. This is defined in INSTR1 (color) and INSTR3 (alpha). + * fglrx sorts the three source fields: Registers before constants, + * lower indices before higher indices; I do not know whether this is + * necessary. + * + * fglrx fills unused sources with "read constant 0" + * According to specs, you cannot select more than two different constants. + * + * Second stage selects the operands from the sources. This is defined in + * INSTR0 (color) and INSTR2 (alpha). 
You can also select the special constants + * zero and one. + * Swizzling and negation happen in this stage, as well. + * + * Important: Color and alpha seem to be mostly separate, i.e. their sources + * selection appears to be fully independent (the register storage is probably + * physically split into a color and an alpha section). + * However (because of the apparent physical split), there is some interaction + * WRT swizzling. If, for example, you want to load an R component into an + * Alpha operand, this R component is taken from a *color* source, not from + * an alpha source. The corresponding register doesn't even have to appear in + * the alpha sources list. (I hope this all makes sense to you) + * + * Destination selection + * The destination register index is in FPI1 (color) and FPI3 (alpha) + * together with enable bits. + * There are separate enable bits for writing into temporary registers + * (DSTC_REG_* /DSTA_REG) and program output registers (DSTC_OUTPUT_* + * /DSTA_OUTPUT). You can write to both at once, or not write at all (the + * same index must be used for both). + * + * Note: There is a special form for LRP + * - Argument order is the same as in ARB_fragment_program.
+ * - Operation is MAD + * - ARG1 is set to ARGC_SRC1C_LRP/ARGC_SRC1A_LRP + * - Set FPI0/FPI2_SPECIAL_LRP + * Arbitrary LRP (including support for swizzling) requires vanilla MAD+MAD + */ +#define R300_US_ALU_RGB_ADDR_0 0x46C0 +# define R300_ALU_SRC0C_SHIFT 0 +# define R300_ALU_SRC0C_MASK (31 << 0) +# define R300_ALU_SRC0C_CONST (1 << 5) +# define R300_ALU_SRC1C_SHIFT 6 +# define R300_ALU_SRC1C_MASK (31 << 6) +# define R300_ALU_SRC1C_CONST (1 << 11) +# define R300_ALU_SRC2C_SHIFT 12 +# define R300_ALU_SRC2C_MASK (31 << 12) +# define R300_ALU_SRC2C_CONST (1 << 17) +# define R300_ALU_SRC_MASK 0x0003ffff +# define R300_ALU_DSTC_SHIFT 18 +# define R300_ALU_DSTC_MASK (31 << 18) +# define R300_ALU_DSTC_REG_MASK_SHIFT 23 +# define R300_ALU_DSTC_REG_X (1 << 23) +# define R300_ALU_DSTC_REG_Y (1 << 24) +# define R300_ALU_DSTC_REG_Z (1 << 25) +# define R300_ALU_DSTC_OUTPUT_MASK_SHIFT 26 +# define R300_ALU_DSTC_OUTPUT_X (1 << 26) +# define R300_ALU_DSTC_OUTPUT_Y (1 << 27) +# define R300_ALU_DSTC_OUTPUT_Z (1 << 28) +# define R300_ALU_DSTC_OUTPUT_XYZ (7 << 26) +# define R300_RGB_ADDR0(x) ((x) << 0) +# define R300_RGB_ADDR1(x) ((x) << 6) +# define R300_RGB_ADDR2(x) ((x) << 12) + +#define R300_US_ALU_ALPHA_ADDR_0 0x47C0 +# define R300_ALU_SRC0A_SHIFT 0 +# define R300_ALU_SRC0A_MASK (31 << 0) +# define R300_ALU_SRC0A_CONST (1 << 5) +# define R300_ALU_SRC1A_SHIFT 6 +# define R300_ALU_SRC1A_MASK (31 << 6) +# define R300_ALU_SRC1A_CONST (1 << 11) +# define R300_ALU_SRC2A_SHIFT 12 +# define R300_ALU_SRC2A_MASK (31 << 12) +# define R300_ALU_SRC2A_CONST (1 << 17) +# define R300_ALU_SRC_MASK 0x0003ffff +# define R300_ALU_DSTA_SHIFT 18 +# define R300_ALU_DSTA_MASK (31 << 18) +# define R300_ALU_DSTA_REG (1 << 23) +# define R300_ALU_DSTA_OUTPUT (1 << 24) +# define R300_ALU_DSTA_DEPTH (1 << 27) +# define R300_ALPHA_ADDR0(x) ((x) << 0) +# define R300_ALPHA_ADDR1(x) ((x) << 6) +# define R300_ALPHA_ADDR2(x) ((x) << 12) + +#define R300_US_ALU_RGB_INST_0 0x48C0 +# define R300_ALU_ARGC_SRC0C_XYZ 0 
+# define R300_ALU_ARGC_SRC0C_XXX 1 +# define R300_ALU_ARGC_SRC0C_YYY 2 +# define R300_ALU_ARGC_SRC0C_ZZZ 3 +# define R300_ALU_ARGC_SRC1C_XYZ 4 +# define R300_ALU_ARGC_SRC1C_XXX 5 +# define R300_ALU_ARGC_SRC1C_YYY 6 +# define R300_ALU_ARGC_SRC1C_ZZZ 7 +# define R300_ALU_ARGC_SRC2C_XYZ 8 +# define R300_ALU_ARGC_SRC2C_XXX 9 +# define R300_ALU_ARGC_SRC2C_YYY 10 +# define R300_ALU_ARGC_SRC2C_ZZZ 11 +# define R300_ALU_ARGC_SRC0A 12 +# define R300_ALU_ARGC_SRC1A 13 +# define R300_ALU_ARGC_SRC2A 14 +# define R300_ALU_ARGC_SRCP_XYZ 15 +# define R300_ALU_ARGC_SRCP_XXX 16 +# define R300_ALU_ARGC_SRCP_YYY 17 +# define R300_ALU_ARGC_SRCP_ZZZ 18 +# define R300_ALU_ARGC_SRCP_WWW 19 +# define R300_ALU_ARGC_ZERO 20 +# define R300_ALU_ARGC_ONE 21 +# define R300_ALU_ARGC_HALF 22 +# define R300_ALU_ARGC_SRC0C_YZX 23 +# define R300_ALU_ARGC_SRC1C_YZX 24 +# define R300_ALU_ARGC_SRC2C_YZX 25 +# define R300_ALU_ARGC_SRC0C_ZXY 26 +# define R300_ALU_ARGC_SRC1C_ZXY 27 +# define R300_ALU_ARGC_SRC2C_ZXY 28 +# define R300_ALU_ARGC_SRC0CA_WZY 29 +# define R300_ALU_ARGC_SRC1CA_WZY 30 +# define R300_ALU_ARGC_SRC2CA_WZY 31 +# define R300_RGB_SWIZA(x) ((x) << 0) +# define R300_RGB_SWIZB(x) ((x) << 7) +# define R300_RGB_SWIZC(x) ((x) << 14) + +# define R300_ALU_ARG0C_SHIFT 0 +# define R300_ALU_ARG0C_MASK (31 << 0) +# define R300_ALU_ARG0C_NOP (0 << 5) +# define R300_ALU_ARG0C_NEG (1 << 5) +# define R300_ALU_ARG0C_ABS (2 << 5) +# define R300_ALU_ARG0C_NAB (3 << 5) +# define R300_ALU_ARG1C_SHIFT 7 +# define R300_ALU_ARG1C_MASK (31 << 7) +# define R300_ALU_ARG1C_NOP (0 << 12) +# define R300_ALU_ARG1C_NEG (1 << 12) +# define R300_ALU_ARG1C_ABS (2 << 12) +# define R300_ALU_ARG1C_NAB (3 << 12) +# define R300_ALU_ARG2C_SHIFT 14 +# define R300_ALU_ARG2C_MASK (31 << 14) +# define R300_ALU_ARG2C_NOP (0 << 19) +# define R300_ALU_ARG2C_NEG (1 << 19) +# define R300_ALU_ARG2C_ABS (2 << 19) +# define R300_ALU_ARG2C_NAB (3 << 19) +# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) +# define 
R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) +# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) +# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) + +# define R300_ALU_OUTC_MAD (0 << 23) +# define R300_ALU_OUTC_DP3 (1 << 23) +# define R300_ALU_OUTC_DP4 (2 << 23) +# define R300_ALU_OUTC_D2A (3 << 23) +# define R300_ALU_OUTC_MIN (4 << 23) +# define R300_ALU_OUTC_MAX (5 << 23) +# define R300_ALU_OUTC_CMPH (7 << 23) +# define R300_ALU_OUTC_CMP (8 << 23) +# define R300_ALU_OUTC_FRC (9 << 23) +# define R300_ALU_OUTC_REPL_ALPHA (10 << 23) + +# define R300_ALU_OUTC_MOD_NOP (0 << 27) +# define R300_ALU_OUTC_MOD_MUL2 (1 << 27) +# define R300_ALU_OUTC_MOD_MUL4 (2 << 27) +# define R300_ALU_OUTC_MOD_MUL8 (3 << 27) +# define R300_ALU_OUTC_MOD_DIV2 (4 << 27) +# define R300_ALU_OUTC_MOD_DIV4 (5 << 27) +# define R300_ALU_OUTC_MOD_DIV8 (6 << 27) + +# define R300_ALU_OUTC_CLAMP (1 << 30) +# define R300_ALU_INSERT_NOP (1 << 31) + +#define R300_US_ALU_ALPHA_INST_0 0x49C0 +# define R300_ALU_ARGA_SRC0C_X 0 +# define R300_ALU_ARGA_SRC0C_Y 1 +# define R300_ALU_ARGA_SRC0C_Z 2 +# define R300_ALU_ARGA_SRC1C_X 3 +# define R300_ALU_ARGA_SRC1C_Y 4 +# define R300_ALU_ARGA_SRC1C_Z 5 +# define R300_ALU_ARGA_SRC2C_X 6 +# define R300_ALU_ARGA_SRC2C_Y 7 +# define R300_ALU_ARGA_SRC2C_Z 8 +# define R300_ALU_ARGA_SRC0A 9 +# define R300_ALU_ARGA_SRC1A 10 +# define R300_ALU_ARGA_SRC2A 11 +# define R300_ALU_ARGA_SRCP_X 12 +# define R300_ALU_ARGA_SRCP_Y 13 +# define R300_ALU_ARGA_SRCP_Z 14 +# define R300_ALU_ARGA_SRCP_W 15 +# define R300_ALU_ARGA_ZERO 16 +# define R300_ALU_ARGA_ONE 17 +# define R300_ALU_ARGA_HALF 18 +# define R300_ALPHA_SWIZA(x) ((x) << 0) +# define R300_ALPHA_SWIZB(x) ((x) << 7) +# define R300_ALPHA_SWIZC(x) ((x) << 14) + +# define R300_ALU_ARG0A_SHIFT 0 +# define R300_ALU_ARG0A_MASK (31 << 0) +# define R300_ALU_ARG0A_NOP (0 << 5) +# define R300_ALU_ARG0A_NEG (1 << 5) +# define R300_ALU_ARG0A_ABS (2 << 5) +# define R300_ALU_ARG0A_NAB (3 << 5) +# define R300_ALU_ARG1A_SHIFT 7 +# define 
R300_ALU_ARG1A_MASK (31 << 7) +# define R300_ALU_ARG1A_NOP (0 << 12) +# define R300_ALU_ARG1A_NEG (1 << 12) +# define R300_ALU_ARG1A_ABS (2 << 12) +# define R300_ALU_ARG1A_NAB (3 << 12) +# define R300_ALU_ARG2A_SHIFT 14 +# define R300_ALU_ARG2A_MASK (31 << 14) +# define R300_ALU_ARG2A_NOP (0 << 19) +# define R300_ALU_ARG2A_NEG (1 << 19) +# define R300_ALU_ARG2A_ABS (2 << 19) +# define R300_ALU_ARG2A_NAB (3 << 19) +# define R300_ALU_SRCP_1_MINUS_2_SRC0 (0 << 21) +# define R300_ALU_SRCP_SRC1_MINUS_SRC0 (1 << 21) +# define R300_ALU_SRCP_SRC1_PLUS_SRC0 (2 << 21) +# define R300_ALU_SRCP_1_MINUS_SRC0 (3 << 21) + +# define R300_ALU_OUTA_MAD (0 << 23) +# define R300_ALU_OUTA_DP4 (1 << 23) +# define R300_ALU_OUTA_MIN (2 << 23) +# define R300_ALU_OUTA_MAX (3 << 23) +# define R300_ALU_OUTA_CND (5 << 23) +# define R300_ALU_OUTA_CMP (6 << 23) +# define R300_ALU_OUTA_FRC (7 << 23) +# define R300_ALU_OUTA_EX2 (8 << 23) +# define R300_ALU_OUTA_LG2 (9 << 23) +# define R300_ALU_OUTA_RCP (10 << 23) +# define R300_ALU_OUTA_RSQ (11 << 23) + +# define R300_ALU_OUTA_MOD_NOP (0 << 27) +# define R300_ALU_OUTA_MOD_MUL2 (1 << 27) +# define R300_ALU_OUTA_MOD_MUL4 (2 << 27) +# define R300_ALU_OUTA_MOD_MUL8 (3 << 27) +# define R300_ALU_OUTA_MOD_DIV2 (4 << 27) +# define R300_ALU_OUTA_MOD_DIV4 (5 << 27) +# define R300_ALU_OUTA_MOD_DIV8 (6 << 27) + +# define R300_ALU_OUTA_CLAMP (1 << 30) +/* END: Fragment program instruction set */ + +/* Fog: Fog Blending Enable */ +#define R300_FG_FOG_BLEND 0x4bc0 +# define R300_FG_FOG_BLEND_DISABLE (0 << 0) +# define R300_FG_FOG_BLEND_ENABLE (1 << 0) +# define R300_FG_FOG_BLEND_FN_LINEAR (0 << 1) +# define R300_FG_FOG_BLEND_FN_EXP (1 << 1) +# define R300_FG_FOG_BLEND_FN_EXP2 (2 << 1) +# define R300_FG_FOG_BLEND_FN_CONSTANT (3 << 1) +# define R300_FG_FOG_BLEND_FN_MASK (3 << 1) + +/* Fog: Red Component of Fog Color */ +#define R300_FG_FOG_COLOR_R 0x4bc8 +/* Fog: Green Component of Fog Color */ +#define R300_FG_FOG_COLOR_G 0x4bcc +/* Fog: Blue Component of Fog 
Color */ +#define R300_FG_FOG_COLOR_B 0x4bd0 +# define R300_FG_FOG_COLOR_MASK 0x000003ff + +/* Fog: Constant Factor for Fog Blending */ +#define R300_FG_FOG_FACTOR 0x4bc4 +# define FG_FOG_FACTOR_MASK 0x000003ff + +/* Fog: Alpha function */ +#define R300_FG_ALPHA_FUNC 0x4bd4 +# define R300_FG_ALPHA_FUNC_VAL_MASK 0x000000ff +# define R300_FG_ALPHA_FUNC_NEVER (0 << 8) +# define R300_FG_ALPHA_FUNC_LESS (1 << 8) +# define R300_FG_ALPHA_FUNC_EQUAL (2 << 8) +# define R300_FG_ALPHA_FUNC_LE (3 << 8) +# define R300_FG_ALPHA_FUNC_GREATER (4 << 8) +# define R300_FG_ALPHA_FUNC_NOTEQUAL (5 << 8) +# define R300_FG_ALPHA_FUNC_GE (6 << 8) +# define R300_FG_ALPHA_FUNC_ALWAYS (7 << 8) +# define R300_ALPHA_TEST_OP_MASK (7 << 8) +# define R300_FG_ALPHA_FUNC_DISABLE (0 << 11) +# define R300_FG_ALPHA_FUNC_ENABLE (1 << 11) + +# define R500_FG_ALPHA_FUNC_10BIT (0 << 12) +# define R500_FG_ALPHA_FUNC_8BIT (1 << 12) + +# define R300_FG_ALPHA_FUNC_MASK_DISABLE (0 << 16) +# define R300_FG_ALPHA_FUNC_MASK_ENABLE (1 << 16) +# define R300_FG_ALPHA_FUNC_CFG_2_OF_4 (0 << 17) +# define R300_FG_ALPHA_FUNC_CFG_3_OF_6 (1 << 17) + +# define R300_FG_ALPHA_FUNC_DITH_DISABLE (0 << 20) +# define R300_FG_ALPHA_FUNC_DITH_ENABLE (1 << 20) + +# define R500_FG_ALPHA_FUNC_OFFSET_DISABLE (0 << 24) +# define R500_FG_ALPHA_FUNC_OFFSET_ENABLE (1 << 24) /* Not supported in R520 */ +# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_DISABLE (0 << 25) +# define R500_FG_ALPHA_FUNC_DISC_ZERO_MASK_ENABLE (1 << 25) + +# define R500_FG_ALPHA_FUNC_FP16_DISABLE (0 << 28) +# define R500_FG_ALPHA_FUNC_FP16_ENABLE (1 << 28) + + +/* Fog: Where does the depth come from? 
*/ +#define R300_FG_DEPTH_SRC 0x4bd8 +# define R300_FG_DEPTH_SRC_SCAN (0 << 0) +# define R300_FG_DEPTH_SRC_SHADER (1 << 0) + +/* Fog: Alpha Compare Value */ +#define R500_FG_ALPHA_VALUE 0x4be0 +# define R500_FG_ALPHA_VALUE_MASK 0x0000ffff + +/* gap */ + +/* Fragment program parameters in 7.16 floating point */ +#define R300_PFS_PARAM_0_X 0x4C00 +#define R300_PFS_PARAM_0_Y 0x4C04 +#define R300_PFS_PARAM_0_Z 0x4C08 +#define R300_PFS_PARAM_0_W 0x4C0C +/* last consts */ +#define R300_PFS_PARAM_31_X 0x4DF0 +#define R300_PFS_PARAM_31_Y 0x4DF4 +#define R300_PFS_PARAM_31_Z 0x4DF8 +#define R300_PFS_PARAM_31_W 0x4DFC + +/* Unpipelined. */ +#define R300_RB3D_CCTL 0x4e00 +# define R300_RB3D_CCTL_NUM_MULTIWRITES_1_BUFFER (0 << 5) +# define R300_RB3D_CCTL_NUM_MULTIWRITES_2_BUFFERS (1 << 5) +# define R300_RB3D_CCTL_NUM_MULTIWRITES_3_BUFFERS (2 << 5) +# define R300_RB3D_CCTL_NUM_MULTIWRITES_4_BUFFERS (3 << 5) +# define R300_RB3D_CCTL_CLRCMP_FLIPE_DISABLE (0 << 7) +# define R300_RB3D_CCTL_CLRCMP_FLIPE_ENABLE (1 << 7) +# define R300_RB3D_CCTL_AA_COMPRESSION_DISABLE (0 << 9) +# define R300_RB3D_CCTL_AA_COMPRESSION_ENABLE (1 << 9) +# define R300_RB3D_CCTL_CMASK_DISABLE (0 << 10) +# define R300_RB3D_CCTL_CMASK_ENABLE (1 << 10) +/* reserved */ +# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_DISABLE (0 << 12) +# define R300_RB3D_CCTL_INDEPENDENT_COLOR_CHANNEL_MASK_ENABLE (1 << 12) +# define R300_RB3D_CCTL_WRITE_COMPRESSION_ENABLE (0 << 13) +# define R300_RB3D_CCTL_WRITE_COMPRESSION_DISABLE (1 << 13) +# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_DISABLE (0 << 14) +# define R300_RB3D_CCTL_INDEPENDENT_COLORFORMAT_ENABLE_ENABLE (1 << 14) + + +/* Notes: + * - AFAIK fglrx always sets BLEND_UNKNOWN when blending is used in + * the application + * - AFAIK fglrx always sets BLEND_NO_SEPARATE when CBLEND and ABLEND + * are set to the same + * function (both registers are always set up completely in any case) + * - Most blend flags are simply copied from R200 and not tested yet + 
*/ +#define R300_RB3D_CBLEND 0x4E04 +#define R300_RB3D_ABLEND 0x4E08 +/* the following only appear in CBLEND */ +# define R300_ALPHA_BLEND_ENABLE (1 << 0) +# define R300_SEPARATE_ALPHA_ENABLE (1 << 1) +# define R300_READ_ENABLE (1 << 2) +# define R300_DISCARD_SRC_PIXELS_DIS (0 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_0 (1 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_0 (2 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_0 (3 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_1 (4 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_COLOR_1 (5 << 3) +# define R300_DISCARD_SRC_PIXELS_SRC_ALPHA_COLOR_1 (6 << 3) + +/* the following are shared between CBLEND and ABLEND */ +# define R300_FCN_MASK (3 << 12) +# define R300_COMB_FCN_ADD_CLAMP (0 << 12) +# define R300_COMB_FCN_ADD_NOCLAMP (1 << 12) +# define R300_COMB_FCN_SUB_CLAMP (2 << 12) +# define R300_COMB_FCN_SUB_NOCLAMP (3 << 12) +# define R300_COMB_FCN_MIN (4 << 12) +# define R300_COMB_FCN_MAX (5 << 12) +# define R300_COMB_FCN_RSUB_CLAMP (6 << 12) +# define R300_COMB_FCN_RSUB_NOCLAMP (7 << 12) +# define R300_BLEND_GL_ZERO (32) +# define R300_BLEND_GL_ONE (33) +# define R300_BLEND_GL_SRC_COLOR (34) +# define R300_BLEND_GL_ONE_MINUS_SRC_COLOR (35) +# define R300_BLEND_GL_DST_COLOR (36) +# define R300_BLEND_GL_ONE_MINUS_DST_COLOR (37) +# define R300_BLEND_GL_SRC_ALPHA (38) +# define R300_BLEND_GL_ONE_MINUS_SRC_ALPHA (39) +# define R300_BLEND_GL_DST_ALPHA (40) +# define R300_BLEND_GL_ONE_MINUS_DST_ALPHA (41) +# define R300_BLEND_GL_SRC_ALPHA_SATURATE (42) +# define R300_BLEND_GL_CONST_COLOR (43) +# define R300_BLEND_GL_ONE_MINUS_CONST_COLOR (44) +# define R300_BLEND_GL_CONST_ALPHA (45) +# define R300_BLEND_GL_ONE_MINUS_CONST_ALPHA (46) +# define R300_BLEND_MASK (63) +# define R300_SRC_BLEND_SHIFT (16) +# define R300_DST_BLEND_SHIFT (24) + +/* Constant color used by the blender. Pipelined through the blender. 
+ * Note: For R520, this field is ignored, use RB3D_CONSTANT_COLOR_GB__BLUE, + * RB3D_CONSTANT_COLOR_GB__GREEN, etc. instead. + */ +#define R300_RB3D_BLEND_COLOR 0x4E10 + + +/* 3D Color Channel Mask. If all the channels used in the current color format + * are disabled, then the cb will discard all the incoming quads. Pipelined + * through the blender. + */ +#define RB3D_COLOR_CHANNEL_MASK 0x4E0C +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK0 (1 << 0) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK0 (1 << 1) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK0 (1 << 2) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK0 (1 << 3) +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK1 (1 << 4) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK1 (1 << 5) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK1 (1 << 6) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK1 (1 << 7) +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK2 (1 << 8) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK2 (1 << 9) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK2 (1 << 10) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK2 (1 << 11) +# define RB3D_COLOR_CHANNEL_MASK_BLUE_MASK3 (1 << 12) +# define RB3D_COLOR_CHANNEL_MASK_GREEN_MASK3 (1 << 13) +# define RB3D_COLOR_CHANNEL_MASK_RED_MASK3 (1 << 14) +# define RB3D_COLOR_CHANNEL_MASK_ALPHA_MASK3 (1 << 15) + +/* Clear color that is used when the color mask is set to 00. Unpipelined. + * Program this register with a 32-bit value in ARGB8888 or ARGB2101010 + * formats, ignoring the fields. + */ +#define RB3D_COLOR_CLEAR_VALUE 0x4e14 + +/* gap */ + +/* Color Compare Color. Stalls the 2d/3d datapath until it is idle. */ +#define RB3D_CLRCMP_CLR 0x4e20 + +/* Color Compare Mask. Stalls the 2d/3d datapath until it is idle. */ +#define RB3D_CLRCMP_MSK 0x4e24 + +/* Color Buffer Address Offset of multibuffer 0. Unpipelined. */ +#define R300_RB3D_COLOROFFSET0 0x4E28 +# define R300_COLOROFFSET_MASK 0xFFFFFFE0 +/* Color Buffer Address Offset of multibuffer 1. Unpipelined. 
*/ +#define R300_RB3D_COLOROFFSET1 0x4E2C +/* Color Buffer Address Offset of multibuffer 2. Unpipelined. */ +#define R300_RB3D_COLOROFFSET2 0x4E30 +/* Color Buffer Address Offset of multibuffer 3. Unpipelined. */ +#define R300_RB3D_COLOROFFSET3 0x4E34 + +/* Color buffer format and tiling control for all the multibuffers and the + * pitch of multibuffer 0 to 3. Unpipelined. The cache must be empty before any + * of the registers are changed. + * + * Bit 16: Larger tiles + * Bit 17: 4x2 tiles + * Bit 18: Extremely weird tile like, but some pixels duplicated? + */ +#define R300_RB3D_COLORPITCH0 0x4E38 +# define R300_COLORPITCH_MASK 0x00003FFE +# define R300_COLOR_TILE_DISABLE (0 << 16) +# define R300_COLOR_TILE_ENABLE (1 << 16) +# define R300_COLOR_MICROTILE_DISABLE (0 << 17) +# define R300_COLOR_MICROTILE_ENABLE (1 << 17) +# define R300_COLOR_MICROTILE_ENABLE_SQUARE (2 << 17) /* Only available in 16-bit */ +# define R300_COLOR_ENDIAN_NO_SWAP (0 << 19) +# define R300_COLOR_ENDIAN_WORD_SWAP (1 << 19) +# define R300_COLOR_ENDIAN_DWORD_SWAP (2 << 19) +# define R300_COLOR_ENDIAN_HALF_DWORD_SWAP (3 << 19) +# define R500_COLOR_FORMAT_ARGB10101010 (0 << 21) +# define R500_COLOR_FORMAT_UV1010 (1 << 21) +# define R500_COLOR_FORMAT_CI8 (2 << 21) /* 2D only */ +# define R300_COLOR_FORMAT_ARGB1555 (3 << 21) +# define R300_COLOR_FORMAT_RGB565 (4 << 21) +# define R500_COLOR_FORMAT_ARGB2101010 (5 << 21) +# define R300_COLOR_FORMAT_ARGB8888 (6 << 21) +# define R300_COLOR_FORMAT_ARGB32323232 (7 << 21) +/* reserved */ +# define R300_COLOR_FORMAT_I8 (9 << 21) +# define R300_COLOR_FORMAT_ARGB16161616 (10 << 21) +# define R300_COLOR_FORMAT_VYUY (11 << 21) +# define R300_COLOR_FORMAT_YVYU (12 << 21) +# define R300_COLOR_FORMAT_UV88 (13 << 21) +# define R500_COLOR_FORMAT_I10 (14 << 21) +# define R300_COLOR_FORMAT_ARGB4444 (15 << 21) +#define R300_RB3D_COLORPITCH1 0x4E3C +#define R300_RB3D_COLORPITCH2 0x4E40 +#define R300_RB3D_COLORPITCH3 0x4E44 + +/* gap */ + +/* Destination Color Buffer 
Cache Control/Status. If the cb is in e2 mode, then + * a flush or free will not occur upon a write to this register, but a sync + * will be immediately sent if one is requested. If both DC_FLUSH and DC_FREE + * are zero but DC_FINISH is one, then a sync will be sent immediately -- the + * cb will not wait for all the previous operations to complete before sending + * the sync. Unpipelined except when DC_FINISH and DC_FREE are both set to + * zero. + * + * Set to 0A before 3D operations, set to 02 afterwards. + */ +#define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT (0 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_NO_EFFECT_1 (1 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D (2 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D_1 (3 << 0) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT (0 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_NO_EFFECT_1 (1 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS (2 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FREE_FREE_3D_TAGS_1 (3 << 2) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_NO_SIGNAL (0 << 4) +# define R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL (1 << 4) + +#define R300_RB3D_DITHER_CTL 0x4E50 +# define R300_RB3D_DITHER_CTL_DITHER_MODE_TRUNCATE (0 << 0) +# define R300_RB3D_DITHER_CTL_DITHER_MODE_ROUND (1 << 0) +# define R300_RB3D_DITHER_CTL_DITHER_MODE_LUT (2 << 0) +/* reserved */ +# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_TRUNCATE (0 << 2) +# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_ROUND (1 << 2) +# define R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT (2 << 2) +/* reserved */ + +/* Resolve buffer destination address. The cache must be empty before changing + * this register if the cb is in resolve mode. 
Unpipelined + */ +#define R300_RB3D_AARESOLVE_OFFSET 0x4e80 +# define R300_RB3D_AARESOLVE_OFFSET_SHIFT 5 +# define R300_RB3D_AARESOLVE_OFFSET_MASK 0xffffffe0 /* At least according to the calculations of Christoph Brill */ + +/* Resolve Buffer Pitch and Tiling Control. The cache must be empty before + * changing this register if the cb is in resolve mode. Unpipelined + */ +#define R300_RB3D_AARESOLVE_PITCH 0x4e84 +# define R300_RB3D_AARESOLVE_PITCH_SHIFT 1 +# define R300_RB3D_AARESOLVE_PITCH_MASK 0x00003ffe /* At least according to the calculations of Christoph Brill */ + +/* Resolve Buffer Control. Unpipelined */ +#define R300_RB3D_AARESOLVE_CTL 0x4e88 +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_NORMAL (0 << 0) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_MODE_RESOLVE (1 << 0) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_10 (0 << 1) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_GAMMA_22 (1 << 1) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_SAMPLE0 (0 << 2) +# define R300_RB3D_AARESOLVE_CTL_AARESOLVE_ALPHA_AVERAGE (1 << 2) + + +/* Discard src pixels less than or equal to threshold. */ +#define R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD 0x4ea0 +/* Discard src pixels greater than or equal to threshold. */ +#define R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD 0x4ea4 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_SHIFT 0 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_BLUE_MASK 0x000000ff +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_SHIFT 8 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_GREEN_MASK 0x0000ff00 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_SHIFT 16 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_RED_MASK 0x00ff0000 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_SHIFT 24 +# define R500_RB3D_DISCARD_SRC_PIXEL_THRESHOLD_ALPHA_MASK 0xff000000 + +/* 3D ROP Control. Stalls the 2d/3d datapath until it is idle. 
*/ +#define R300_RB3D_ROPCNTL 0x4e18 +# define R300_RB3D_ROPCNTL_ROP_ENABLE 0x00000004 +# define R300_RB3D_ROPCNTL_ROP_MASK (15 << 8) +# define R300_RB3D_ROPCNTL_ROP_SHIFT 8 + +/* Color Compare Flip. Stalls the 2d/3d datapath until it is idle. */ +#define R300_RB3D_CLRCMP_FLIPE 0x4e1c + +/* Sets the fifo sizes */ +#define R500_RB3D_FIFO_SIZE 0x4ef4 +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_FULL (0 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_HALF (1 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_QUATER (2 << 0) +# define R500_RB3D_FIFO_SIZE_OP_FIFO_SIZE_EIGTHS (3 << 0) + +/* Constant color used by the blender. Pipelined through the blender. */ +#define R500_RB3D_CONSTANT_COLOR_AR 0x4ef8 +# define R500_RB3D_CONSTANT_COLOR_AR_RED_MASK 0x0000ffff +# define R500_RB3D_CONSTANT_COLOR_AR_RED_SHIFT 0 +# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_MASK 0xffff0000 +# define R500_RB3D_CONSTANT_COLOR_AR_ALPHA_SHIFT 16 + +/* Constant color used by the blender. Pipelined through the blender. */ +#define R500_RB3D_CONSTANT_COLOR_GB 0x4efc +# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_MASK 0x0000ffff +# define R500_RB3D_CONSTANT_COLOR_AR_BLUE_SHIFT 0 +# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_MASK 0xffff0000 +# define R500_RB3D_CONSTANT_COLOR_AR_GREEN_SHIFT 16 + +/* gap */ +/* There seems to be no "write only" setting, so use Z-test = ALWAYS + * for this. + * Bit (1<<8) is the "test" bit. 
so plain write is 6 - vd + */ +#define R300_ZB_CNTL 0x4F00 +# define R300_STENCIL_ENABLE (1 << 0) +# define R300_Z_ENABLE (1 << 1) +# define R300_Z_WRITE_ENABLE (1 << 2) +# define R300_Z_SIGNED_COMPARE (1 << 3) +# define R300_STENCIL_FRONT_BACK (1 << 4) + +#define R300_ZB_ZSTENCILCNTL 0x4f04 + /* functions */ +# define R300_ZS_NEVER 0 +# define R300_ZS_LESS 1 +# define R300_ZS_LEQUAL 2 +# define R300_ZS_EQUAL 3 +# define R300_ZS_GEQUAL 4 +# define R300_ZS_GREATER 5 +# define R300_ZS_NOTEQUAL 6 +# define R300_ZS_ALWAYS 7 +# define R300_ZS_MASK 7 + /* operations */ +# define R300_ZS_KEEP 0 +# define R300_ZS_ZERO 1 +# define R300_ZS_REPLACE 2 +# define R300_ZS_INCR 3 +# define R300_ZS_DECR 4 +# define R300_ZS_INVERT 5 +# define R300_ZS_INCR_WRAP 6 +# define R300_ZS_DECR_WRAP 7 +# define R300_Z_FUNC_SHIFT 0 + /* front and back refer to operations done for front + and back faces, i.e. separate stencil function support */ +# define R300_S_FRONT_FUNC_SHIFT 3 +# define R300_S_FRONT_SFAIL_OP_SHIFT 6 +# define R300_S_FRONT_ZPASS_OP_SHIFT 9 +# define R300_S_FRONT_ZFAIL_OP_SHIFT 12 +# define R300_S_BACK_FUNC_SHIFT 15 +# define R300_S_BACK_SFAIL_OP_SHIFT 18 +# define R300_S_BACK_ZPASS_OP_SHIFT 21 +# define R300_S_BACK_ZFAIL_OP_SHIFT 24 + +#define R300_ZB_STENCILREFMASK 0x4f08 +# define R300_STENCILREF_SHIFT 0 +# define R300_STENCILREF_MASK 0x000000ff +# define R300_STENCILMASK_SHIFT 8 +# define R300_STENCILMASK_MASK 0x0000ff00 +# define R300_STENCILWRITEMASK_SHIFT 16 +# define R300_STENCILWRITEMASK_MASK 0x00ff0000 + +/* gap */ + +#define R300_ZB_FORMAT 0x4f10 +# define R300_DEPTHFORMAT_16BIT_INT_Z (0 << 0) +# define R300_DEPTHFORMAT_16BIT_13E3 (1 << 0) +# define R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL (2 << 0) +/* reserved up to (15 << 0) */ +# define R300_INVERT_13E3_LEADING_ONES (0 << 4) +# define R300_INVERT_13E3_LEADING_ZEROS (1 << 4) + +#define R300_ZB_ZTOP 0x4F14 +# define R300_ZTOP_DISABLE (0 << 0) +# define R300_ZTOP_ENABLE (1 << 0) + +/* gap */ + +#define 
R300_ZB_ZCACHE_CTLSTAT 0x4f18 +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_NO_EFFECT (0 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE (1 << 0) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_NO_EFFECT (0 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE (1 << 1) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_IDLE (0 << 31) +# define R300_ZB_ZCACHE_CTLSTAT_ZC_BUSY_BUSY (1 << 31) + +#define R300_ZB_BW_CNTL 0x4f1c +# define R300_HIZ_DISABLE (0 << 0) +# define R300_HIZ_ENABLE (1 << 0) +# define R300_HIZ_MIN (0 << 1) +# define R300_HIZ_MAX (1 << 1) +# define R300_FAST_FILL_DISABLE (0 << 2) +# define R300_FAST_FILL_ENABLE (1 << 2) +# define R300_RD_COMP_DISABLE (0 << 3) +# define R300_RD_COMP_ENABLE (1 << 3) +# define R300_WR_COMP_DISABLE (0 << 4) +# define R300_WR_COMP_ENABLE (1 << 4) +# define R300_ZB_CB_CLEAR_RMW (0 << 5) +# define R300_ZB_CB_CLEAR_CACHE_LINEAR (1 << 5) +# define R300_FORCE_COMPRESSED_STENCIL_VALUE_DISABLE (0 << 6) +# define R300_FORCE_COMPRESSED_STENCIL_VALUE_ENABLE (1 << 6) + +# define R500_ZEQUAL_OPTIMIZE_ENABLE (0 << 7) +# define R500_ZEQUAL_OPTIMIZE_DISABLE (1 << 7) +# define R500_SEQUAL_OPTIMIZE_ENABLE (0 << 8) +# define R500_SEQUAL_OPTIMIZE_DISABLE (1 << 8) + +# define R500_BMASK_ENABLE (0 << 10) +# define R500_BMASK_DISABLE (1 << 10) +# define R500_HIZ_EQUAL_REJECT_DISABLE (0 << 11) +# define R500_HIZ_EQUAL_REJECT_ENABLE (1 << 11) +# define R500_HIZ_FP_EXP_BITS_DISABLE (0 << 12) +# define R500_HIZ_FP_EXP_BITS_1 (1 << 12) +# define R500_HIZ_FP_EXP_BITS_2 (2 << 12) +# define R500_HIZ_FP_EXP_BITS_3 (3 << 12) +# define R500_HIZ_FP_EXP_BITS_4 (4 << 12) +# define R500_HIZ_FP_EXP_BITS_5 (5 << 12) +# define R500_HIZ_FP_INVERT_LEADING_ONES (0 << 15) +# define R500_HIZ_FP_INVERT_LEADING_ZEROS (1 << 15) +# define R500_TILE_OVERWRITE_RECOMPRESSION_ENABLE (0 << 16) +# define R500_TILE_OVERWRITE_RECOMPRESSION_DISABLE (1 << 16) +# define R500_CONTIGUOUS_6XAA_SAMPLES_ENABLE (0 << 17) +# define R500_CONTIGUOUS_6XAA_SAMPLES_DISABLE (1 << 17) +# define 
R500_PEQ_PACKING_DISABLE (0 << 18) +# define R500_PEQ_PACKING_ENABLE (1 << 18) +# define R500_COVERED_PTR_MASKING_DISABLE (0 << 18) +# define R500_COVERED_PTR_MASKING_ENABLE (1 << 18) + + +/* gap */ + +/* Z Buffer Address Offset. + * Bits 31 to 5 are used for aligned Z buffer address offset for macro tiles. + */ +#define R300_ZB_DEPTHOFFSET 0x4f20 + +/* Z Buffer Pitch and Endian Control */ +#define R300_ZB_DEPTHPITCH 0x4f24 +# define R300_DEPTHPITCH_MASK 0x00003FFC +# define R300_DEPTHMACROTILE_DISABLE (0 << 16) +# define R300_DEPTHMACROTILE_ENABLE (1 << 16) +# define R300_DEPTHMICROTILE_LINEAR (0 << 17) +# define R300_DEPTHMICROTILE_TILED (1 << 17) +# define R300_DEPTHMICROTILE_TILED_SQUARE (2 << 17) +# define R300_DEPTHENDIAN_NO_SWAP (0 << 18) +# define R300_DEPTHENDIAN_WORD_SWAP (1 << 18) +# define R300_DEPTHENDIAN_DWORD_SWAP (2 << 18) +# define R300_DEPTHENDIAN_HALF_DWORD_SWAP (3 << 18) + +/* Z Buffer Clear Value */ +#define R300_ZB_DEPTHCLEARVALUE 0x4f28 + +/* Hierarchical Z Memory Offset */ +#define R300_ZB_HIZ_OFFSET 0x4f44 + +/* Hierarchical Z Write Index */ +#define R300_ZB_HIZ_WRINDEX 0x4f48 + +/* Hierarchical Z Data */ +#define R300_ZB_HIZ_DWORD 0x4f4c + +/* Hierarchical Z Read Index */ +#define R300_ZB_HIZ_RDINDEX 0x4f50 + +/* Hierarchical Z Pitch */ +#define R300_ZB_HIZ_PITCH 0x4f54 + +/* Z Buffer Z Pass Counter Data */ +#define R300_ZB_ZPASS_DATA 0x4f58 + +/* Z Buffer Z Pass Counter Address */ +#define R300_ZB_ZPASS_ADDR 0x4f5c + +/* Depth buffer X and Y coordinate offset */ +#define R300_ZB_DEPTHXY_OFFSET 0x4f60 +# define R300_DEPTHX_OFFSET_SHIFT 1 +# define R300_DEPTHX_OFFSET_MASK 0x000007FE +# define R300_DEPTHY_OFFSET_SHIFT 17 +# define R300_DEPTHY_OFFSET_MASK 0x07FE0000 + +/* Sets the fifo sizes */ +#define R500_ZB_FIFO_SIZE 0x4fd0 +# define R500_OP_FIFO_SIZE_FULL (0 << 0) +# define R500_OP_FIFO_SIZE_HALF (1 << 0) +# define R500_OP_FIFO_SIZE_QUATER (2 << 0) +# define R500_OP_FIFO_SIZE_EIGTHS (4 << 0) + +/* Stencil Reference Value and Mask for 
backfacing quads */ +/* R300_ZB_STENCILREFMASK handles front face */ +#define R500_ZB_STENCILREFMASK_BF 0x4fd4 +# define R500_STENCILREF_SHIFT 0 +# define R500_STENCILREF_MASK 0x000000ff +# define R500_STENCILMASK_SHIFT 8 +# define R500_STENCILMASK_MASK 0x0000ff00 +# define R500_STENCILWRITEMASK_SHIFT 16 +# define R500_STENCILWRITEMASK_MASK 0x00ff0000 + +/** + * \defgroup R3XX_R5XX_PROGRAMMABLE_VERTEX_SHADER_DESCRIPTION R3XX-R5XX PROGRAMMABLE VERTEX SHADER DESCRIPTION + * + * The PVS_DST_MATH_INST is used to identify whether the instruction is a Vector + * Engine instruction or a Math Engine instruction. + */ + +/*\{*/ + +enum { + /* R3XX */ + VECTOR_NO_OP = 0, + VE_DOT_PRODUCT = 1, + VE_MULTIPLY = 2, + VE_ADD = 3, + VE_MULTIPLY_ADD = 4, + VE_DISTANCE_VECTOR = 5, + VE_FRACTION = 6, + VE_MAXIMUM = 7, + VE_MINIMUM = 8, + VE_SET_GREATER_THAN_EQUAL = 9, + VE_SET_LESS_THAN = 10, + VE_MULTIPLYX2_ADD = 11, + VE_MULTIPLY_CLAMP = 12, + VE_FLT2FIX_DX = 13, + VE_FLT2FIX_DX_RND = 14, + /* R5XX */ + VE_PRED_SET_EQ_PUSH = 15, + VE_PRED_SET_GT_PUSH = 16, + VE_PRED_SET_GTE_PUSH = 17, + VE_PRED_SET_NEQ_PUSH = 18, + VE_COND_WRITE_EQ = 19, + VE_COND_WRITE_GT = 20, + VE_COND_WRITE_GTE = 21, + VE_COND_WRITE_NEQ = 22, + VE_COND_MUX_EQ = 23, + VE_COND_MUX_GT = 24, + VE_COND_MUX_GTE = 25, + VE_SET_GREATER_THAN = 26, + VE_SET_EQUAL = 27, + VE_SET_NOT_EQUAL = 28, +}; + +enum { + /* R3XX */ + MATH_NO_OP = 0, + ME_EXP_BASE2_DX = 1, + ME_LOG_BASE2_DX = 2, + ME_EXP_BASEE_FF = 3, + ME_LIGHT_COEFF_DX = 4, + ME_POWER_FUNC_FF = 5, + ME_RECIP_DX = 6, + ME_RECIP_FF = 7, + ME_RECIP_SQRT_DX = 8, + ME_RECIP_SQRT_FF = 9, + ME_MULTIPLY = 10, + ME_EXP_BASE2_FULL_DX = 11, + ME_LOG_BASE2_FULL_DX = 12, + ME_POWER_FUNC_FF_CLAMP_B = 13, + ME_POWER_FUNC_FF_CLAMP_B1 = 14, + ME_POWER_FUNC_FF_CLAMP_01 = 15, + ME_SIN = 16, + ME_COS = 17, + /* R5XX */ + ME_LOG_BASE2_IEEE = 18, + ME_RECIP_IEEE = 19, + ME_RECIP_SQRT_IEEE = 20, + ME_PRED_SET_EQ = 21, + ME_PRED_SET_GT = 22, + ME_PRED_SET_GTE = 23, + ME_PRED_SET_NEQ = 24, 
+    ME_PRED_SET_CLR = 25,
+    ME_PRED_SET_INV = 26,
+    ME_PRED_SET_POP = 27,
+    ME_PRED_SET_RESTORE = 28,
+};
+
+/* Macro opcodes. */
+enum {
+    /* R3XX */
+    PVS_MACRO_OP_2CLK_MADD = 0,
+    PVS_MACRO_OP_2CLK_M2X_ADD = 1,
+};
+
+/* Register types selectable for a source operand. */
+enum {
+    PVS_SRC_REG_TEMPORARY = 0, /* Intermediate Storage */
+    PVS_SRC_REG_INPUT = 1, /* Input Vertex Storage */
+    PVS_SRC_REG_CONSTANT = 2, /* Constant State Storage */
+    PVS_SRC_REG_ALT_TEMPORARY = 3, /* Alternate Intermediate Storage */
+};
+
+/* Register types selectable for a destination operand. */
+enum {
+    PVS_DST_REG_TEMPORARY = 0, /* Intermediate Storage */
+    PVS_DST_REG_A0 = 1, /* Address Register Storage */
+    PVS_DST_REG_OUT = 2, /* Output Memory. Used for all outputs */
+    PVS_DST_REG_OUT_REPL_X = 3, /* Output Memory & Replicate X to all channels */
+    PVS_DST_REG_ALT_TEMPORARY = 4, /* Alternate Intermediate Storage */
+    PVS_DST_REG_INPUT = 5, /* Output Memory & Replicate X to all channels */
+    /* NOTE(review): the description on PVS_DST_REG_INPUT is word-for-word
+     * the one on PVS_DST_REG_OUT_REPL_X and does not match the name; it
+     * looks like a copy-paste slip -- confirm what value 5 selects.
+     */
+};
+
+/* Per-component source swizzle selectors. */
+enum {
+    PVS_SRC_SELECT_X = 0, /* Select X Component */
+    PVS_SRC_SELECT_Y = 1, /* Select Y Component */
+    PVS_SRC_SELECT_Z = 2, /* Select Z Component */
+    PVS_SRC_SELECT_W = 3, /* Select W Component */
+    PVS_SRC_SELECT_FORCE_0 = 4, /* Force Component to 0.0 */
+    PVS_SRC_SELECT_FORCE_1 = 5, /* Force Component to 1.0 */
+};
+
+/* PVS Opcode & Destination Operand Description */
+
+/* Each field is described by a *_SHIFT (position of its lowest bit) and a
+ * *_MASK that is relative to bit 0, not to the field's position -- e.g.
+ * PVS_DST_MATH_INST_MASK is 0x1 while PVS_DST_MATH_INST_SHIFT is 6.
+ */
+enum {
+    PVS_DST_OPCODE_MASK = 0x3f,
+    PVS_DST_OPCODE_SHIFT = 0,
+    PVS_DST_MATH_INST_MASK = 0x1,
+    PVS_DST_MATH_INST_SHIFT = 6,
+    PVS_DST_MACRO_INST_MASK = 0x1,
+    PVS_DST_MACRO_INST_SHIFT = 7,
+    PVS_DST_REG_TYPE_MASK = 0xf,
+    PVS_DST_REG_TYPE_SHIFT = 8,
+    PVS_DST_ADDR_MODE_1_MASK = 0x1,
+    PVS_DST_ADDR_MODE_1_SHIFT = 12,
+    PVS_DST_OFFSET_MASK = 0x7f,
+    PVS_DST_OFFSET_SHIFT = 13,
+    /* Per-component write enables (X, Y, Z, W). */
+    PVS_DST_WE_X_MASK = 0x1,
+    PVS_DST_WE_X_SHIFT = 20,
+    PVS_DST_WE_Y_MASK = 0x1,
+    PVS_DST_WE_Y_SHIFT = 21,
+    PVS_DST_WE_Z_MASK = 0x1,
+    PVS_DST_WE_Z_SHIFT = 22,
+    PVS_DST_WE_W_MASK = 0x1,
+    PVS_DST_WE_W_SHIFT = 23,
+    PVS_DST_VE_SAT_MASK = 0x1,
+    PVS_DST_VE_SAT_SHIFT = 24,
+    PVS_DST_ME_SAT_MASK = 0x1,
+    PVS_DST_ME_SAT_SHIFT = 25,
+    PVS_DST_PRED_ENABLE_MASK = 0x1,
+
PVS_DST_PRED_ENABLE_SHIFT = 26, + PVS_DST_PRED_SENSE_MASK = 0x1, + PVS_DST_PRED_SENSE_SHIFT = 27, + PVS_DST_DUAL_MATH_OP_MASK = 0x3, + PVS_DST_DUAL_MATH_OP_SHIFT = 27, + PVS_DST_ADDR_SEL_MASK = 0x3, + PVS_DST_ADDR_SEL_SHIFT = 29, + PVS_DST_ADDR_MODE_0_MASK = 0x1, + PVS_DST_ADDR_MODE_0_SHIFT = 31, +}; + +/* PVS Source Operand Description */ + +enum { + PVS_SRC_REG_TYPE_MASK = 0x3, + PVS_SRC_REG_TYPE_SHIFT = 0, + SPARE_0_MASK = 0x1, + SPARE_0_SHIFT = 2, + PVS_SRC_ABS_XYZW_MASK = 0x1, + PVS_SRC_ABS_XYZW_SHIFT = 3, + PVS_SRC_ADDR_MODE_0_MASK = 0x1, + PVS_SRC_ADDR_MODE_0_SHIFT = 4, + PVS_SRC_OFFSET_MASK = 0xff, + PVS_SRC_OFFSET_SHIFT = 5, + PVS_SRC_SWIZZLE_X_MASK = 0x7, + PVS_SRC_SWIZZLE_X_SHIFT = 13, + PVS_SRC_SWIZZLE_Y_MASK = 0x7, + PVS_SRC_SWIZZLE_Y_SHIFT = 16, + PVS_SRC_SWIZZLE_Z_MASK = 0x7, + PVS_SRC_SWIZZLE_Z_SHIFT = 19, + PVS_SRC_SWIZZLE_W_MASK = 0x7, + PVS_SRC_SWIZZLE_W_SHIFT = 22, + PVS_SRC_MODIFIER_X_MASK = 0x1, + PVS_SRC_MODIFIER_X_SHIFT = 25, + PVS_SRC_MODIFIER_Y_MASK = 0x1, + PVS_SRC_MODIFIER_Y_SHIFT = 26, + PVS_SRC_MODIFIER_Z_MASK = 0x1, + PVS_SRC_MODIFIER_Z_SHIFT = 27, + PVS_SRC_MODIFIER_W_MASK = 0x1, + PVS_SRC_MODIFIER_W_SHIFT = 28, + PVS_SRC_ADDR_SEL_MASK = 0x3, + PVS_SRC_ADDR_SEL_SHIFT = 29, + PVS_SRC_ADDR_MODE_1_MASK = 0x0, + PVS_SRC_ADDR_MODE_1_SHIFT = 32, +}; + +/*\}*/ + +/* BEGIN: Packet 3 commands */ + +/* A primitive emission dword. 
*/ +#define R300_PRIM_TYPE_NONE (0 << 0) +#define R300_PRIM_TYPE_POINT (1 << 0) +#define R300_PRIM_TYPE_LINE (2 << 0) +#define R300_PRIM_TYPE_LINE_STRIP (3 << 0) +#define R300_PRIM_TYPE_TRI_LIST (4 << 0) +#define R300_PRIM_TYPE_TRI_FAN (5 << 0) +#define R300_PRIM_TYPE_TRI_STRIP (6 << 0) +#define R300_PRIM_TYPE_TRI_TYPE2 (7 << 0) +#define R300_PRIM_TYPE_RECT_LIST (8 << 0) +#define R300_PRIM_TYPE_3VRT_POINT_LIST (9 << 0) +#define R300_PRIM_TYPE_3VRT_LINE_LIST (10 << 0) + /* GUESS (based on r200) */ +#define R300_PRIM_TYPE_POINT_SPRITES (11 << 0) +#define R300_PRIM_TYPE_LINE_LOOP (12 << 0) +#define R300_PRIM_TYPE_QUADS (13 << 0) +#define R300_PRIM_TYPE_QUAD_STRIP (14 << 0) +#define R300_PRIM_TYPE_POLYGON (15 << 0) +#define R300_PRIM_TYPE_MASK 0xF +#define R300_PRIM_WALK_IND (1 << 4) +#define R300_PRIM_WALK_LIST (2 << 4) +#define R300_PRIM_WALK_RING (3 << 4) +#define R300_PRIM_WALK_MASK (3 << 4) + /* GUESS (based on r200) */ +#define R300_PRIM_COLOR_ORDER_BGRA (0 << 6) +#define R300_PRIM_COLOR_ORDER_RGBA (1 << 6) +#define R300_PRIM_NUM_VERTICES_SHIFT 16 +#define R300_PRIM_NUM_VERTICES_MASK 0xffff + + + +/* + * The R500 unified shader (US) registers come in banks of 512 each, one + * for each instruction slot in the shader. You can't touch them directly. + * R500_US_VECTOR_INDEX() sets the base instruction to modify; successive + * writes to R500_GA_US_VECTOR_DATA autoincrement the index after the + * instruction is fully specified. 
+ */ +#define R500_US_ALU_ALPHA_INST_0 0xa800 +# define R500_ALPHA_OP_MAD 0 +# define R500_ALPHA_OP_DP 1 +# define R500_ALPHA_OP_MIN 2 +# define R500_ALPHA_OP_MAX 3 +/* #define R500_ALPHA_OP_RESERVED 4 */ +# define R500_ALPHA_OP_CND 5 +# define R500_ALPHA_OP_CMP 6 +# define R500_ALPHA_OP_FRC 7 +# define R500_ALPHA_OP_EX2 8 +# define R500_ALPHA_OP_LN2 9 +# define R500_ALPHA_OP_RCP 10 +# define R500_ALPHA_OP_RSQ 11 +# define R500_ALPHA_OP_SIN 12 +# define R500_ALPHA_OP_COS 13 +# define R500_ALPHA_OP_MDH 14 +# define R500_ALPHA_OP_MDV 15 +# define R500_ALPHA_ADDRD(x) ((x) << 4) +# define R500_ALPHA_ADDRD_REL (1 << 11) +# define R500_ALPHA_SEL_A_SHIFT 12 +# define R500_ALPHA_SEL_A_SRC0 (0 << 12) +# define R500_ALPHA_SEL_A_SRC1 (1 << 12) +# define R500_ALPHA_SEL_A_SRC2 (2 << 12) +# define R500_ALPHA_SEL_A_SRCP (3 << 12) +# define R500_ALPHA_SWIZ_A_R (0 << 14) +# define R500_ALPHA_SWIZ_A_G (1 << 14) +# define R500_ALPHA_SWIZ_A_B (2 << 14) +# define R500_ALPHA_SWIZ_A_A (3 << 14) +# define R500_ALPHA_SWIZ_A_0 (4 << 14) +# define R500_ALPHA_SWIZ_A_HALF (5 << 14) +# define R500_ALPHA_SWIZ_A_1 (6 << 14) +/* #define R500_ALPHA_SWIZ_A_UNUSED (7 << 14) */ +# define R500_ALPHA_MOD_A_NOP (0 << 17) +# define R500_ALPHA_MOD_A_NEG (1 << 17) +# define R500_ALPHA_MOD_A_ABS (2 << 17) +# define R500_ALPHA_MOD_A_NAB (3 << 17) +# define R500_ALPHA_SEL_B_SHIFT 19 +# define R500_ALPHA_SEL_B_SRC0 (0 << 19) +# define R500_ALPHA_SEL_B_SRC1 (1 << 19) +# define R500_ALPHA_SEL_B_SRC2 (2 << 19) +# define R500_ALPHA_SEL_B_SRCP (3 << 19) +# define R500_ALPHA_SWIZ_B_R (0 << 21) +# define R500_ALPHA_SWIZ_B_G (1 << 21) +# define R500_ALPHA_SWIZ_B_B (2 << 21) +# define R500_ALPHA_SWIZ_B_A (3 << 21) +# define R500_ALPHA_SWIZ_B_0 (4 << 21) +# define R500_ALPHA_SWIZ_B_HALF (5 << 21) +# define R500_ALPHA_SWIZ_B_1 (6 << 21) +/* #define R500_ALPHA_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALPHA_MOD_B_NOP (0 << 24) +# define R500_ALPHA_MOD_B_NEG (1 << 24) +# define R500_ALPHA_MOD_B_ABS (2 << 24) +# define 
R500_ALPHA_MOD_B_NAB (3 << 24) +# define R500_ALPHA_OMOD_IDENTITY (0 << 26) +# define R500_ALPHA_OMOD_MUL_2 (1 << 26) +# define R500_ALPHA_OMOD_MUL_4 (2 << 26) +# define R500_ALPHA_OMOD_MUL_8 (3 << 26) +# define R500_ALPHA_OMOD_DIV_2 (4 << 26) +# define R500_ALPHA_OMOD_DIV_4 (5 << 26) +# define R500_ALPHA_OMOD_DIV_8 (6 << 26) +# define R500_ALPHA_OMOD_DISABLE (7 << 26) +# define R500_ALPHA_TARGET(x) ((x) << 29) +# define R500_ALPHA_W_OMASK (1 << 31) +#define R500_US_ALU_ALPHA_ADDR_0 0x9800 +# define R500_ALPHA_ADDR0(x) ((x) << 0) +# define R500_ALPHA_ADDR0_CONST (1 << 8) +# define R500_ALPHA_ADDR0_REL (1 << 9) +# define R500_ALPHA_ADDR1(x) ((x) << 10) +# define R500_ALPHA_ADDR1_CONST (1 << 18) +# define R500_ALPHA_ADDR1_REL (1 << 19) +# define R500_ALPHA_ADDR2(x) ((x) << 20) +# define R500_ALPHA_ADDR2_CONST (1 << 28) +# define R500_ALPHA_ADDR2_REL (1 << 29) +# define R500_ALPHA_SRCP_OP_1_MINUS_2A0 (0 << 30) +# define R500_ALPHA_SRCP_OP_A1_MINUS_A0 (1 << 30) +# define R500_ALPHA_SRCP_OP_A1_PLUS_A0 (2 << 30) +# define R500_ALPHA_SRCP_OP_1_MINUS_A0 (3 << 30) +#define R500_US_ALU_RGBA_INST_0 0xb000 +# define R500_ALU_RGBA_OP_MAD (0 << 0) +# define R500_ALU_RGBA_OP_DP3 (1 << 0) +# define R500_ALU_RGBA_OP_DP4 (2 << 0) +# define R500_ALU_RGBA_OP_D2A (3 << 0) +# define R500_ALU_RGBA_OP_MIN (4 << 0) +# define R500_ALU_RGBA_OP_MAX (5 << 0) +/* #define R500_ALU_RGBA_OP_RESERVED (6 << 0) */ +# define R500_ALU_RGBA_OP_CND (7 << 0) +# define R500_ALU_RGBA_OP_CMP (8 << 0) +# define R500_ALU_RGBA_OP_FRC (9 << 0) +# define R500_ALU_RGBA_OP_SOP (10 << 0) +# define R500_ALU_RGBA_OP_MDH (11 << 0) +# define R500_ALU_RGBA_OP_MDV (12 << 0) +# define R500_ALU_RGBA_ADDRD(x) ((x) << 4) +# define R500_ALU_RGBA_ADDRD_REL (1 << 11) +# define R500_ALU_RGBA_SEL_C_SHIFT 12 +# define R500_ALU_RGBA_SEL_C_SRC0 (0 << 12) +# define R500_ALU_RGBA_SEL_C_SRC1 (1 << 12) +# define R500_ALU_RGBA_SEL_C_SRC2 (2 << 12) +# define R500_ALU_RGBA_SEL_C_SRCP (3 << 12) +# define R500_ALU_RGBA_R_SWIZ_R (0 << 14) +# 
define R500_ALU_RGBA_R_SWIZ_G (1 << 14) +# define R500_ALU_RGBA_R_SWIZ_B (2 << 14) +# define R500_ALU_RGBA_R_SWIZ_A (3 << 14) +# define R500_ALU_RGBA_R_SWIZ_0 (4 << 14) +# define R500_ALU_RGBA_R_SWIZ_HALF (5 << 14) +# define R500_ALU_RGBA_R_SWIZ_1 (6 << 14) +/* #define R500_ALU_RGBA_R_SWIZ_UNUSED (7 << 14) */ +# define R500_ALU_RGBA_G_SWIZ_R (0 << 17) +# define R500_ALU_RGBA_G_SWIZ_G (1 << 17) +# define R500_ALU_RGBA_G_SWIZ_B (2 << 17) +# define R500_ALU_RGBA_G_SWIZ_A (3 << 17) +# define R500_ALU_RGBA_G_SWIZ_0 (4 << 17) +# define R500_ALU_RGBA_G_SWIZ_HALF (5 << 17) +# define R500_ALU_RGBA_G_SWIZ_1 (6 << 17) +/* #define R500_ALU_RGBA_G_SWIZ_UNUSED (7 << 17) */ +# define R500_ALU_RGBA_B_SWIZ_R (0 << 20) +# define R500_ALU_RGBA_B_SWIZ_G (1 << 20) +# define R500_ALU_RGBA_B_SWIZ_B (2 << 20) +# define R500_ALU_RGBA_B_SWIZ_A (3 << 20) +# define R500_ALU_RGBA_B_SWIZ_0 (4 << 20) +# define R500_ALU_RGBA_B_SWIZ_HALF (5 << 20) +# define R500_ALU_RGBA_B_SWIZ_1 (6 << 20) +/* #define R500_ALU_RGBA_B_SWIZ_UNUSED (7 << 20) */ +# define R500_ALU_RGBA_MOD_C_NOP (0 << 23) +# define R500_ALU_RGBA_MOD_C_NEG (1 << 23) +# define R500_ALU_RGBA_MOD_C_ABS (2 << 23) +# define R500_ALU_RGBA_MOD_C_NAB (3 << 23) +# define R500_ALU_RGBA_ALPHA_SEL_C_SHIFT 25 +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC0 (0 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC1 (1 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRC2 (2 << 25) +# define R500_ALU_RGBA_ALPHA_SEL_C_SRCP (3 << 25) +# define R500_ALU_RGBA_A_SWIZ_R (0 << 27) +# define R500_ALU_RGBA_A_SWIZ_G (1 << 27) +# define R500_ALU_RGBA_A_SWIZ_B (2 << 27) +# define R500_ALU_RGBA_A_SWIZ_A (3 << 27) +# define R500_ALU_RGBA_A_SWIZ_0 (4 << 27) +# define R500_ALU_RGBA_A_SWIZ_HALF (5 << 27) +# define R500_ALU_RGBA_A_SWIZ_1 (6 << 27) +/* #define R500_ALU_RGBA_A_SWIZ_UNUSED (7 << 27) */ +# define R500_ALU_RGBA_ALPHA_MOD_C_NOP (0 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NEG (1 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_ABS (2 << 30) +# define R500_ALU_RGBA_ALPHA_MOD_C_NAB (3 
<< 30) +#define R500_US_ALU_RGB_INST_0 0xa000 +# define R500_ALU_RGB_SEL_A_SHIFT 0 +# define R500_ALU_RGB_SEL_A_SRC0 (0 << 0) +# define R500_ALU_RGB_SEL_A_SRC1 (1 << 0) +# define R500_ALU_RGB_SEL_A_SRC2 (2 << 0) +# define R500_ALU_RGB_SEL_A_SRCP (3 << 0) +# define R500_ALU_RGB_R_SWIZ_A_R (0 << 2) +# define R500_ALU_RGB_R_SWIZ_A_G (1 << 2) +# define R500_ALU_RGB_R_SWIZ_A_B (2 << 2) +# define R500_ALU_RGB_R_SWIZ_A_A (3 << 2) +# define R500_ALU_RGB_R_SWIZ_A_0 (4 << 2) +# define R500_ALU_RGB_R_SWIZ_A_HALF (5 << 2) +# define R500_ALU_RGB_R_SWIZ_A_1 (6 << 2) +/* #define R500_ALU_RGB_R_SWIZ_A_UNUSED (7 << 2) */ +# define R500_ALU_RGB_G_SWIZ_A_R (0 << 5) +# define R500_ALU_RGB_G_SWIZ_A_G (1 << 5) +# define R500_ALU_RGB_G_SWIZ_A_B (2 << 5) +# define R500_ALU_RGB_G_SWIZ_A_A (3 << 5) +# define R500_ALU_RGB_G_SWIZ_A_0 (4 << 5) +# define R500_ALU_RGB_G_SWIZ_A_HALF (5 << 5) +# define R500_ALU_RGB_G_SWIZ_A_1 (6 << 5) +/* #define R500_ALU_RGB_G_SWIZ_A_UNUSED (7 << 5) */ +# define R500_ALU_RGB_B_SWIZ_A_R (0 << 8) +# define R500_ALU_RGB_B_SWIZ_A_G (1 << 8) +# define R500_ALU_RGB_B_SWIZ_A_B (2 << 8) +# define R500_ALU_RGB_B_SWIZ_A_A (3 << 8) +# define R500_ALU_RGB_B_SWIZ_A_0 (4 << 8) +# define R500_ALU_RGB_B_SWIZ_A_HALF (5 << 8) +# define R500_ALU_RGB_B_SWIZ_A_1 (6 << 8) +/* #define R500_ALU_RGB_B_SWIZ_A_UNUSED (7 << 8) */ +# define R500_ALU_RGB_MOD_A_NOP (0 << 11) +# define R500_ALU_RGB_MOD_A_NEG (1 << 11) +# define R500_ALU_RGB_MOD_A_ABS (2 << 11) +# define R500_ALU_RGB_MOD_A_NAB (3 << 11) +# define R500_ALU_RGB_SEL_B_SHIFT 13 +# define R500_ALU_RGB_SEL_B_SRC0 (0 << 13) +# define R500_ALU_RGB_SEL_B_SRC1 (1 << 13) +# define R500_ALU_RGB_SEL_B_SRC2 (2 << 13) +# define R500_ALU_RGB_SEL_B_SRCP (3 << 13) +# define R500_ALU_RGB_R_SWIZ_B_R (0 << 15) +# define R500_ALU_RGB_R_SWIZ_B_G (1 << 15) +# define R500_ALU_RGB_R_SWIZ_B_B (2 << 15) +# define R500_ALU_RGB_R_SWIZ_B_A (3 << 15) +# define R500_ALU_RGB_R_SWIZ_B_0 (4 << 15) +# define R500_ALU_RGB_R_SWIZ_B_HALF (5 << 15) +# define 
R500_ALU_RGB_R_SWIZ_B_1 (6 << 15) +/* #define R500_ALU_RGB_R_SWIZ_B_UNUSED (7 << 15) */ +# define R500_ALU_RGB_G_SWIZ_B_R (0 << 18) +# define R500_ALU_RGB_G_SWIZ_B_G (1 << 18) +# define R500_ALU_RGB_G_SWIZ_B_B (2 << 18) +# define R500_ALU_RGB_G_SWIZ_B_A (3 << 18) +# define R500_ALU_RGB_G_SWIZ_B_0 (4 << 18) +# define R500_ALU_RGB_G_SWIZ_B_HALF (5 << 18) +# define R500_ALU_RGB_G_SWIZ_B_1 (6 << 18) +/* #define R500_ALU_RGB_G_SWIZ_B_UNUSED (7 << 18) */ +# define R500_ALU_RGB_B_SWIZ_B_R (0 << 21) +# define R500_ALU_RGB_B_SWIZ_B_G (1 << 21) +# define R500_ALU_RGB_B_SWIZ_B_B (2 << 21) +# define R500_ALU_RGB_B_SWIZ_B_A (3 << 21) +# define R500_ALU_RGB_B_SWIZ_B_0 (4 << 21) +# define R500_ALU_RGB_B_SWIZ_B_HALF (5 << 21) +# define R500_ALU_RGB_B_SWIZ_B_1 (6 << 21) +/* #define R500_ALU_RGB_B_SWIZ_B_UNUSED (7 << 21) */ +# define R500_ALU_RGB_MOD_B_NOP (0 << 24) +# define R500_ALU_RGB_MOD_B_NEG (1 << 24) +# define R500_ALU_RGB_MOD_B_ABS (2 << 24) +# define R500_ALU_RGB_MOD_B_NAB (3 << 24) +# define R500_ALU_RGB_OMOD_IDENTITY (0 << 26) +# define R500_ALU_RGB_OMOD_MUL_2 (1 << 26) +# define R500_ALU_RGB_OMOD_MUL_4 (2 << 26) +# define R500_ALU_RGB_OMOD_MUL_8 (3 << 26) +# define R500_ALU_RGB_OMOD_DIV_2 (4 << 26) +# define R500_ALU_RGB_OMOD_DIV_4 (5 << 26) +# define R500_ALU_RGB_OMOD_DIV_8 (6 << 26) +# define R500_ALU_RGB_OMOD_DISABLE (7 << 26) +# define R500_ALU_RGB_TARGET(x) ((x) << 29) +# define R500_ALU_RGB_WMASK (1 << 31) +#define R500_US_ALU_RGB_ADDR_0 0x9000 +# define R500_RGB_ADDR0(x) ((x) << 0) +# define R500_RGB_ADDR0_CONST (1 << 8) +# define R500_RGB_ADDR0_REL (1 << 9) +# define R500_RGB_ADDR1(x) ((x) << 10) +# define R500_RGB_ADDR1_CONST (1 << 18) +# define R500_RGB_ADDR1_REL (1 << 19) +# define R500_RGB_ADDR2(x) ((x) << 20) +# define R500_RGB_ADDR2_CONST (1 << 28) +# define R500_RGB_ADDR2_REL (1 << 29) +# define R500_RGB_SRCP_OP_1_MINUS_2RGB0 (0 << 30) +# define R500_RGB_SRCP_OP_RGB1_MINUS_RGB0 (1 << 30) +# define R500_RGB_SRCP_OP_RGB1_PLUS_RGB0 (2 << 30) +# define 
R500_RGB_SRCP_OP_1_MINUS_RGB0 (3 << 30) +#define R500_US_CMN_INST_0 0xb800 +# define R500_INST_TYPE_MASK (3 << 0) +# define R500_INST_TYPE_ALU (0 << 0) +# define R500_INST_TYPE_OUT (1 << 0) +# define R500_INST_TYPE_FC (2 << 0) +# define R500_INST_TYPE_TEX (3 << 0) +# define R500_INST_TEX_SEM_WAIT (1 << 2) +# define R500_INST_RGB_PRED_SEL_NONE (0 << 3) +# define R500_INST_RGB_PRED_SEL_RGBA (1 << 3) +# define R500_INST_RGB_PRED_SEL_RRRR (2 << 3) +# define R500_INST_RGB_PRED_SEL_GGGG (3 << 3) +# define R500_INST_RGB_PRED_SEL_BBBB (4 << 3) +# define R500_INST_RGB_PRED_SEL_AAAA (5 << 3) +# define R500_INST_RGB_PRED_INV (1 << 6) +# define R500_INST_WRITE_INACTIVE (1 << 7) +# define R500_INST_LAST (1 << 8) +# define R500_INST_NOP (1 << 9) +# define R500_INST_ALU_WAIT (1 << 10) +# define R500_INST_RGB_WMASK_R (1 << 11) +# define R500_INST_RGB_WMASK_G (1 << 12) +# define R500_INST_RGB_WMASK_B (1 << 13) +# define R500_INST_ALPHA_WMASK (1 << 14) +# define R500_INST_RGB_OMASK_R (1 << 15) +# define R500_INST_RGB_OMASK_G (1 << 16) +# define R500_INST_RGB_OMASK_B (1 << 17) +# define R500_INST_RGB_OMASK_RGB (7 << 15) +# define R500_INST_ALPHA_OMASK (1 << 18) +# define R500_INST_RGB_CLAMP (1 << 19) +# define R500_INST_ALPHA_CLAMP (1 << 20) +# define R500_INST_ALU_RESULT_SEL (1 << 21) +# define R500_INST_ALPHA_PRED_INV (1 << 22) +# define R500_INST_ALU_RESULT_OP_EQ (0 << 23) +# define R500_INST_ALU_RESULT_OP_LT (1 << 23) +# define R500_INST_ALU_RESULT_OP_GE (2 << 23) +# define R500_INST_ALU_RESULT_OP_NE (3 << 23) +# define R500_INST_ALPHA_PRED_SEL_NONE (0 << 25) +# define R500_INST_ALPHA_PRED_SEL_RGBA (1 << 25) +# define R500_INST_ALPHA_PRED_SEL_RRRR (2 << 25) +# define R500_INST_ALPHA_PRED_SEL_GGGG (3 << 25) +# define R500_INST_ALPHA_PRED_SEL_BBBB (4 << 25) +# define R500_INST_ALPHA_PRED_SEL_AAAA (5 << 25) +/* XXX next four are kind of guessed */ +# define R500_INST_STAT_WE_R (1 << 28) +# define R500_INST_STAT_WE_G (1 << 29) +# define R500_INST_STAT_WE_B (1 << 30) +# define 
R500_INST_STAT_WE_A (1 << 31) + +/* note that these are 8 bit lengths, despite the offsets, at least for R500 */ +#define R500_US_CODE_ADDR 0x4630 +# define R500_US_CODE_START_ADDR(x) ((x) << 0) +# define R500_US_CODE_END_ADDR(x) ((x) << 16) +#define R500_US_CODE_OFFSET 0x4638 +# define R500_US_CODE_OFFSET_ADDR(x) ((x) << 0) +#define R500_US_CODE_RANGE 0x4634 +# define R500_US_CODE_RANGE_ADDR(x) ((x) << 0) +# define R500_US_CODE_RANGE_SIZE(x) ((x) << 16) +#define R500_US_CONFIG 0x4600 +# define R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO (1 << 1) +#define R500_US_FC_ADDR_0 0xa000 +# define R500_FC_BOOL_ADDR(x) ((x) << 0) +# define R500_FC_INT_ADDR(x) ((x) << 8) +# define R500_FC_JUMP_ADDR(x) ((x) << 16) +# define R500_FC_JUMP_GLOBAL (1 << 31) +#define R500_US_FC_BOOL_CONST 0x4620 +# define R500_FC_KBOOL(x) (x) +#define R500_US_FC_CTRL 0x4624 +# define R500_FC_TEST_EN (1 << 30) +# define R500_FC_FULL_FC_EN (1 << 31) +#define R500_US_FC_INST_0 0x9800 +# define R500_FC_OP_JUMP (0 << 0) +# define R500_FC_OP_LOOP (1 << 0) +# define R500_FC_OP_ENDLOOP (2 << 0) +# define R500_FC_OP_REP (3 << 0) +# define R500_FC_OP_ENDREP (4 << 0) +# define R500_FC_OP_BREAKLOOP (5 << 0) +# define R500_FC_OP_BREAKREP (6 << 0) +# define R500_FC_OP_CONTINUE (7 << 0) +# define R500_FC_B_ELSE (1 << 4) +# define R500_FC_JUMP_ANY (1 << 5) +# define R500_FC_A_OP_NONE (0 << 6) +# define R500_FC_A_OP_POP (1 << 6) +# define R500_FC_A_OP_PUSH (2 << 6) +# define R500_FC_JUMP_FUNC(x) ((x) << 8) +# define R500_FC_B_POP_CNT(x) ((x) << 16) +# define R500_FC_B_OP0_NONE (0 << 24) +# define R500_FC_B_OP0_DECR (1 << 24) +# define R500_FC_B_OP0_INCR (2 << 24) +# define R500_FC_B_OP1_DECR (0 << 26) +# define R500_FC_B_OP1_NONE (1 << 26) +# define R500_FC_B_OP1_INCR (2 << 26) +# define R500_FC_IGNORE_UNCOVERED (1 << 28) +#define R500_US_FC_INT_CONST_0 0x4c00 +# define R500_FC_INT_CONST_KR(x) ((x) << 0) +# define R500_FC_INT_CONST_KG(x) ((x) << 8) +# define R500_FC_INT_CONST_KB(x) ((x) << 16) +/* _0 through _15 */ 
+#define R500_US_FORMAT0_0 0x4640 +# define R500_FORMAT_TXWIDTH(x) ((x) << 0) +# define R500_FORMAT_TXHEIGHT(x) ((x) << 11) +# define R500_FORMAT_TXDEPTH(x) ((x) << 22) +#define R500_US_PIXSIZE 0x4604 +# define R500_PIX_SIZE(x) (x) +#define R500_US_TEX_ADDR_0 0x9800 +# define R500_TEX_SRC_ADDR(x) ((x) << 0) +# define R500_TEX_SRC_ADDR_REL (1 << 7) +# define R500_TEX_SRC_S_SWIZ_R (0 << 8) +# define R500_TEX_SRC_S_SWIZ_G (1 << 8) +# define R500_TEX_SRC_S_SWIZ_B (2 << 8) +# define R500_TEX_SRC_S_SWIZ_A (3 << 8) +# define R500_TEX_SRC_T_SWIZ_R (0 << 10) +# define R500_TEX_SRC_T_SWIZ_G (1 << 10) +# define R500_TEX_SRC_T_SWIZ_B (2 << 10) +# define R500_TEX_SRC_T_SWIZ_A (3 << 10) +# define R500_TEX_SRC_R_SWIZ_R (0 << 12) +# define R500_TEX_SRC_R_SWIZ_G (1 << 12) +# define R500_TEX_SRC_R_SWIZ_B (2 << 12) +# define R500_TEX_SRC_R_SWIZ_A (3 << 12) +# define R500_TEX_SRC_Q_SWIZ_R (0 << 14) +# define R500_TEX_SRC_Q_SWIZ_G (1 << 14) +# define R500_TEX_SRC_Q_SWIZ_B (2 << 14) +# define R500_TEX_SRC_Q_SWIZ_A (3 << 14) +# define R500_TEX_DST_ADDR(x) ((x) << 16) +# define R500_TEX_DST_ADDR_REL (1 << 23) +# define R500_TEX_DST_R_SWIZ_R (0 << 24) +# define R500_TEX_DST_R_SWIZ_G (1 << 24) +# define R500_TEX_DST_R_SWIZ_B (2 << 24) +# define R500_TEX_DST_R_SWIZ_A (3 << 24) +# define R500_TEX_DST_G_SWIZ_R (0 << 26) +# define R500_TEX_DST_G_SWIZ_G (1 << 26) +# define R500_TEX_DST_G_SWIZ_B (2 << 26) +# define R500_TEX_DST_G_SWIZ_A (3 << 26) +# define R500_TEX_DST_B_SWIZ_R (0 << 28) +# define R500_TEX_DST_B_SWIZ_G (1 << 28) +# define R500_TEX_DST_B_SWIZ_B (2 << 28) +# define R500_TEX_DST_B_SWIZ_A (3 << 28) +# define R500_TEX_DST_A_SWIZ_R (0 << 30) +# define R500_TEX_DST_A_SWIZ_G (1 << 30) +# define R500_TEX_DST_A_SWIZ_B (2 << 30) +# define R500_TEX_DST_A_SWIZ_A (3 << 30) +#define R500_US_TEX_ADDR_DXDY_0 0xa000 +# define R500_DX_ADDR(x) ((x) << 0) +# define R500_DX_ADDR_REL (1 << 7) +# define R500_DX_S_SWIZ_R (0 << 8) +# define R500_DX_S_SWIZ_G (1 << 8) +# define R500_DX_S_SWIZ_B (2 << 8) +# 
define R500_DX_S_SWIZ_A (3 << 8) +# define R500_DX_T_SWIZ_R (0 << 10) +# define R500_DX_T_SWIZ_G (1 << 10) +# define R500_DX_T_SWIZ_B (2 << 10) +# define R500_DX_T_SWIZ_A (3 << 10) +# define R500_DX_R_SWIZ_R (0 << 12) +# define R500_DX_R_SWIZ_G (1 << 12) +# define R500_DX_R_SWIZ_B (2 << 12) +# define R500_DX_R_SWIZ_A (3 << 12) +# define R500_DX_Q_SWIZ_R (0 << 14) +# define R500_DX_Q_SWIZ_G (1 << 14) +# define R500_DX_Q_SWIZ_B (2 << 14) +# define R500_DX_Q_SWIZ_A (3 << 14) +# define R500_DY_ADDR(x) ((x) << 16) +# define R500_DY_ADDR_REL (1 << 17) +# define R500_DY_S_SWIZ_R (0 << 24) +# define R500_DY_S_SWIZ_G (1 << 24) +# define R500_DY_S_SWIZ_B (2 << 24) +# define R500_DY_S_SWIZ_A (3 << 24) +# define R500_DY_T_SWIZ_R (0 << 26) +# define R500_DY_T_SWIZ_G (1 << 26) +# define R500_DY_T_SWIZ_B (2 << 26) +# define R500_DY_T_SWIZ_A (3 << 26) +# define R500_DY_R_SWIZ_R (0 << 28) +# define R500_DY_R_SWIZ_G (1 << 28) +# define R500_DY_R_SWIZ_B (2 << 28) +# define R500_DY_R_SWIZ_A (3 << 28) +# define R500_DY_Q_SWIZ_R (0 << 30) +# define R500_DY_Q_SWIZ_G (1 << 30) +# define R500_DY_Q_SWIZ_B (2 << 30) +# define R500_DY_Q_SWIZ_A (3 << 30) +#define R500_US_TEX_INST_0 0x9000 +# define R500_TEX_ID(x) ((x) << 16) +# define R500_TEX_INST_NOP (0 << 22) +# define R500_TEX_INST_LD (1 << 22) +# define R500_TEX_INST_TEXKILL (2 << 22) +# define R500_TEX_INST_PROJ (3 << 22) +# define R500_TEX_INST_LODBIAS (4 << 22) +# define R500_TEX_INST_LOD (5 << 22) +# define R500_TEX_INST_DXDY (6 << 22) +# define R500_TEX_SEM_ACQUIRE (1 << 25) +# define R500_TEX_IGNORE_UNCOVERED (1 << 26) +# define R500_TEX_UNSCALED (1 << 27) +#define R300_US_W_FMT 0x46b4 +# define R300_W_FMT_W0 (0 << 0) +# define R300_W_FMT_W24 (1 << 0) +# define R300_W_FMT_W24FP (2 << 0) +# define R300_W_SRC_US (0 << 2) +# define R300_W_SRC_RAS (1 << 2) + + +/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR. + * Two parameter dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. 
VAP_VF_CTL: The second parameter is a standard primitive emission dword. + */ +#define R300_PACKET3_3D_DRAW_VBUF 0x00002800 + +/* Draw a primitive from immediate vertices in this packet + * Up to 16382 dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. + * 2 to end: Up to 16380 dwords of vertex data. + */ +#define R300_PACKET3_3D_DRAW_IMMD 0x00002900 + +/* Draw a primitive from vertex data in arrays loaded via 3D_LOAD_VBPNTR and + * immediate vertices in this packet + * Up to 16382 dwords: + * 0. VAP_VTX_FMT: The first parameter is not written to hardware + * 1. VAP_VF_CTL: The second parameter is a standard primitive emission dword. + * 2 to end: Up to 16380 dwords of vertex data. + */ +#define R300_PACKET3_3D_DRAW_INDX 0x00002A00 + + +/* Specify the full set of vertex arrays as (address, stride). + * The first parameter is the number of vertex arrays specified. + * The rest of the command is a variable length list of blocks, where + * each block is three dwords long and specifies two arrays. + * The first dword of a block is split into two words, the lower significant + * word refers to the first array, the more significant word to the second + * array in the block. + * The low byte of each word contains the size of an array entry in dwords, + * the high byte contains the stride of the array. + * The second dword of a block contains the pointer to the first array, + * the third dword of a block contains the pointer to the second array. + * Note that if the total number of arrays is odd, the third dword of + * the last block is omitted. 
+ */ +#define R300_PACKET3_3D_LOAD_VBPNTR 0x00002F00 + +#define R300_PACKET3_INDX_BUFFER 0x00003300 +# define R300_INDX_BUFFER_DST_SHIFT 0 +# define R300_INDX_BUFFER_SKIP_SHIFT 16 +# define R300_INDX_BUFFER_ONE_REG_WR (1<<31) + +/* Same as R300_PACKET3_3D_DRAW_VBUF but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_VBUF_2 0x00003400 +/* Same as R300_PACKET3_3D_DRAW_IMMD but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_IMMD_2 0x00003500 +/* Same as R300_PACKET3_3D_DRAW_INDX but without VAP_VTX_FMT */ +#define R300_PACKET3_3D_DRAW_INDX_2 0x00003600 + +/* Clears a portion of hierachical Z RAM + * 3 dword parameters + * 0. START + * 1. COUNT: 13:0 (max is 0x3FFF) + * 2. CLEAR_VALUE: Value to write into HIZ RAM. + */ +#define R300_PACKET3_3D_CLEAR_HIZ 0x00003700 + +/* Draws a set of primitives using vertex buffers pointed by the state data. + * At least 2 Parameters: + * 0. VAP_VF_CNTL: The first parameter is a standard primitive emission dword. + * 2 to end: Data or indices (see other 3D_DRAW_* packets for details) + */ +#define R300_PACKET3_3D_DRAW_128 0x00003900 + +/* END: Packet 3 commands */ + + +/* Color formats for 2d packets + */ +#define R300_CP_COLOR_FORMAT_CI8 2 +#define R300_CP_COLOR_FORMAT_ARGB1555 3 +#define R300_CP_COLOR_FORMAT_RGB565 4 +#define R300_CP_COLOR_FORMAT_ARGB8888 6 +#define R300_CP_COLOR_FORMAT_RGB332 7 +#define R300_CP_COLOR_FORMAT_RGB8 9 +#define R300_CP_COLOR_FORMAT_ARGB4444 15 + +/* + * CP type-3 packets + */ +#define R300_CP_CMD_BITBLT_MULTI 0xC0009B00 + +/* XXX Corbin's stuff from radeon and r200 */ + +#define RADEON_WAIT_UNTIL 0x1720 +# define RADEON_WAIT_CRTC_PFLIP (1 << 0) +# define RADEON_WAIT_2D_IDLECLEAN (1 << 16) +# define RADEON_WAIT_3D_IDLECLEAN (1 << 17) +# define RADEON_WAIT_HOST_IDLECLEAN (1 << 18) + +#define RADEON_CP_PACKET3 0xC0000000 + +#define R200_3D_DRAW_IMMD_2 0xC0003500 + +#endif /* _R300_REG_H */ + +/* *INDENT-ON* */ + +/* vim: set foldenable foldmarker=\\{,\\} foldmethod=marker : */ diff --git 
a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c new file mode 100644 index 0000000000..5ff9015a7b --- /dev/null +++ b/src/gallium/drivers/r300/r300_screen.c @@ -0,0 +1,371 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_screen.h" + +/* Return the identifier behind whom the brave coders responsible for this + * amalgamation of code, sweat, and duct tape, routinely obscure their names. + * + * ...I should have just put "Corbin Simpson", but I'm not that cool. + * + * (Or egotistical. Yet.) 
*/ +static const char* r300_get_vendor(struct pipe_screen* pscreen) +{ + return "X.Org R300 Project"; +} + +static const char* chip_families[] = { + "R300", + "R350", + "R360", + "RV350", + "RV370", + "RV380", + "R420", + "R423", + "R430", + "R480", + "R481", + "RV410", + "RS400", + "RC410", + "RS480", + "RS482", + "RS690", + "RS740", + "RV515", + "R520", + "RV530", + "R580", + "RV560", + "RV570" +}; + +static const char* r300_get_name(struct pipe_screen* pscreen) +{ + struct r300_screen* r300screen = r300_screen(pscreen); + + return chip_families[r300screen->caps->family]; +} + +static int r300_get_param(struct pipe_screen* pscreen, int param) +{ + struct r300_screen* r300screen = r300_screen(pscreen); + + switch (param) { + /* XXX cases marked "IN THEORY" are possible on the hardware, + * but haven't been implemented yet. */ + case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: + /* XXX I'm told this goes up to 16 */ + return 8; + case PIPE_CAP_NPOT_TEXTURES: + /* IN THEORY */ + return 0; + case PIPE_CAP_TWO_SIDED_STENCIL: + if (r300screen->caps->is_r500) { + return 1; + } else { + return 0; + } + return 0; + case PIPE_CAP_GLSL: + /* IN THEORY */ + return 0; + case PIPE_CAP_S3TC: + /* IN THEORY */ + return 0; + case PIPE_CAP_ANISOTROPIC_FILTER: + /* IN THEORY */ + return 0; + case PIPE_CAP_POINT_SPRITE: + /* IN THEORY */ + return 0; + case PIPE_CAP_MAX_RENDER_TARGETS: + return 4; + case PIPE_CAP_OCCLUSION_QUERY: + /* IN THEORY */ + return 0; + case PIPE_CAP_TEXTURE_SHADOW_MAP: + /* IN THEORY */ + return 0; + case PIPE_CAP_MAX_TEXTURE_2D_LEVELS: + if (r300screen->caps->is_r500) { + /* 13 == 4096x4096 */ + return 13; + } else { + /* 12 == 2048x2048 */ + return 12; + } + case PIPE_CAP_MAX_TEXTURE_3D_LEVELS: + /* So, technically, the limit is the same as above, but some math + * shows why this is silly. Assuming RGBA, 4cpp, we can see that + * 4096*4096*4096 = 64.0 GiB exactly, so it's not exactly + * practical. 
However, if at some point a game really wants this, + * then we can remove or raise this limit. */ + if (r300screen->caps->is_r500) { + /* 9 == 256x256x256 */ + return 9; + } else { + /* 8 == 128*128*128 */ + return 8; + } + case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS: + if (r300screen->caps->is_r500) { + /* 13 == 4096x4096 */ + return 13; + } else { + /* 12 == 2048x2048 */ + return 12; + } + case PIPE_CAP_TEXTURE_MIRROR_CLAMP: + return 1; + case PIPE_CAP_TEXTURE_MIRROR_REPEAT: + return 1; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + /* XXX guessing (what a terrible guess) */ + return 2; + default: + debug_printf("r300: Implementation error: Bad param %d\n", + param); + return 0; + } +} + +static float r300_get_paramf(struct pipe_screen* pscreen, int param) +{ + switch (param) { + case PIPE_CAP_MAX_LINE_WIDTH: + case PIPE_CAP_MAX_LINE_WIDTH_AA: + /* XXX this is the biggest thing that will fit in that register. + * Perhaps the actual rendering limits are less? */ + return 10922.0f; + case PIPE_CAP_MAX_POINT_WIDTH: + case PIPE_CAP_MAX_POINT_WIDTH_AA: + /* XXX this is the biggest thing that will fit in that register. + * Perhaps the actual rendering limits are less? 
*/ + return 10922.0f; + case PIPE_CAP_MAX_TEXTURE_ANISOTROPY: + return 16.0f; + case PIPE_CAP_MAX_TEXTURE_LOD_BIAS: + return 16.0f; + default: + debug_printf("r300: Implementation error: Bad paramf %d\n", + param); + return 0.0f; + } +} + +static boolean check_tex_2d_format(enum pipe_format format, boolean is_r500) +{ + switch (format) { + /* Colorbuffer */ + case PIPE_FORMAT_A8R8G8B8_UNORM: + /* Texture */ + case PIPE_FORMAT_I8_UNORM: + /* Z buffer */ + case PIPE_FORMAT_Z16_UNORM: + /* Z buffer with stencil */ + case PIPE_FORMAT_Z24S8_UNORM: + return TRUE; + + /* XXX Supported yet unimplemented formats: */ + case PIPE_FORMAT_A1R5G5B5_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + /* XXX These don't even exist + case PIPE_FORMAT_A32R32G32B32: + case PIPE_FORMAT_A16R16G16B16: */ + /* XXX Insert YUV422 packed VYUY and YVYU here */ + /* XXX What the deuce is UV88? (r3xx accel page 14) */ + case PIPE_FORMAT_A4R4G4B4_UNORM: + debug_printf("r300: Warning: Got unimplemented format: %s in %s\n", + pf_name(format), __FUNCTION__); + return FALSE; + + /* XXX Supported yet unimplemented r5xx formats: */ + /* XXX Again, what is UV1010 this time? 
(r5xx accel page 148) */ + /* XXX Even more that don't exist + case PIPE_FORMAT_A10R10G10B10_UNORM: + case PIPE_FORMAT_A2R10G10B10_UNORM: + case PIPE_FORMAT_I10_UNORM: */ + debug_printf( + "r300: Warning: Got unimplemented r500 format: %s in %s\n", + pf_name(format), __FUNCTION__); + return FALSE; + + default: + debug_printf("r300: Warning: Got unsupported format: %s in %s\n", + pf_name(format), __FUNCTION__); + break; + } + + return FALSE; +} + +/* XXX moar targets */ +static boolean r300_is_format_supported(struct pipe_screen* pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) +{ + switch (target) { + case PIPE_TEXTURE_2D: + return check_tex_2d_format(format, + r300_screen(pscreen)->caps->is_r500); + default: + debug_printf("r300: Warning: Got unknown format target: %d\n", + format); + break; + } + + return FALSE; +} + +static struct pipe_transfer* +r300_get_tex_transfer(struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, unsigned x, unsigned y, + unsigned w, unsigned h) +{ + struct r300_texture *tex = (struct r300_texture *)texture; + struct r300_transfer *trans; + unsigned offset; /* in bytes */ + + /* XXX Add support for these things */ + if (texture->target == PIPE_TEXTURE_CUBE) { + debug_printf("PIPE_TEXTURE_CUBE is not yet supported.\n"); + /* offset = tex->image_offset[level][face]; */ + } + else if (texture->target == PIPE_TEXTURE_3D) { + debug_printf("PIPE_TEXTURE_3D is not yet supported.\n"); + /* offset = tex->image_offset[level][zslice]; */ + } + else { + offset = tex->offset[level]; + assert(face == 0); + assert(zslice == 0); + } + + trans = CALLOC_STRUCT(r300_transfer); + if (trans) { + trans->transfer.refcount = 1; + pipe_texture_reference(&trans->transfer.texture, texture); + trans->transfer.format = trans->transfer.format; + trans->transfer.width = w; + trans->transfer.height = h; + 
trans->transfer.block = texture->block; + trans->transfer.nblocksx = texture->nblocksx[level]; + trans->transfer.nblocksy = texture->nblocksy[level]; + trans->transfer.stride = tex->stride; + trans->transfer.usage = usage; + trans->offset = offset; + } + return &trans->transfer; +} + +static void +r300_tex_transfer_release(struct pipe_screen *screen, + struct pipe_transfer **transfer) +{ + struct pipe_transfer *trans = *transfer; + + if (--trans->refcount == 0) { + pipe_texture_reference(&trans->texture, NULL); + FREE(trans); + } + + *transfer = NULL; +} + +static void* r300_transfer_map(struct pipe_screen* screen, + struct pipe_transfer* transfer) +{ + struct r300_texture* tex = (struct r300_texture*)transfer->texture; + char* map; + unsigned flags = 0; + + if (transfer->usage != PIPE_TRANSFER_WRITE) { + flags |= PIPE_BUFFER_USAGE_CPU_READ; + } + if (transfer->usage != PIPE_TRANSFER_READ) { + flags |= PIPE_BUFFER_USAGE_CPU_WRITE; + } + + map = pipe_buffer_map(screen, tex->buffer, flags); + + if (!map) { + return NULL; + } + + return map + r300_transfer(transfer)->offset + + transfer->y / transfer->block.height * transfer->stride + + transfer->x / transfer->block.width * transfer->block.size; +} + +static void r300_transfer_unmap(struct pipe_screen* screen, + struct pipe_transfer* transfer) +{ + struct r300_texture* tex = (struct r300_texture*)transfer->texture; + pipe_buffer_unmap(screen, tex->buffer); +} + +static void r300_destroy_screen(struct pipe_screen* pscreen) +{ + struct r300_screen* r300screen = r300_screen(pscreen); + + FREE(r300screen->caps); + FREE(r300screen); +} + +struct pipe_screen* r300_create_screen(struct pipe_winsys* winsys, + struct r300_winsys* r300_winsys) +{ + struct r300_screen* r300screen = CALLOC_STRUCT(r300_screen); + struct r300_capabilities* caps = CALLOC_STRUCT(r300_capabilities); + + if (!r300screen || !caps) + return NULL; + + caps->pci_id = r300_winsys->pci_id; + caps->num_frag_pipes = r300_winsys->gb_pipes; + + 
r300_parse_chipset(caps); + + r300screen->caps = caps; + r300screen->screen.winsys = winsys; + r300screen->screen.destroy = r300_destroy_screen; + r300screen->screen.get_name = r300_get_name; + r300screen->screen.get_vendor = r300_get_vendor; + r300screen->screen.get_param = r300_get_param; + r300screen->screen.get_paramf = r300_get_paramf; + r300screen->screen.is_format_supported = r300_is_format_supported; + r300screen->screen.get_tex_transfer = r300_get_tex_transfer; + r300screen->screen.tex_transfer_release = r300_tex_transfer_release; + r300screen->screen.transfer_map = r300_transfer_map; + r300screen->screen.transfer_unmap = r300_transfer_unmap; + + r300_init_screen_texture_functions(&r300screen->screen); + u_simple_screen_init(&r300screen->screen); + + return &r300screen->screen; +} diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h new file mode 100644 index 0000000000..6c845144cb --- /dev/null +++ b/src/gallium/drivers/r300/r300_screen.h @@ -0,0 +1,67 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_SCREEN_H +#define R300_SCREEN_H + +#include "pipe/p_inlines.h" +#include "pipe/p_screen.h" +#include "util/u_memory.h" +#include "util/u_simple_screen.h" + +#include "r300_chipset.h" +#include "r300_texture.h" +#include "r300_winsys.h" + +struct r300_screen { + /* Parent class */ + struct pipe_screen screen; + + /* Chipset capabilities */ + struct r300_capabilities* caps; +}; + +struct r300_transfer { + /* Parent class */ + struct pipe_transfer transfer; + + /* Offset from start of buffer. */ + unsigned offset; +}; + +/* Convenience cast wrapper. */ +static struct r300_screen* r300_screen(struct pipe_screen* screen) { + return (struct r300_screen*)screen; +} + +/* Convenience cast wrapper. */ +static INLINE struct r300_transfer* +r300_transfer(struct pipe_transfer* transfer) +{ + return (struct r300_transfer*)transfer; +} + +/* Creates a new r300 screen. 
*/ +struct pipe_screen* r300_create_screen(struct pipe_winsys* winsys, + struct r300_winsys* r300_winsys); + +#endif /* R300_SCREEN_H */ diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c new file mode 100644 index 0000000000..da99a3be6b --- /dev/null +++ b/src/gallium/drivers/r300/r300_state.c @@ -0,0 +1,854 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "util/u_math.h" +#include "util/u_pack_color.h" + +#include "util/u_debug.h" +#include "pipe/internal/p_winsys_screen.h" + +#include "r300_context.h" +#include "r300_reg.h" +#include "r300_state_shader.h" + +/* r300_state: Functions used to intialize state context by translating + * Gallium state objects into semi-native r300 state objects. + * + * XXX break this file up into pieces if it gets too big! 
*/ + +/* Pack a float into a dword. */ +static uint32_t pack_float_32(float f) +{ + union { + float f; + uint32_t u; + } u; + + u.f = f; + return u.u; +} + +static uint32_t translate_blend_function(int blend_func) { + switch (blend_func) { + case PIPE_BLEND_ADD: + return R300_COMB_FCN_ADD_CLAMP; + case PIPE_BLEND_SUBTRACT: + return R300_COMB_FCN_SUB_CLAMP; + case PIPE_BLEND_REVERSE_SUBTRACT: + return R300_COMB_FCN_RSUB_CLAMP; + case PIPE_BLEND_MIN: + return R300_COMB_FCN_MIN; + case PIPE_BLEND_MAX: + return R300_COMB_FCN_MAX; + default: + debug_printf("r300: Unknown blend function %d\n", blend_func); + break; + } + return 0; +} + +/* XXX we can also offer the D3D versions of some of these... */ +static uint32_t translate_blend_factor(int blend_fact) { + switch (blend_fact) { + case PIPE_BLENDFACTOR_ONE: + return R300_BLEND_GL_ONE; + case PIPE_BLENDFACTOR_SRC_COLOR: + return R300_BLEND_GL_SRC_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA: + return R300_BLEND_GL_SRC_ALPHA; + case PIPE_BLENDFACTOR_DST_ALPHA: + return R300_BLEND_GL_DST_ALPHA; + case PIPE_BLENDFACTOR_DST_COLOR: + return R300_BLEND_GL_DST_COLOR; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: + return R300_BLEND_GL_SRC_ALPHA_SATURATE; + case PIPE_BLENDFACTOR_CONST_COLOR: + return R300_BLEND_GL_CONST_COLOR; + case PIPE_BLENDFACTOR_CONST_ALPHA: + return R300_BLEND_GL_CONST_ALPHA; + /* XXX WTF are these? 
+ case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: */ + case PIPE_BLENDFACTOR_ZERO: + return R300_BLEND_GL_ZERO; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + return R300_BLEND_GL_ONE_MINUS_SRC_COLOR; + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: + return R300_BLEND_GL_ONE_MINUS_SRC_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + return R300_BLEND_GL_ONE_MINUS_DST_ALPHA; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + return R300_BLEND_GL_ONE_MINUS_DST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + return R300_BLEND_GL_ONE_MINUS_CONST_COLOR; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + return R300_BLEND_GL_ONE_MINUS_CONST_ALPHA; + /* XXX see above + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: */ + default: + debug_printf("r300: Unknown blend factor %d\n", blend_fact); + break; + } + return 0; +} + +/* Create a new blend state based on the CSO blend state. + * + * This encompasses alpha blending, logic/raster ops, and blend dithering. */ +static void* r300_create_blend_state(struct pipe_context* pipe, + const struct pipe_blend_state* state) +{ + struct r300_blend_state* blend = CALLOC_STRUCT(r300_blend_state); + + if (state->blend_enable) { + /* XXX for now, always do separate alpha... + * is it faster to do it with one reg? */ + blend->blend_control = R300_ALPHA_BLEND_ENABLE | + R300_SEPARATE_ALPHA_ENABLE | + R300_READ_ENABLE | + translate_blend_function(state->rgb_func) | + (translate_blend_factor(state->rgb_src_factor) << + R300_SRC_BLEND_SHIFT) | + (translate_blend_factor(state->rgb_dst_factor) << + R300_DST_BLEND_SHIFT); + blend->alpha_blend_control = + translate_blend_function(state->alpha_func) | + (translate_blend_factor(state->alpha_src_factor) << + R300_SRC_BLEND_SHIFT) | + (translate_blend_factor(state->alpha_dst_factor) << + R300_DST_BLEND_SHIFT); + } + + /* PIPE_LOGICOP_* don't need to be translated, fortunately. */ + /* XXX are logicops still allowed if blending's disabled? + * Does Gallium take care of it for us? 
*/ + if (state->logicop_enable) { + blend->rop = R300_RB3D_ROPCNTL_ROP_ENABLE | + (state->logicop_func) << R300_RB3D_ROPCNTL_ROP_SHIFT; + } + + if (state->dither) { + blend->dither = R300_RB3D_DITHER_CTL_DITHER_MODE_LUT | + R300_RB3D_DITHER_CTL_ALPHA_DITHER_MODE_LUT; + } + + return (void*)blend; +} + +/* Bind blend state. */ +static void r300_bind_blend_state(struct pipe_context* pipe, + void* state) +{ + struct r300_context* r300 = r300_context(pipe); + + r300->blend_state = (struct r300_blend_state*)state; + r300->dirty_state |= R300_NEW_BLEND; +} + +/* Free blend state. */ +static void r300_delete_blend_state(struct pipe_context* pipe, + void* state) +{ + FREE(state); +} + +/* Set blend color. + * Setup both R300 and R500 registers, figure out later which one to write. */ +static void r300_set_blend_color(struct pipe_context* pipe, + const struct pipe_blend_color* color) +{ + struct r300_context* r300 = r300_context(pipe); + uint32_t r, g, b, a; + ubyte ur, ug, ub, ua; + + r = util_iround(color->color[0] * 1023.0f); + g = util_iround(color->color[1] * 1023.0f); + b = util_iround(color->color[2] * 1023.0f); + a = util_iround(color->color[3] * 1023.0f); + + ur = float_to_ubyte(color->color[0]); + ug = float_to_ubyte(color->color[1]); + ub = float_to_ubyte(color->color[2]); + ua = float_to_ubyte(color->color[3]); + + r300->blend_color_state->blend_color = (a << 24) | (r << 16) | (g << 8) | b; + + r300->blend_color_state->blend_color_red_alpha = ur | (ua << 16); + r300->blend_color_state->blend_color_green_blue = ub | (ug << 16); + + r300->dirty_state |= R300_NEW_BLEND_COLOR; +} + +static void r300_set_clip_state(struct pipe_context* pipe, + const struct pipe_clip_state* state) +{ + struct r300_context* r300 = r300_context(pipe); + /* XXX Draw */ + draw_flush(r300->draw); + draw_set_clip_state(r300->draw, state); +} + +static void + r300_set_constant_buffer(struct pipe_context* pipe, + uint shader, uint index, + const struct pipe_constant_buffer* buffer) +{ + struct 
r300_context* r300 = r300_context(pipe); + + /* This entire chunk of code seems ever-so-slightly baked. + * It's as if I've got pipe_buffer* matryoshkas... */ + if (buffer && buffer->buffer && buffer->buffer->size) { + void* map = pipe->winsys->buffer_map(pipe->winsys, buffer->buffer, + PIPE_BUFFER_USAGE_CPU_READ); + memcpy(r300->shader_constants[shader].constants, + map, buffer->buffer->size); + pipe->winsys->buffer_unmap(pipe->winsys, buffer->buffer); + + r300->shader_constants[shader].user_count = + buffer->buffer->size / (sizeof(float) * 4); + } else { + r300->shader_constants[shader].user_count = 0; + } + + r300->dirty_state |= R300_NEW_CONSTANTS; +} + +static uint32_t translate_depth_stencil_function(int zs_func) { + switch (zs_func) { + case PIPE_FUNC_NEVER: + return R300_ZS_NEVER; + case PIPE_FUNC_LESS: + return R300_ZS_LESS; + case PIPE_FUNC_EQUAL: + return R300_ZS_EQUAL; + case PIPE_FUNC_LEQUAL: + return R300_ZS_LEQUAL; + case PIPE_FUNC_GREATER: + return R300_ZS_GREATER; + case PIPE_FUNC_NOTEQUAL: + return R300_ZS_NOTEQUAL; + case PIPE_FUNC_GEQUAL: + return R300_ZS_GEQUAL; + case PIPE_FUNC_ALWAYS: + return R300_ZS_ALWAYS; + default: + debug_printf("r300: Unknown depth/stencil function %d\n", + zs_func); + break; + } + return 0; +} + +static uint32_t translate_stencil_op(int s_op) { + switch (s_op) { + case PIPE_STENCIL_OP_KEEP: + return R300_ZS_KEEP; + case PIPE_STENCIL_OP_ZERO: + return R300_ZS_ZERO; + case PIPE_STENCIL_OP_REPLACE: + return R300_ZS_REPLACE; + case PIPE_STENCIL_OP_INCR: + return R300_ZS_INCR; + case PIPE_STENCIL_OP_DECR: + return R300_ZS_DECR; + case PIPE_STENCIL_OP_INCR_WRAP: + return R300_ZS_INCR_WRAP; + case PIPE_STENCIL_OP_DECR_WRAP: + return R300_ZS_DECR_WRAP; + case PIPE_STENCIL_OP_INVERT: + return R300_ZS_INVERT; + default: + debug_printf("r300: Unknown stencil op %d", s_op); + break; + } + return 0; +} + +static uint32_t translate_alpha_function(int alpha_func) { + switch (alpha_func) { + case PIPE_FUNC_NEVER: + return 
R300_FG_ALPHA_FUNC_NEVER; + case PIPE_FUNC_LESS: + return R300_FG_ALPHA_FUNC_LESS; + case PIPE_FUNC_EQUAL: + return R300_FG_ALPHA_FUNC_EQUAL; + case PIPE_FUNC_LEQUAL: + return R300_FG_ALPHA_FUNC_LE; + case PIPE_FUNC_GREATER: + return R300_FG_ALPHA_FUNC_GREATER; + case PIPE_FUNC_NOTEQUAL: + return R300_FG_ALPHA_FUNC_NOTEQUAL; + case PIPE_FUNC_GEQUAL: + return R300_FG_ALPHA_FUNC_GE; + case PIPE_FUNC_ALWAYS: + return R300_FG_ALPHA_FUNC_ALWAYS; + default: + debug_printf("r300: Unknown alpha function %d", alpha_func); + break; + } + return 0; +} + +/* Create a new depth, stencil, and alpha state based on the CSO dsa state. + * + * This contains the depth buffer, stencil buffer, alpha test, and such. + * On the Radeon, depth and stencil buffer setup are intertwined, which is + * the reason for some of the strange-looking assignments across registers. */ +static void* + r300_create_dsa_state(struct pipe_context* pipe, + const struct pipe_depth_stencil_alpha_state* state) +{ + struct r300_dsa_state* dsa = CALLOC_STRUCT(r300_dsa_state); + + /* Depth test setup. */ + if (state->depth.enabled) { + dsa->z_buffer_control |= R300_Z_ENABLE; + + if (state->depth.writemask) { + dsa->z_buffer_control |= R300_Z_WRITE_ENABLE; + } + + dsa->z_stencil_control |= + (translate_depth_stencil_function(state->depth.func) << + R300_Z_FUNC_SHIFT); + } + + /* Stencil buffer setup. 
*/ + if (state->stencil[0].enabled) { + dsa->z_buffer_control |= R300_STENCIL_ENABLE; + dsa->z_stencil_control |= + (translate_depth_stencil_function(state->stencil[0].func) << + R300_S_FRONT_FUNC_SHIFT) | + (translate_stencil_op(state->stencil[0].fail_op) << + R300_S_FRONT_SFAIL_OP_SHIFT) | + (translate_stencil_op(state->stencil[0].zpass_op) << + R300_S_FRONT_ZPASS_OP_SHIFT) | + (translate_stencil_op(state->stencil[0].zfail_op) << + R300_S_FRONT_ZFAIL_OP_SHIFT); + + dsa->stencil_ref_mask = (state->stencil[0].ref_value) | + (state->stencil[0].valuemask << R300_STENCILMASK_SHIFT) | + (state->stencil[0].writemask << R300_STENCILWRITEMASK_SHIFT); + + if (state->stencil[1].enabled) { + dsa->z_buffer_control |= R300_STENCIL_FRONT_BACK; + dsa->z_stencil_control |= + (translate_depth_stencil_function(state->stencil[1].func) << + R300_S_BACK_FUNC_SHIFT) | + (translate_stencil_op(state->stencil[1].fail_op) << + R300_S_BACK_SFAIL_OP_SHIFT) | + (translate_stencil_op(state->stencil[1].zpass_op) << + R300_S_BACK_ZPASS_OP_SHIFT) | + (translate_stencil_op(state->stencil[1].zfail_op) << + R300_S_BACK_ZFAIL_OP_SHIFT); + + dsa->stencil_ref_bf = (state->stencil[1].ref_value) | + (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) | + (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT); + } + } + + /* Alpha test setup. */ + if (state->alpha.enabled) { + dsa->alpha_function = translate_alpha_function(state->alpha.func) | + R300_FG_ALPHA_FUNC_ENABLE; + dsa->alpha_reference = CLAMP(state->alpha.ref_value * 1023.0f, + 0, 1023); + } else { + dsa->z_buffer_top = R300_ZTOP_ENABLE; + } + + return (void*)dsa; +} + +/* Bind DSA state. */ +static void r300_bind_dsa_state(struct pipe_context* pipe, + void* state) +{ + struct r300_context* r300 = r300_context(pipe); + + r300->dsa_state = (struct r300_dsa_state*)state; + r300->dirty_state |= R300_NEW_DSA; +} + +/* Free DSA state. 
*/ +static void r300_delete_dsa_state(struct pipe_context* pipe, + void* state) +{ + FREE(state); +} + +static void r300_set_edgeflags(struct pipe_context* pipe, + const unsigned* bitfield) +{ + /* XXX you know it's bad when i915 has this blank too */ +} + +static void + r300_set_framebuffer_state(struct pipe_context* pipe, + const struct pipe_framebuffer_state* state) +{ + struct r300_context* r300 = r300_context(pipe); + + draw_flush(r300->draw); + + r300->framebuffer_state = *state; + + r300->dirty_state |= R300_NEW_FRAMEBUFFERS; +} + +/* Create fragment shader state. */ +static void* r300_create_fs_state(struct pipe_context* pipe, + const struct pipe_shader_state* shader) +{ + struct r300_context* r300 = r300_context(pipe); + struct r3xx_fragment_shader* fs = NULL; + + if (r300_screen(r300->context.screen)->caps->is_r500) { + fs = + (struct r3xx_fragment_shader*)CALLOC_STRUCT(r500_fragment_shader); + } else { + fs = + (struct r3xx_fragment_shader*)CALLOC_STRUCT(r300_fragment_shader); + } + + /* Copy state directly into shader. */ + fs->state = *shader; + + tgsi_scan_shader(shader->tokens, &fs->info); + + return (void*)fs; +} + +/* Bind fragment shader state. */ +static void r300_bind_fs_state(struct pipe_context* pipe, void* shader) +{ + struct r300_context* r300 = r300_context(pipe); + struct r3xx_fragment_shader* fs = (struct r3xx_fragment_shader*)shader; + + if (fs == NULL) { + r300->fs = NULL; + return; + } else if (!fs->translated) { + if (r300_screen(r300->context.screen)->caps->is_r500) { + r500_translate_fragment_shader(r300, (struct r500_fragment_shader*)fs); + } else { + r300_translate_fragment_shader(r300, (struct r300_fragment_shader*)fs); + } + } + + fs->translated = true; + r300->fs = fs; + + r300->dirty_state |= R300_NEW_FRAGMENT_SHADER; +} + +/* Delete fragment shader state. 
*/ +static void r300_delete_fs_state(struct pipe_context* pipe, void* shader) +{ + FREE(shader); +} + +static void r300_set_polygon_stipple(struct pipe_context* pipe, + const struct pipe_poly_stipple* state) +{ + /* XXX */ +} + +static INLINE int pack_float_16_6x(float f) { + return ((int)(f * 6.0) & 0xffff); +} + +/* Create a new rasterizer state based on the CSO rasterizer state. + * + * This is a very large chunk of state, and covers most of the graphics + * backend (GB), geometry assembly (GA), and setup unit (SU) blocks. + * + * In a not entirely unironic sidenote, this state has nearly nothing to do + * with the actual block on the Radeon called the rasterizer (RS). */ +static void* r300_create_rs_state(struct pipe_context* pipe, + const struct pipe_rasterizer_state* state) +{ + struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state); + + /* XXX this is part of HW TCL */ + /* XXX endian control */ + rs->vap_control_status = R300_VAP_TCL_BYPASS; + + rs->point_size = pack_float_16_6x(state->point_size) | + (pack_float_16_6x(state->point_size) << R300_POINTSIZE_X_SHIFT); + + rs->line_control = pack_float_16_6x(state->line_width) | + R300_GA_LINE_CNTL_END_TYPE_COMP; + + /* Radeons don't think in "CW/CCW", they think in "front/back". 
*/ + if (state->front_winding == PIPE_WINDING_CW) { + rs->cull_mode = R300_FRONT_FACE_CW; + + if (state->offset_cw) { + rs->polygon_offset_enable |= R300_FRONT_ENABLE; + } + if (state->offset_ccw) { + rs->polygon_offset_enable |= R300_BACK_ENABLE; + } + } else { + rs->cull_mode = R300_FRONT_FACE_CCW; + + if (state->offset_ccw) { + rs->polygon_offset_enable |= R300_FRONT_ENABLE; + } + if (state->offset_cw) { + rs->polygon_offset_enable |= R300_BACK_ENABLE; + } + } + if (state->front_winding & state->cull_mode) { + rs->cull_mode |= R300_CULL_FRONT; + } + if (~(state->front_winding) & state->cull_mode) { + rs->cull_mode |= R300_CULL_BACK; + } + + if (rs->polygon_offset_enable) { + rs->depth_offset_front = rs->depth_offset_back = + pack_float_32(state->offset_units); + rs->depth_scale_front = rs->depth_scale_back = + pack_float_32(state->offset_scale); + } + + if (state->line_stipple_enable) { + rs->line_stipple_config = + R300_GA_LINE_STIPPLE_CONFIG_LINE_RESET_LINE | + (pack_float_32((float)state->line_stipple_factor) & + R300_GA_LINE_STIPPLE_CONFIG_STIPPLE_SCALE_MASK); + /* XXX this might need to be scaled up */ + rs->line_stipple_value = state->line_stipple_pattern; + } + + rs->rs = *state; + + return (void*)rs; +} + +/* Bind rasterizer state. */ +static void r300_bind_rs_state(struct pipe_context* pipe, void* state) +{ + struct r300_context* r300 = r300_context(pipe); + struct r300_rs_state* rs = (struct r300_rs_state*)state; + + draw_set_rasterizer_state(r300->draw, &rs->rs); + + r300->rs_state = rs; + r300->dirty_state |= R300_NEW_RASTERIZER; +} + +/* Free rasterizer state. 
*/ +static void r300_delete_rs_state(struct pipe_context* pipe, void* state) +{ + FREE(state); +} + +static uint32_t translate_wrap(int wrap) { + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return R300_TX_REPEAT; + case PIPE_TEX_WRAP_CLAMP: + return R300_TX_CLAMP; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return R300_TX_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + return R300_TX_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return R300_TX_REPEAT | R300_TX_MIRRORED; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + return R300_TX_CLAMP | R300_TX_MIRRORED; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + return R300_TX_CLAMP_TO_EDGE | R300_TX_MIRRORED; + default: + debug_printf("r300: Unknown texture wrap %d", wrap); + return 0; + } +} + +static uint32_t translate_tex_filters(int min, int mag, int mip) { + uint32_t retval = 0; + switch (min) { + case PIPE_TEX_FILTER_NEAREST: + retval |= R300_TX_MIN_FILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + retval |= R300_TX_MIN_FILTER_LINEAR; + case PIPE_TEX_FILTER_ANISO: + retval |= R300_TX_MIN_FILTER_ANISO; + default: + debug_printf("r300: Unknown texture filter %d", min); + break; + } + switch (mag) { + case PIPE_TEX_FILTER_NEAREST: + retval |= R300_TX_MAG_FILTER_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + retval |= R300_TX_MAG_FILTER_LINEAR; + case PIPE_TEX_FILTER_ANISO: + retval |= R300_TX_MAG_FILTER_ANISO; + default: + debug_printf("r300: Unknown texture filter %d", mag); + break; + } + switch (mip) { + case PIPE_TEX_MIPFILTER_NONE: + retval |= R300_TX_MIN_FILTER_MIP_NONE; + case PIPE_TEX_MIPFILTER_NEAREST: + retval |= R300_TX_MIN_FILTER_MIP_NEAREST; + case PIPE_TEX_MIPFILTER_LINEAR: + retval |= R300_TX_MIN_FILTER_MIP_LINEAR; + default: + debug_printf("r300: Unknown texture filter %d", mip); + break; + } + + return retval; +} + +static uint32_t anisotropy(float max_aniso) { + if (max_aniso >= 16.0f) { + return 
R300_TX_MAX_ANISO_16_TO_1; + } else if (max_aniso >= 8.0f) { + return R300_TX_MAX_ANISO_8_TO_1; + } else if (max_aniso >= 4.0f) { + return R300_TX_MAX_ANISO_4_TO_1; + } else if (max_aniso >= 2.0f) { + return R300_TX_MAX_ANISO_2_TO_1; + } else { + return R300_TX_MAX_ANISO_1_TO_1; + } +} + +static void* + r300_create_sampler_state(struct pipe_context* pipe, + const struct pipe_sampler_state* state) +{ + struct r300_context* r300 = r300_context(pipe); + struct r300_sampler_state* sampler = CALLOC_STRUCT(r300_sampler_state); + int lod_bias; + + sampler->filter0 |= + (translate_wrap(state->wrap_s) << R300_TX_WRAP_S_SHIFT) | + (translate_wrap(state->wrap_t) << R300_TX_WRAP_T_SHIFT) | + (translate_wrap(state->wrap_r) << R300_TX_WRAP_R_SHIFT); + + sampler->filter0 |= translate_tex_filters(state->min_img_filter, + state->mag_img_filter, + state->min_mip_filter); + + lod_bias = CLAMP((int)(state->lod_bias * 32), -(1 << 9), (1 << 9) - 1); + + sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT; + + sampler->filter1 |= anisotropy(state->max_anisotropy); + + util_pack_color(state->border_color, PIPE_FORMAT_A8R8G8B8_UNORM, + &sampler->border_color); + + /* R500-specific fixups and optimizations */ + if (r300_screen(r300->context.screen)->caps->is_r500) { + sampler->filter1 |= R500_BORDER_FIX; + } + + return (void*)sampler; +} + +static void r300_bind_sampler_states(struct pipe_context* pipe, + unsigned count, + void** states) +{ + struct r300_context* r300 = r300_context(pipe); + int i; + + if (count > 8) { + return; + } + + for (i = 0; i < count; i++) { + if (r300->sampler_states[i] != states[i]) { + r300->sampler_states[i] = (struct r300_sampler_state*)states[i]; + r300->dirty_state |= (R300_NEW_SAMPLER << i); + } + } + + r300->sampler_count = count; +} + +static void r300_delete_sampler_state(struct pipe_context* pipe, void* state) +{ + FREE(state); +} + +static void r300_set_sampler_textures(struct pipe_context* pipe, + unsigned count, + struct pipe_texture** texture) +{ + 
struct r300_context* r300 = r300_context(pipe); + int i; + + /* XXX magic num */ + if (count > 8) { + return; + } + + for (i = 0; i < count; i++) { + if (r300->textures[i] != (struct r300_texture*)texture[i]) { + pipe_texture_reference((struct pipe_texture**)&r300->textures[i], + texture[i]); + r300->dirty_state |= (R300_NEW_TEXTURE << i); + } + } + + for (i = count; i < 8; i++) { + if (r300->textures[i]) { + pipe_texture_reference((struct pipe_texture**)&r300->textures[i], + NULL); + r300->dirty_state |= (R300_NEW_TEXTURE << i); + } + } + + r300->texture_count = count; +} + +static void r300_set_scissor_state(struct pipe_context* pipe, + const struct pipe_scissor_state* state) +{ + struct r300_context* r300 = r300_context(pipe); + draw_flush(r300->draw); + + r300->scissor_state->scissor_top_left = + (state->minx << R300_SCISSORS_X_SHIFT) | + (state->miny << R300_SCISSORS_Y_SHIFT); + r300->scissor_state->scissor_bottom_right = + (state->maxx << R300_SCISSORS_X_SHIFT) | + (state->maxy << R300_SCISSORS_Y_SHIFT); + + r300->dirty_state |= R300_NEW_SCISSOR; +} + +static void r300_set_viewport_state(struct pipe_context* pipe, + const struct pipe_viewport_state* state) +{ + struct r300_context* r300 = r300_context(pipe); + /* XXX handing this off to Draw for now */ + draw_set_viewport_state(r300->draw, state); +} + +static void r300_set_vertex_buffers(struct pipe_context* pipe, + unsigned count, + const struct pipe_vertex_buffer* buffers) +{ + struct r300_context* r300 = r300_context(pipe); + + memcpy(r300->vertex_buffers, buffers, + sizeof(struct pipe_vertex_buffer) * count); + + r300->vertex_buffer_count = count; + + draw_flush(r300->draw); + draw_set_vertex_buffers(r300->draw, count, buffers); +} + +static void r300_set_vertex_elements(struct pipe_context* pipe, + unsigned count, + const struct pipe_vertex_element* elements) +{ + struct r300_context* r300 = r300_context(pipe); + /* XXX Draw */ + draw_flush(r300->draw); + draw_set_vertex_elements(r300->draw, count, 
elements); +} + +static void* r300_create_vs_state(struct pipe_context* pipe, + const struct pipe_shader_state* state) +{ + struct r300_context* context = r300_context(pipe); + /* XXX handing this off to Draw for now */ + return draw_create_vertex_shader(context->draw, state); +} + +static void r300_bind_vs_state(struct pipe_context* pipe, void* state) { + struct r300_context* context = r300_context(pipe); + /* XXX handing this off to Draw for now */ + draw_bind_vertex_shader(context->draw, (struct draw_vertex_shader*)state); +} + +static void r300_delete_vs_state(struct pipe_context* pipe, void* state) +{ + struct r300_context* context = r300_context(pipe); + /* XXX handing this off to Draw for now */ + draw_delete_vertex_shader(context->draw, (struct draw_vertex_shader*)state); +} + +void r300_init_state_functions(struct r300_context* r300) +{ + r300->context.create_blend_state = r300_create_blend_state; + r300->context.bind_blend_state = r300_bind_blend_state; + r300->context.delete_blend_state = r300_delete_blend_state; + + r300->context.set_blend_color = r300_set_blend_color; + + r300->context.set_clip_state = r300_set_clip_state; + + r300->context.set_constant_buffer = r300_set_constant_buffer; + + r300->context.create_depth_stencil_alpha_state = r300_create_dsa_state; + r300->context.bind_depth_stencil_alpha_state = r300_bind_dsa_state; + r300->context.delete_depth_stencil_alpha_state = r300_delete_dsa_state; + + r300->context.set_edgeflags = r300_set_edgeflags; + + r300->context.set_framebuffer_state = r300_set_framebuffer_state; + + r300->context.create_fs_state = r300_create_fs_state; + r300->context.bind_fs_state = r300_bind_fs_state; + r300->context.delete_fs_state = r300_delete_fs_state; + + r300->context.set_polygon_stipple = r300_set_polygon_stipple; + + r300->context.create_rasterizer_state = r300_create_rs_state; + r300->context.bind_rasterizer_state = r300_bind_rs_state; + r300->context.delete_rasterizer_state = r300_delete_rs_state; + + 
r300->context.create_sampler_state = r300_create_sampler_state; + r300->context.bind_sampler_states = r300_bind_sampler_states; + r300->context.delete_sampler_state = r300_delete_sampler_state; + + r300->context.set_sampler_textures = r300_set_sampler_textures; + + r300->context.set_scissor_state = r300_set_scissor_state; + + r300->context.set_viewport_state = r300_set_viewport_state; + + r300->context.set_vertex_buffers = r300_set_vertex_buffers; + r300->context.set_vertex_elements = r300_set_vertex_elements; + + r300->context.create_vs_state = r300_create_vs_state; + r300->context.bind_vs_state = r300_bind_vs_state; + r300->context.delete_vs_state = r300_delete_vs_state; +} diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c new file mode 100644 index 0000000000..a51904096f --- /dev/null +++ b/src/gallium/drivers/r300/r300_state_derived.c @@ -0,0 +1,196 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "r300_state_derived.h" + +/* r300_state_derived: Various bits of state which are dependent upon + * currently bound CSO data. */ + +static uint32_t translate_vertex_data_type(int type) { + switch (type) { + case EMIT_1F: + case EMIT_1F_PSIZE: + return R300_DATA_TYPE_FLOAT_1; + break; + case EMIT_2F: + return R300_DATA_TYPE_FLOAT_2; + break; + case EMIT_3F: + return R300_DATA_TYPE_FLOAT_3; + break; + case EMIT_4F: + return R300_DATA_TYPE_FLOAT_4; + break; + default: + debug_printf("r300: Implementation error: " + "Bad vertex data type!\n"); + break; + } + + return 0; +} + +/* Update the vertex_info struct in our r300_context. + * + * The vertex_info struct describes the post-TCL format of vertices. It is + * required for Draw when doing SW TCL, and also for describing the + * dreaded RS block on R300 chipsets. */ +/* XXX this function should be able to handle vert shaders as well as draw */ +static void r300_update_vertex_layout(struct r300_context* r300) +{ + struct vertex_info vinfo; + boolean pos = false, psize = false, fog = false; + int i, texs = 0, cols = 0; + + struct tgsi_shader_info* info = &r300->fs->info; + memset(&vinfo, 0, sizeof(vinfo)); + + assert(info->num_inputs <= 16); + + /* This is rather lame. Since draw_find_vs_output doesn't return an error + * when it can't find an output, we have to pre-iterate and count each + * output ourselves. 
*/ + for (i = 0; i < info->num_inputs; i++) { + switch (info->input_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + pos = true; + break; + case TGSI_SEMANTIC_COLOR: + cols++; + break; + case TGSI_SEMANTIC_FOG: + fog = true; + break; + case TGSI_SEMANTIC_PSIZE: + psize = true; + break; + case TGSI_SEMANTIC_GENERIC: + texs++; + break; + default: + debug_printf("r300: Unknown vertex input %d\n", + info->input_semantic_name[i]); + break; + } + } + + /* Do the actual vertex_info setup. + * + * vertex_info has four uints of hardware-specific data in it. + * vinfo.hwfmt[0] is R300_VAP_VTX_STATE_CNTL + * vinfo.hwfmt[1] is R300_VAP_VSM_VTX_ASSM + * vinfo.hwfmt[2] is R300_VAP_OUTPUT_VTX_FMT_0 + * vinfo.hwfmt[3] is R300_VAP_OUTPUT_VTX_FMT_1 */ + + vinfo.hwfmt[0] = 0x5555; /* XXX this is classic Mesa bonghits */ + + if (!pos) { + debug_printf("r300: Forcing vertex position attribute emit...\n"); + } + + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_POS, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_POSITION, 0)); + vinfo.hwfmt[1] |= R300_INPUT_CNTL_POS; + vinfo.hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__POS_PRESENT; + + if (psize) { + draw_emit_vertex_attr(&vinfo, EMIT_1F_PSIZE, INTERP_LINEAR, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_PSIZE, 0)); + vinfo.hwfmt[2] |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT; + } + + for (i = 0; i < cols; i++) { + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_COLOR, i)); + vinfo.hwfmt[1] |= R300_INPUT_CNTL_COLOR; + vinfo.hwfmt[2] |= (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << i); + } + + if (fog) { + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_PERSPECTIVE, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_FOG, 0)); + vinfo.hwfmt[2] |= + (R300_VAP_OUTPUT_VTX_FMT_0__COLOR_0_PRESENT << cols); + } + + for (i = 0; i < texs; i++) { + draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, + draw_find_vs_output(r300->draw, TGSI_SEMANTIC_GENERIC, i)); + vinfo.hwfmt[1] |= 
(R300_INPUT_CNTL_TC0 << i); + vinfo.hwfmt[3] |= (4 << (3 * i)); + } + + draw_compute_vertex_size(&vinfo); + + if (memcmp(&r300->vertex_info, &vinfo, sizeof(struct vertex_info))) { + uint32_t temp; + +#define BORING_SWIZZLE \ + ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_SHIFT) | \ + (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_SHIFT) | \ + (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_SHIFT) | \ + (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_SHIFT) | \ + (0xf << R300_WRITE_ENA_SHIFT)) + + for (i = 0; i < vinfo.num_attribs; i++) { + temp = translate_vertex_data_type(vinfo.attrib[i].emit) | + R300_SIGNED; + if (i & 1) { + r300->vertex_info.vap_prog_stream_cntl[i >> 1] &= 0xffff0000; + r300->vertex_info.vap_prog_stream_cntl[i >> 1] |= + (translate_vertex_data_type(vinfo.attrib[i].emit) | + R300_SIGNED) << 16; + } else { + r300->vertex_info.vap_prog_stream_cntl[i >> 1] &= 0xffff; + r300->vertex_info.vap_prog_stream_cntl[i >> 1] |= + translate_vertex_data_type(vinfo.attrib[i].emit) | + R300_SIGNED; + } + + r300->vertex_info.vap_prog_stream_cntl_ext[i >> 1] |= + (BORING_SWIZZLE << (i & 1 ? 16 : 0)); + } + r300->vertex_info.vap_prog_stream_cntl[i >> 1] |= (R300_LAST_VEC << + (i & 1 ? 16 : 0)); + + memcpy(&r300->vertex_info, &vinfo, sizeof(struct vertex_info)); + r300->dirty_state |= R300_NEW_VERTEX_FORMAT; + } +} + +/* Set up the RS block. This is the part of the chipset that actually does + * the rasterization of vertices into fragments. This is also the part of the + * chipset that locks up if any part of it is even slightly wrong. 
*/
+void r300_update_rs_block(struct r300_context* r300)
+{ /* XXX stub: nothing is derived or emitted for the RS block yet. */
+}
+/* Refresh the state that is derived from other state objects.
+ *
+ * A dirty fragment shader (R300_NEW_FRAGMENT_SHADER) triggers
+ * r300_update_vertex_layout(); a dirty vertex format
+ * (R300_NEW_VERTEX_FORMAT) triggers r300_update_rs_block(). The checks run
+ * in this order, so an R300_NEW_VERTEX_FORMAT flag raised during the first
+ * step is seen by the second test within the same call. Neither flag is
+ * cleared here. */
+void r300_update_derived_state(struct r300_context* r300)
+{
+    if (r300->dirty_state & R300_NEW_FRAGMENT_SHADER) {
+        r300_update_vertex_layout(r300);
+    }
+
+    if (r300->dirty_state & R300_NEW_VERTEX_FORMAT) {
+        r300_update_rs_block(r300);
+    }
+}
diff --git a/src/gallium/drivers/r300/r300_state_derived.h b/src/gallium/drivers/r300/r300_state_derived.h
new file mode 100644
index 0000000000..72ba6b928d
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state_derived.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ + +#ifndef R300_STATE_DERIVED_H +#define R300_STATE_DERIVED_H + +#include "draw/draw_vertex.h" + +#include "r300_context.h" +#include "r300_reg.h" + +void r300_update_derived_state(struct r300_context* r300); + +#endif /* R300_STATE_DERIVED_H */ diff --git a/src/gallium/drivers/r300/r300_state_inlines.h b/src/gallium/drivers/r300/r300_state_inlines.h new file mode 100644 index 0000000000..005fb74ed6 --- /dev/null +++ b/src/gallium/drivers/r300/r300_state_inlines.h @@ -0,0 +1,83 @@ +/* + * Copyright 2009 Joakim Sindholt <opensource@zhasha.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/
+
+#ifndef R300_STATE_INLINES_H
+#define R300_STATE_INLINES_H
+
+#include "pipe/p_format.h"
+
+#include "r300_reg.h"
+/* Translate a gallium pipe_format into the matching R300_COLOR_FORMAT_*
+ * register value. Formats without a case below are reported through
+ * debug_printf() and translate to 0, so a 0 return cannot be told apart
+ * from a format constant that happens to be 0 (NOTE(review): confirm no
+ * R300_COLOR_FORMAT_* value is 0). */
+static INLINE uint32_t r300_translate_colorformat(enum pipe_format format)
+{
+    switch (format) {
+        case PIPE_FORMAT_A8R8G8B8_UNORM:
+            return R300_COLOR_FORMAT_ARGB8888;
+        case PIPE_FORMAT_I8_UNORM:
+            return R300_COLOR_FORMAT_I8;
+        case PIPE_FORMAT_A1R5G5B5_UNORM:
+            return R300_COLOR_FORMAT_ARGB1555;
+        case PIPE_FORMAT_R5G6B5_UNORM:
+            return R300_COLOR_FORMAT_RGB565;
+        /* XXX Not in pipe_format
+        case PIPE_FORMAT_A32R32G32B32:
+            return R300_COLOR_FORMAT_ARGB32323232;
+        case PIPE_FORMAT_A16R16G16B16:
+            return R300_COLOR_FORMAT_ARGB16161616; */
+        case PIPE_FORMAT_A4R4G4B4_UNORM:
+            return R300_COLOR_FORMAT_ARGB4444;
+        /* XXX Not in pipe_format
+        case PIPE_FORMAT_A10R10G10B10_UNORM:
+            return R500_COLOR_FORMAT_ARGB10101010;
+        case PIPE_FORMAT_A2R10G10B10_UNORM:
+            return R500_COLOR_FORMAT_ARGB2101010;
+        case PIPE_FORMAT_I10_UNORM:
+            return R500_COLOR_FORMAT_I10; */
+        default:
+            debug_printf("r300: Implementation error: " \
+                "Got unsupported color format %s in %s\n",
+                pf_name(format), __FUNCTION__);
+            break;
+    }
+
+    return 0;
+}
+/* Translate a gallium depth/stencil pipe_format into the matching
+ * R300_DEPTHFORMAT_* register value. Only Z16 and Z24S8 are handled; any
+ * other format is reported through debug_printf() and translates to 0. */
+static INLINE uint32_t r300_translate_zsformat(enum pipe_format format)
+{
+    switch (format) {
+        case PIPE_FORMAT_Z16_UNORM:
+            return R300_DEPTHFORMAT_16BIT_INT_Z;
+        /* XXX R300_DEPTHFORMAT_16BIT_13E3 anyone? */
+        case PIPE_FORMAT_Z24S8_UNORM:
+            return R300_DEPTHFORMAT_24BIT_INT_Z_8BIT_STENCIL;
+        default:
+            debug_printf("r300: Implementation error: " \
+                "Got unsupported ZS format %s in %s\n",
+                pf_name(format), __FUNCTION__);
+            break;
+    }
+
+    return 0;
+}
+
+#endif /* R300_STATE_INLINES_H */
diff --git a/src/gallium/drivers/r300/r300_state_shader.c b/src/gallium/drivers/r300/r300_state_shader.c
new file mode 100644
index 0000000000..d10ac55580
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_state_shader.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ + +#include "r300_state_shader.h" + +static void r300_copy_passthrough_shader(struct r300_fragment_shader* fs) +{ + struct r300_fragment_shader* pt = &r300_passthrough_fragment_shader; + fs->shader.stack_size = pt->shader.stack_size; + fs->alu_instruction_count = pt->alu_instruction_count; + fs->tex_instruction_count = pt->tex_instruction_count; + fs->indirections = pt->indirections; + fs->instructions[0] = pt->instructions[0]; +} + +static void r500_copy_passthrough_shader(struct r500_fragment_shader* fs) +{ + struct r500_fragment_shader* pt = &r500_passthrough_fragment_shader; + fs->shader.stack_size = pt->shader.stack_size; + fs->instruction_count = pt->instruction_count; + fs->instructions[0] = pt->instructions[0]; +} + +void r300_translate_fragment_shader(struct r300_context* r300, + struct r300_fragment_shader* fs) +{ + r300_copy_passthrough_shader(fs); +} + +void r500_translate_fragment_shader(struct r300_context* r300, + struct r500_fragment_shader* fs) +{ + r500_copy_passthrough_shader(fs); +} diff --git a/src/gallium/drivers/r300/r300_state_shader.h b/src/gallium/drivers/r300/r300_state_shader.h new file mode 100644 index 0000000000..73025b2dcc --- /dev/null +++ b/src/gallium/drivers/r300/r300_state_shader.h @@ -0,0 +1,90 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef R300_STATE_SHADER_H
+#define R300_STATE_SHADER_H
+
+#include "r300_context.h"
+#include "r300_reg.h"
+#include "r300_screen.h"
+
+void r300_translate_fragment_shader(struct r300_context* r300,
+                                    struct r300_fragment_shader* fs);
+
+void r500_translate_fragment_shader(struct r300_context* r300,
+                                    struct r500_fragment_shader* fs);
+/* Built-in one-instruction passthrough fragment program for R300-class
+ * parts. Going by the macro names, both the RGB and the alpha halves are a
+ * MAD of source 0 with operands ONE and ZERO written to the output register
+ * (i.e. src0 * 1 + 0) -- TODO confirm against the register reference.
+ * Being `static const` in a header, every translation unit that includes
+ * this file gets its own private copy of the table. */
+static const struct r300_fragment_shader r300_passthrough_fragment_shader = {
+    /* XXX This is the emission code. TODO: decode
+    OUT_CS_REG(R300_US_CONFIG, 0);
+    OUT_CS_REG(R300_US_CODE_OFFSET, 0x0);
+    OUT_CS_REG(R300_US_CODE_ADDR_0, 0x0);
+    OUT_CS_REG(R300_US_CODE_ADDR_1, 0x0);
+    OUT_CS_REG(R300_US_CODE_ADDR_2, 0x0);
+    OUT_CS_REG(R300_US_CODE_ADDR_3, 0x400000);
+*/
+    .alu_instruction_count = 1,
+    .tex_instruction_count = 0,
+    .indirections = 1,
+    .shader.stack_size = 2,
+
+    .instructions[0].alu_rgb_inst = R300_RGB_SWIZA(R300_ALU_ARGC_SRC0C_XYZ) |
+        R300_RGB_SWIZB(R300_ALU_ARGC_ONE) |
+        R300_RGB_SWIZC(R300_ALU_ARGC_ZERO) |
+        R300_ALU_OUTC_MAD,
+    .instructions[0].alu_rgb_addr = R300_RGB_ADDR0(0) | R300_RGB_ADDR1(0) |
+        R300_RGB_ADDR2(0) | R300_ALU_DSTC_OUTPUT_XYZ,
+    .instructions[0].alu_alpha_inst = R300_ALPHA_SWIZA(R300_ALU_ARGA_SRC0A) |
+        R300_ALPHA_SWIZB(R300_ALU_ARGA_ONE) |
+        R300_ALPHA_SWIZC(R300_ALU_ARGA_ZERO) |
+        R300_ALU_OUTA_MAD,
+    .instructions[0].alu_alpha_addr = R300_ALPHA_ADDR0(0) |
+        R300_ALPHA_ADDR1(0) | R300_ALPHA_ADDR2(0) | R300_ALU_DSTA_OUTPUT,
+};
+/* Built-in one-instruction passthrough fragment program for R500-class
+ * parts: a single output-type instruction flagged as the last one, with the
+ * RGB and alpha output masks and clamps enabled and CMP selected as both the
+ * RGB and alpha opcode. */
+static const struct r500_fragment_shader r500_passthrough_fragment_shader = {
+    .shader.stack_size = 0,
+    .instruction_count = 1,
+    .instructions[0].inst0 = R500_INST_TYPE_OUT |
+        R500_INST_TEX_SEM_WAIT | R500_INST_LAST |
+        R500_INST_RGB_OMASK_RGB | R500_INST_ALPHA_OMASK |
+        R500_INST_RGB_CLAMP | R500_INST_ALPHA_CLAMP,
+    .instructions[0].inst1 =
+        R500_RGB_ADDR0(0) | R500_RGB_ADDR1(0) | R500_RGB_ADDR1_CONST |
+        R500_RGB_ADDR2(0) | R500_RGB_ADDR2_CONST,
+    .instructions[0].inst2 =
+        R500_ALPHA_ADDR0(0) | R500_ALPHA_ADDR1(0) | R500_ALPHA_ADDR1_CONST |
+        R500_ALPHA_ADDR2(0) | R500_ALPHA_ADDR2_CONST,
+    .instructions[0].inst3 =
+        R500_ALU_RGB_SEL_A_SRC0 | R500_ALU_RGB_R_SWIZ_A_R |
+        R500_ALU_RGB_G_SWIZ_A_G | R500_ALU_RGB_B_SWIZ_A_B |
+        R500_ALU_RGB_SEL_B_SRC0 | R500_ALU_RGB_R_SWIZ_B_R |
+        R500_ALU_RGB_B_SWIZ_B_G | R500_ALU_RGB_G_SWIZ_B_B, /* NOTE(review): operand B crosses G and B, unlike operand A -- intentional? */
+    .instructions[0].inst4 =
+        R500_ALPHA_OP_CMP | R500_ALPHA_SWIZ_A_A | R500_ALPHA_SWIZ_B_A,
+    .instructions[0].inst5 =
+        R500_ALU_RGBA_OP_CMP | R500_ALU_RGBA_R_SWIZ_0 |
+        R500_ALU_RGBA_G_SWIZ_0 | R500_ALU_RGBA_B_SWIZ_0 |
+        R500_ALU_RGBA_A_SWIZ_0,
+};
+
+#endif /* R300_STATE_SHADER_H */
diff --git a/src/gallium/drivers/r300/r300_surface.c b/src/gallium/drivers/r300/r300_surface.c
new file mode 100644
index 0000000000..49e4a96f83
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_surface.c
@@ -0,0 +1,326 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *                Joakim Sindholt <opensource@zhasha.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be
included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_surface.h"
+
+/* Provides pipe_context's "surface_fill". Commonly used for clearing
+ * buffers.
+ *
+ * The fill is done on the GPU by programming a complete, fixed pipeline
+ * state and drawing a single point of w x h pixels with the passthrough
+ * fragment shader and the *_clear_state blend/DSA objects.
+ *
+ * dest     surface to fill; its texture is cast to struct r300_texture
+ * x, y     emitted as the viewport offsets and the top-left scissor corner
+ * w, h     emitted as the point size and the second scissor corner
+ * color    unpacked as 8-bit channels with red in bits 16-23, green in
+ *          bits 8-15 and blue in bits 0-7; bits 24-31 are ignored
+ *
+ * The command stream is emitted in three BEGIN_CS/END_CS sections whose
+ * dword counts are hard-coded, so any register write added or removed below
+ * must be reflected in the matching BEGIN_CS argument. */
+static void r300_surface_fill(struct pipe_context* pipe,
+                              struct pipe_surface* dest,
+                              unsigned x, unsigned y,
+                              unsigned w, unsigned h,
+                              unsigned color)
+{
+    struct r300_context* r300 = r300_context(pipe);
+    CS_LOCALS(r300);
+    struct r300_capabilities* caps = ((struct r300_screen*)pipe->screen)->caps;
+    struct r300_texture* tex = (struct r300_texture*)dest->texture;
+    int i;
+    float r, g, b, a; /* NOTE(review): a is never assigned or read; alpha is emitted as a constant 1.0 below */
+    unsigned pixpitch = tex->stride / tex->tex.block.size;
+    r = (float)((color >> 16) & 0xff) / 255.0f;
+    g = (float)((color >>  8) & 0xff) / 255.0f;
+    b = (float)((color >>  0) & 0xff) / 255.0f;
+    debug_printf("r300: Filling surface %p at (%d,%d),"
+        " dimensions %dx%d (pixel pitch %d), color 0x%x\n",
+        dest, x, y, w, h, pixpitch, color);
+
+    /* Fallback? */
+    /*if (0) {
+        debug_printf("r300: Falling back on surface clear...");
+        void* map = pipe->screen->surface_map(pipe->screen, dest,
+            PIPE_BUFFER_USAGE_CPU_WRITE);
+        pipe_fill_rect(map, &dest->block, &dest->stride, x, y, w, h, color);
+        pipe->screen->surface_unmap(pipe->screen, dest);
+        return;
+    }*/
+
+    BEGIN_CS(163 + (caps->is_r500 ? 22 : 14) + (caps->has_tcl ? 4 : 2));
+    /* Flush PVS. */
+    OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
+
+    OUT_CS_REG(R300_SE_VTE_CNTL, R300_VPORT_X_SCALE_ENA |
+        R300_VPORT_X_OFFSET_ENA | R300_VPORT_Y_SCALE_ENA |
+        R300_VPORT_Y_OFFSET_ENA | R300_VPORT_Z_SCALE_ENA |
+        R300_VPORT_Z_OFFSET_ENA | R300_VTX_W0_FMT);
+    /* Vertex size. */
+    OUT_CS_REG(R300_VAP_VTX_SIZE, 0x8);
+    /* Max and min vertex index clamp. */
+    OUT_CS_REG(R300_VAP_VF_MAX_VTX_INDX, 0xFFFFFF);
+    OUT_CS_REG(R300_VAP_VF_MIN_VTX_INDX, 0x0);
+    /* XXX endian */
+    OUT_CS_REG(R300_VAP_CNTL_STATUS, R300_VC_NO_SWAP);
+    OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0, 0x0);
+    /* XXX magic number not in r300_reg */
+    OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA);
+    OUT_CS_REG(R300_VAP_CLIP_CNTL, 0x0);
+    OUT_CS_REG_SEQ(R300_VAP_GB_VERT_CLIP_ADJ, 4);
+    OUT_CS_32F(1.0);
+    OUT_CS_32F(1.0);
+    OUT_CS_32F(1.0);
+    OUT_CS_32F(1.0);
+    /* XXX is this too long? */
+    OUT_CS_REG(VAP_PVS_VTX_TIMEOUT_REG, 0xFFFF);
+    OUT_CS_REG(R300_GB_ENABLE, R300_GB_POINT_STUFF_ENABLE |
+        R300_GB_LINE_STUFF_ENABLE | R300_GB_TRIANGLE_STUFF_ENABLE);
+    /* XXX more magic numbers */
+    OUT_CS_REG(R300_GB_MSPOS0, 0x66666666);
+    OUT_CS_REG(R300_GB_MSPOS1, 0x66666666);
+    /* XXX why doesn't classic Mesa write the number of pipes, too? */
+    OUT_CS_REG(R300_GB_TILE_CONFIG, R300_GB_TILE_ENABLE |
+        R300_GB_TILE_SIZE_16);
+    OUT_CS_REG(R300_GB_SELECT, R300_GB_FOG_SELECT_1_1_W);
+    OUT_CS_REG(R300_GB_AA_CONFIG, 0x0);
+    /* XXX point tex stuffing */
+    OUT_CS_REG_SEQ(R300_GA_POINT_S0, 1);
+    OUT_CS_32F(0.0);
+    OUT_CS_REG_SEQ(R300_GA_POINT_S1, 1);
+    OUT_CS_32F(1.0);
+    OUT_CS_REG(R300_GA_TRIANGLE_STIPPLE, 0x5 |
+        (0x5 << R300_GA_TRIANGLE_STIPPLE_Y_SHIFT_SHIFT));
+    /* XXX should this be related to the actual point size? */
+    OUT_CS_REG(R300_GA_POINT_MINMAX, 0x6 |
+        (0x1800 << R300_GA_POINT_MINMAX_MAX_SHIFT));
+    /* XXX this big chunk should be refactored into rs_state */
+    OUT_CS_REG(R300_GA_LINE_CNTL, 0x00030006);
+    OUT_CS_REG(R300_GA_LINE_STIPPLE_CONFIG, 0x3BAAAAAB);
+    OUT_CS_REG(R300_GA_LINE_STIPPLE_VALUE, 0x00000000);
+    OUT_CS_REG(R300_GA_LINE_S0, 0x00000000);
+    OUT_CS_REG(R300_GA_LINE_S1, 0x3F800000);
+    OUT_CS_REG(R300_GA_ENHANCE, 0x00000002);
+    OUT_CS_REG(R300_GA_COLOR_CONTROL, 0x0003AAAA);
+    OUT_CS_REG(R300_GA_SOLID_RG, 0x00000000);
+    OUT_CS_REG(R300_GA_SOLID_BA, 0x00000000);
+    OUT_CS_REG(R300_GA_POLY_MODE, 0x00000000);
+    OUT_CS_REG(R300_GA_ROUND_MODE, 0x00000001);
+    OUT_CS_REG(R300_GA_OFFSET, 0x00000000);
+    OUT_CS_REG(R300_GA_FOG_SCALE, 0x3DBF1412);
+    OUT_CS_REG(R300_GA_FOG_OFFSET, 0x00000000);
+    OUT_CS_REG(R300_SU_TEX_WRAP, 0x00000000);
+    OUT_CS_REG(R300_SU_POLY_OFFSET_FRONT_SCALE, 0x00000000);
+    OUT_CS_REG(R300_SU_POLY_OFFSET_FRONT_OFFSET, 0x00000000);
+    OUT_CS_REG(R300_SU_POLY_OFFSET_BACK_SCALE, 0x00000000);
+    OUT_CS_REG(R300_SU_POLY_OFFSET_BACK_OFFSET, 0x00000000);
+    OUT_CS_REG(R300_SU_POLY_OFFSET_ENABLE, 0x00000000);
+    OUT_CS_REG(R300_SU_CULL_MODE, 0x00000000);
+    OUT_CS_REG(R300_SU_DEPTH_SCALE, 0x4B7FFFFF);
+    OUT_CS_REG(R300_SU_DEPTH_OFFSET, 0x00000000);
+    OUT_CS_REG(R300_SC_HYPERZ, 0x0000001C);
+    OUT_CS_REG(R300_SC_EDGERULE, 0x2DA49525);
+    OUT_CS_REG(R300_FG_FOG_BLEND, 0x00000002);
+    OUT_CS_REG(R300_FG_FOG_COLOR_R, 0x00000000);
+    OUT_CS_REG(R300_FG_FOG_COLOR_G, 0x00000000);
+    OUT_CS_REG(R300_FG_FOG_COLOR_B, 0x00000000);
+    OUT_CS_REG(R300_FG_DEPTH_SRC, 0x00000000);
+    OUT_CS_REG(R300_FG_DEPTH_SRC, 0x00000000); /* NOTE(review): same register written twice in a row -- was another register intended? */
+    OUT_CS_REG(R300_RB3D_CCTL, 0x00000000);
+    OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F);
+
+    /* XXX: Oh the wonderful unknown.
+     * Not writing these 8 regs seems to make no difference at all and seeing
+     * as how they're not documented, we should leave them out for now.
+    OUT_CS_REG_SEQ(0x4E54, 8);
+    for (i = 0; i < 8; i++) {
+        OUT_CS(0x00000000);
+    } */
+    OUT_CS_REG(R300_RB3D_AARESOLVE_CTL, 0x00000000);
+    OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_LTE_THRESHOLD, 0x00000000);
+    OUT_CS_REG(R500_RB3D_DISCARD_SRC_PIXEL_GTE_THRESHOLD, 0xFFFFFFFF);
+    OUT_CS_REG(R300_ZB_FORMAT, 0x00000002);
+    OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT, 0x00000003);
+    OUT_CS_REG(R300_ZB_BW_CNTL, 0x00000000);
+    OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
+    /* XXX Moar unknown that should probably be left out.
+    OUT_CS_REG(0x4F30, 0x00000000);
+    OUT_CS_REG(0x4F34, 0x00000000); */
+    OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000);
+    OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000);
+    if (caps->has_tcl) {
+        OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
+            (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) |
+            ((R300_LAST_VEC | (1 << R300_DST_VEC_LOC_SHIFT) |
+                R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT));
+    } else {
+        OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_0,
+            (R300_DATA_TYPE_FLOAT_4 << R300_DATA_TYPE_0_SHIFT) |
+            ((R300_LAST_VEC | (2 << R300_DST_VEC_LOC_SHIFT) |
+                R300_DATA_TYPE_FLOAT_4) << R300_DATA_TYPE_1_SHIFT));
+    }
+    OUT_CS_REG(R300_FG_FOG_BLEND, 0x00000000);
+    OUT_CS_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0, 0xF688F688);
+    OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1);
+    OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405);
+    OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F);
+    OUT_CS_REG(R300_VAP_VTX_SIZE, 0x00000008);
+    OUT_CS_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0xAAAAAAAA);
+    OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_0, 0x00000003);
+    OUT_CS_REG(R300_VAP_OUTPUT_VTX_FMT_1, 0x00000000);
+    OUT_CS_REG(R300_TX_ENABLE, 0x0);
+    /* XXX viewport setup */
+    OUT_CS_REG_SEQ(R300_SE_VPORT_XSCALE, 6);
+    OUT_CS_32F(1.0);
+    OUT_CS_32F((float)x);
+    OUT_CS_32F(1.0);
+    OUT_CS_32F((float)y);
+    OUT_CS_32F(1.0);
+    OUT_CS_32F(0.0);
+
+    if (caps->has_tcl) {
+        OUT_CS_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE |
+            R300_PS_UCP_MODE_CLIP_AS_TRIFAN);
+    }
+
+    /* The size of the point we're about to draw, in sixths of pixels */
+    OUT_CS_REG(R300_GA_POINT_SIZE,
+        ((h * 6) & R300_POINTSIZE_Y_MASK) |
+        ((w * 6) << R300_POINTSIZE_X_SHIFT));
+
+    /* XXX */
+    OUT_CS_REG(R300_SC_CLIP_RULE, 0xaaaa);
+
+    /* Pixel scissors
+     * NOTE(review): the second corner is written as (w, h), not
+     * (x + w, y + h) -- confirm this is right for fills with x or y != 0. */
+    OUT_CS_REG_SEQ(R300_SC_SCISSORS_TL, 2);
+    OUT_CS((x << R300_SCISSORS_X_SHIFT) | (y << R300_SCISSORS_Y_SHIFT));
+    OUT_CS((w << R300_SCISSORS_X_SHIFT) | (h << R300_SCISSORS_Y_SHIFT));
+
+    /* RS block setup */
+    if (caps->is_r500) {
+        /* XXX We seem to be in disagreement about how many of these we have
+         * RS:RS_IP_[0-15] [R/W] 32 bits Access: 8/16/32 MMReg:0x4074-0x40b0
+         * Now that's from the docs. I don't care what the mesa driver says */
+        OUT_CS_REG_SEQ(R500_RS_IP_0, 16);
+        for (i = 0; i < 16; i++) {
+            OUT_CS((R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
+                (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
+                (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
+                (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT));
+        }
+        OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
+        OUT_CS((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+        OUT_CS(0x00000000);
+        OUT_CS_REG(R500_RS_INST_0, R500_RS_INST_COL_CN_WRITE);
+    } else {
+        OUT_CS_REG_SEQ(R300_RS_IP_0, 8);
+        for (i = 0; i < 8; i++) {
+            OUT_CS(R300_RS_SEL_T(R300_RS_SEL_K0) |
+                R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1));
+        }
+        OUT_CS_REG_SEQ(R300_RS_COUNT, 2);
+        OUT_CS((1 << R300_IC_COUNT_SHIFT) | R300_HIRES_EN);
+        /* XXX Shouldn't this be 0? */
+        OUT_CS(1);
+        OUT_CS_REG(R300_RS_INST_0, R300_RS_INST_COL_CN_WRITE);
+    }
+    END_CS;
+
+    /* Fragment shader setup */
+    if (caps->is_r500) {
+        r500_emit_fragment_shader(r300, &r500_passthrough_fragment_shader);
+    } else {
+        r300_emit_fragment_shader(r300, &r300_passthrough_fragment_shader);
+    }
+
+    BEGIN_CS(8 + (caps->has_tcl ? 20 : 2));
+    OUT_CS_REG_SEQ(R300_US_OUT_FMT_0, 4);
+    OUT_CS(R300_C0_SEL_B | R300_C1_SEL_G | R300_C2_SEL_R | R300_C3_SEL_A);
+    OUT_CS(R300_US_OUT_FMT_UNUSED);
+    OUT_CS(R300_US_OUT_FMT_UNUSED);
+    OUT_CS(R300_US_OUT_FMT_UNUSED);
+    OUT_CS_REG(R300_US_W_FMT, R300_W_FMT_W0);
+    /* XXX these magic numbers should be explained when
+     * this becomes a cached state object */
+    if (caps->has_tcl) {
+        OUT_CS_REG(R300_VAP_CNTL, 0xA |
+            (0x5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+            (0xB << R300_VF_MAX_VTX_NUM_SHIFT) |
+            (caps->num_vert_fpus << R300_PVS_NUM_FPUS_SHIFT));
+        OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_0, 0x00100000);
+        OUT_CS_REG(R300_VAP_PVS_CONST_CNTL, 0x00000000);
+        OUT_CS_REG(R300_VAP_PVS_CODE_CNTL_1, 0x00000001);
+        /* XXX translate these back into normal instructions */
+        OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x1);
+        OUT_CS_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0x0);
+        OUT_CS_ONE_REG(R300_VAP_PVS_UPLOAD_DATA, 8);
+        OUT_CS(0x00F00203);
+        OUT_CS(0x00D10001);
+        OUT_CS(0x01248001);
+        OUT_CS(0x00000000);
+        OUT_CS(0x00F02203);
+        OUT_CS(0x00D10021);
+        OUT_CS(0x01248021);
+        OUT_CS(0x00000000);
+    } else {
+        OUT_CS_REG(R300_VAP_CNTL, 0xA |
+            (0x5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+            (0x5 << R300_VF_MAX_VTX_NUM_SHIFT) |
+            (caps->num_vert_fpus << R300_PVS_NUM_FPUS_SHIFT));
+    }
+    END_CS;
+
+    r300_emit_blend_state(r300, &blend_clear_state);
+    r300_emit_blend_color_state(r300, &blend_color_clear_state);
+    r300_emit_dsa_state(r300, &dsa_clear_state);
+
+    BEGIN_CS(24);
+    /* Flush colorbuffer and blend caches. */
+    OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT,
+        R300_RB3D_DSTCACHE_CTLSTAT_DC_FLUSH_FLUSH_DIRTY_3D |
+        R300_RB3D_DSTCACHE_CTLSTAT_DC_FINISH_SIGNAL);
+    OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
+
+    OUT_CS_REG_SEQ(R300_RB3D_COLOROFFSET0, 1);
+    OUT_CS_RELOC(tex->buffer, 0, 0, RADEON_GEM_DOMAIN_VRAM, 0);
+    OUT_CS_REG(R300_RB3D_COLORPITCH0, pixpitch |
+        r300_translate_colorformat(tex->tex.format));
+    OUT_CS_REG(RB3D_COLOR_CHANNEL_MASK, 0x0000000F);
+    /* XXX Packet3
+     * One immediate-mode point with eight floats: four position values
+     * followed by the four color channels. */
+    OUT_CS(CP_PACKET3(R200_3D_DRAW_IMMD_2, 8));
+    OUT_CS(R300_PRIM_TYPE_POINT | R300_PRIM_WALK_RING |
+        (1 << R300_PRIM_NUM_VERTICES_SHIFT));
+    OUT_CS_32F(w / 2.0);
+    OUT_CS_32F(h / 2.0);
+    /* XXX this should be the depth value to clear to */
+    OUT_CS_32F(1.0);
+    OUT_CS_32F(1.0);
+    OUT_CS_32F(r);
+    OUT_CS_32F(g);
+    OUT_CS_32F(b);
+    OUT_CS_32F(1.0);
+
+    /* XXX figure out why this is 0xA and not 0x2 */
+    OUT_CS_REG(R300_RB3D_DSTCACHE_CTLSTAT, 0xA);
+    /* XXX OUT_CS_REG(R300_ZB_ZCACHE_CTLSTAT,
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE |
+        R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE); */
+
+    END_CS;
+
+    r300->dirty_hw++;
+}
+/* Install r300_surface_fill() as the context's surface_fill hook. */
+void r300_init_surface_functions(struct r300_context* r300)
+{
+    r300->context.surface_fill = r300_surface_fill;
+}
diff --git a/src/gallium/drivers/r300/r300_surface.h b/src/gallium/drivers/r300/r300_surface.h
new file mode 100644
index 0000000000..442eac2cf2
--- /dev/null
+++ b/src/gallium/drivers/r300/r300_surface.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished
to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_SURFACE_H +#define R300_SURFACE_H + +#include "pipe/p_context.h" +#include "pipe/p_screen.h" + +#include "util/u_rect.h" + +#include "r300_context.h" +#include "r300_cs.h" +#include "r300_emit.h" +#include "r300_state_shader.h" +#include "r300_state_inlines.h" + +const struct r300_blend_state blend_clear_state = { + .blend_control = 0x0, + .alpha_blend_control = 0x0, + .rop = 0x0, + .dither = 0x0, +}; + +const struct r300_blend_color_state blend_color_clear_state = { + .blend_color = 0x0, + .blend_color_red_alpha = 0x0, + .blend_color_green_blue = 0x0, +}; + +const struct r300_dsa_state dsa_clear_state = { + .alpha_function = 0x0, + .alpha_reference = 0x0, + .z_buffer_control = 0x0, + .z_stencil_control = 0x0, + .stencil_ref_mask = R300_STENCILWRITEMASK_MASK, + .z_buffer_top = R300_ZTOP_ENABLE, + .stencil_ref_bf = 0x0, +}; + +#endif /* R300_SURFACE_H */ diff --git a/src/gallium/drivers/r300/r300_swtcl_emit.c b/src/gallium/drivers/r300/r300_swtcl_emit.c new file mode 100644 index 0000000000..5b028aaf7b --- /dev/null +++ b/src/gallium/drivers/r300/r300_swtcl_emit.c @@ -0,0 +1,327 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this 
software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#include "draw/draw_pipe.h" +#include "draw/draw_vbuf.h" +#include "util/u_memory.h" + +#include "r300_cs.h" +#include "r300_context.h" +#include "r300_reg.h" + +/* r300_swtcl_emit: Vertex and index buffer primitive emission. No HW TCL. 
*/ + +struct r300_swtcl_render { + /* Parent class */ + struct vbuf_render base; + + /* Pipe context */ + struct r300_context* r300; + + /* Vertex information */ + size_t vertex_size; + unsigned prim; + unsigned hwprim; + + /* VBO */ + struct pipe_buffer* vbo; + size_t vbo_size; + size_t vbo_offset; + void* vbo_map; + size_t vbo_alloc_size; + size_t vbo_max_used; +}; + +static INLINE struct r300_swtcl_render* +r300_swtcl_render(struct vbuf_render* render) +{ + return (struct r300_swtcl_render*)render; +} + +static const struct vertex_info* +r300_swtcl_render_get_vertex_info(struct vbuf_render* render) +{ + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + struct r300_context* r300 = r300render->r300; + + r300_update_derived_state(r300); + + return &r300->vertex_info; +} + +static boolean r300_swtcl_render_allocate_vertices(struct vbuf_render* render, + ushort vertex_size, + ushort count) +{ + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + struct r300_context* r300 = r300render->r300; + struct pipe_screen* screen = r300->context.screen; + size_t size = (size_t)vertex_size * (size_t)count; + + if (r300render->vbo) { + pipe_buffer_reference(screen, &r300render->vbo, NULL); + } + + r300render->vbo_size = MAX2(size, r300render->vbo_alloc_size); + r300render->vbo_offset = 0; + r300render->vbo = pipe_buffer_create(screen, + 64, + PIPE_BUFFER_USAGE_VERTEX, + r300render->vbo_size); + + r300render->vertex_size = vertex_size; + + if (r300render->vbo) { + return true; + } else { + return false; + } +} + +static void* r300_swtcl_render_map_vertices(struct vbuf_render* render) +{ + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + struct pipe_screen* screen = r300render->r300->context.screen; + + r300render->vbo_map = pipe_buffer_map(screen, r300render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + + return (unsigned char*)r300render->vbo_map + r300render->vbo_offset; +} + +static void r300_swtcl_render_unmap_vertices(struct 
vbuf_render* render, + ushort min, + ushort max) +{ + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + struct pipe_screen* screen = r300render->r300->context.screen; + + r300render->vbo_max_used = MAX2(r300render->vbo_max_used, + r300render->vertex_size * (max + 1)); + + pipe_buffer_unmap(screen, r300render->vbo); +} + +static void r300_swtcl_render_release_vertices(struct vbuf_render* render) +{ + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + struct pipe_screen* screen = r300render->r300->context.screen; + + pipe_buffer_reference(screen, &r300render->vbo, NULL); +} + +static boolean r300_swtcl_render_set_primitive(struct vbuf_render* render, + unsigned prim) +{ + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + r300render->prim = prim; + + switch (prim) { + case PIPE_PRIM_POINTS: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_POINTS; + break; + case PIPE_PRIM_LINES: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINES; + break; + case PIPE_PRIM_LINE_LOOP: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINE_LOOP; + break; + case PIPE_PRIM_LINE_STRIP: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_LINE_STRIP; + break; + case PIPE_PRIM_TRIANGLES: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLES; + break; + case PIPE_PRIM_TRIANGLE_STRIP: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLE_STRIP; + break; + case PIPE_PRIM_TRIANGLE_FAN: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_TRIANGLE_FAN; + break; + case PIPE_PRIM_QUADS: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_QUADS; + break; + case PIPE_PRIM_QUAD_STRIP: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_QUAD_STRIP; + break; + case PIPE_PRIM_POLYGON: + r300render->hwprim = R300_VAP_VF_CNTL__PRIM_POLYGON; + break; + default: + return false; + break; + } + + return true; +} + +static void prepare_render(struct r300_swtcl_render* render) +{ + struct r300_context* r300 = render->r300; + int i; + + CS_LOCALS(r300); + + /* Make sure that all possible 
state is emitted. */ + r300_emit_dirty_state(r300); + + debug_printf("r300: Preparing vertex buffer %p for render, " + "vertex size %d, vertex count %d\n", render->vbo, + r300->vertex_info.vinfo.size, render->vbo_size); + /* Set the pointer to our vertex buffer. The emitted values are this: + * PACKET3 [3D_LOAD_VBPNTR] + * COUNT [1] + * FORMAT [size | stride << 8] + * VBPNTR [relocated BO] + */ + BEGIN_CS(5); + OUT_CS(CP_PACKET3(R300_PACKET3_3D_LOAD_VBPNTR, 3)); + OUT_CS(1); + OUT_CS(r300->vertex_info.vinfo.size | (r300->vertex_info.vinfo.size << 8)); + OUT_CS_RELOC(render->vbo, 0, RADEON_GEM_DOMAIN_GTT, 0, 0); + END_CS; +} + +static void r300_swtcl_render_draw_arrays(struct vbuf_render* render, + unsigned start, + unsigned count) +{ + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + struct r300_context* r300 = r300render->r300; + struct pipe_screen* screen = r300->context.screen; + + CS_LOCALS(r300); + + prepare_render(r300render); + + debug_printf("r300: Doing vbuf render, count %d\n", count); + + BEGIN_CS(2); + OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_VBUF_2, 0)); + OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_VERTEX_LIST | (count << 16) | + r300render->hwprim | R300_VAP_VF_CNTL__INDEX_SIZE_32bit); + END_CS; +} + +static void r300_swtcl_render_draw(struct vbuf_render* render, + const ushort* indices, + uint count) +{ + struct r300_swtcl_render* r300render = r300_swtcl_render(render); + struct r300_context* r300 = r300render->r300; + struct pipe_screen* screen = r300->context.screen; + struct pipe_buffer* index_buffer; + void* index_map; + + CS_LOCALS(r300); + + prepare_render(r300render); + + /* Send our indices into an index buffer. 
*/
+    /* The draw packet below declares 32-bit indices, so the index buffer
+     * holds one dword per index. */
+    index_buffer = pipe_buffer_create(screen, 64, PIPE_BUFFER_USAGE_VERTEX,
+                                      count * 4);
+    if (!index_buffer) {
+        return;
+    }
+
+    index_map = pipe_buffer_map(screen, index_buffer,
+                                PIPE_BUFFER_USAGE_CPU_WRITE);
+    if (!index_map) {
+        /* Nothing could be written; drop the buffer rather than draw from
+         * uninitialized indices. */
+        pipe_buffer_reference(screen, &index_buffer, NULL);
+        return;
+    }
+
+    /* `indices` holds `count` 16-bit values. Widen each one to a dword:
+     * copying count * 4 raw bytes would read past the end of the source
+     * array and pack two indices into every 32-bit slot. */
+    {
+        uint32_t* dst = (uint32_t*)index_map;
+        uint n;
+
+        for (n = 0; n < count; n++) {
+            dst[n] = indices[n];
+        }
+    }
+    pipe_buffer_unmap(screen, index_buffer);
+
+    debug_printf("r300: Doing indexbuf render, count %d\n", count);
+
+    BEGIN_CS(5);
+    OUT_CS(CP_PACKET3(R300_PACKET3_3D_DRAW_INDX_2, 0));
+    OUT_CS(R300_VAP_VF_CNTL__PRIM_WALK_INDICES | (count << 16) |
+           r300render->hwprim | R300_VAP_VF_CNTL__INDEX_SIZE_32bit);
+
+    OUT_CS(CP_PACKET3(R300_PACKET3_INDX_BUFFER, 2));
+    OUT_CS(R300_INDX_BUFFER_ONE_REG_WR | (R300_VAP_PORT_IDX0 >> 2));
+    OUT_CS_RELOC(index_buffer, 0, RADEON_GEM_DOMAIN_GTT, 0, 0);
+    /* NOTE(review): our reference to index_buffer is never dropped on this
+     * path. Confirm whether the relocation keeps its own reference before
+     * adding a release here. */
+    END_CS;
+}
+
+/* vbuf_render::destroy hook: frees the render object itself. */
+static void r300_swtcl_render_destroy(struct vbuf_render* render)
+{
+    FREE(render);
+}
+
+/* Allocate a zeroed r300_swtcl_render, bind it to the context and fill in
+ * the vbuf_render callback table. Returns NULL if the allocation fails. */
+static struct vbuf_render* r300_swtcl_render_create(struct r300_context* r300)
+{
+    struct r300_swtcl_render* r300render = CALLOC_STRUCT(r300_swtcl_render);
+    struct pipe_screen* screen = r300->context.screen;
+
+    /* The caller already tests the result for NULL, so report allocation
+     * failure instead of dereferencing a NULL pointer below. */
+    if (!r300render) {
+        return NULL;
+    }
+
+    r300render->r300 = r300;
+
+    /* XXX find real numbers plz */
+    r300render->base.max_vertex_buffer_bytes = 128 * 1024;
+    r300render->base.max_indices = 16 * 1024;
+
+    r300render->base.get_vertex_info = r300_swtcl_render_get_vertex_info;
+    r300render->base.allocate_vertices = r300_swtcl_render_allocate_vertices;
+    r300render->base.map_vertices = r300_swtcl_render_map_vertices;
+    r300render->base.unmap_vertices = r300_swtcl_render_unmap_vertices;
+    r300render->base.set_primitive = r300_swtcl_render_set_primitive;
+    r300render->base.draw = r300_swtcl_render_draw;
+    r300render->base.draw_arrays = r300_swtcl_render_draw_arrays;
+    r300render->base.release_vertices = r300_swtcl_render_release_vertices;
+    r300render->base.destroy = r300_swtcl_render_destroy;
+
+    /* XXX bonghits ahead
+    r300render->vbo_alloc_size = 128 * 4096;
+    r300render->vbo_size = r300render->vbo_alloc_size;
+    r300render->vbo_offset = 0;
+    r300render->vbo =
pipe_buffer_create(screen, + 64, + PIPE_BUFFER_USAGE_VERTEX, + r300render->vbo_size); + r300render->vbo_map = pipe_buffer_map(screen, + r300render->vbo, + PIPE_BUFFER_USAGE_CPU_WRITE); + pipe_buffer_unmap(screen, r300render->vbo); */ + + return &r300render->base; +} + +struct draw_stage* r300_draw_swtcl_stage(struct r300_context* r300) +{ + struct vbuf_render* render; + struct draw_stage* stage; + + render = r300_swtcl_render_create(r300); + + if (!render) { + return NULL; + } + + stage = draw_vbuf_stage(r300->draw, render); + + if (!stage) { + render->destroy(render); + return NULL; + } + + draw_set_render(r300->draw, render); + + return stage; +} diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c new file mode 100644 index 0000000000..edd4370663 --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture.c @@ -0,0 +1,187 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "r300_texture.h"
+
+/* Halve a mip dimension, never going below 1. */
+static int minify(int i)
+{
+    return MAX2(1, i >> 1);
+}
+
+/* Fill in the per-level dimensions and block counts of tex->tex, and lay
+ * the levels out back to back: tex->offset[i] is the byte offset of level
+ * i and tex->size ends up as the total number of bytes needed.
+ *
+ * Level 0 dimensions are taken as given; tex->size is used as the running
+ * total, so it must be zero on entry. */
+static void r300_setup_miptree(struct r300_texture* tex)
+{
+    struct pipe_texture* base = &tex->tex;
+    int stride, size;
+    int i;
+
+    for (i = 0; i <= base->last_level; i++) {
+        if (i > 0) {
+            base->width[i] = minify(base->width[i-1]);
+            base->height[i] = minify(base->height[i-1]);
+            base->depth[i] = minify(base->depth[i-1]);
+        }
+
+        /* Block rows come from the height; using the width here sized
+         * every non-square level incorrectly. */
+        base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]);
+        base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]);
+
+        /* Round the row pitch up to a multiple of 64 bytes.
+         * XXX the original note says Radeons want multiples of 32 and that
+         * NPOT wants 64; 64 is used for everything here. */
+        stride = (base->nblocksx[i] * base->block.size + 63) & ~63;
+        size = stride * base->nblocksy[i] * base->depth[i];
+
+        /* Each level starts on a 64-byte boundary. */
+        /* XXX 64 for NPOT */
+        tex->offset[i] = (tex->size + 63) & ~63;
+        tex->size = tex->offset[i] + size;
+    }
+}
+
+/* Create a new texture.
*/
+/* Copies the template, starts the refcount at 1, lays out the mip levels
+ * and allocates one backing buffer of tex->size bytes. Returns NULL if
+ * either allocation fails. */
+static struct pipe_texture*
+    r300_texture_create(struct pipe_screen* screen,
+                        const struct pipe_texture* template)
+{
+    /* XXX struct r300_screen* r300screen = r300_screen(screen); */
+
+    struct r300_texture* tex = CALLOC_STRUCT(r300_texture);
+
+    if (!tex) {
+        return NULL;
+    }
+
+    tex->tex = *template;
+    tex->tex.refcount = 1;
+    tex->tex.screen = screen;
+
+    r300_setup_miptree(tex);
+
+    tex->buffer = screen->buffer_create(screen, 64,
+                                        PIPE_BUFFER_USAGE_PIXEL,
+                                        tex->size);
+
+    if (!tex->buffer) {
+        FREE(tex);
+        return NULL;
+    }
+
+    return (struct pipe_texture*)tex;
+}
+
+/* Drop one reference to *texture. On the last reference the backing buffer
+ * is released and the texture freed. The caller's pointer is always
+ * cleared; a NULL *texture is a no-op. */
+static void r300_texture_release(struct pipe_screen* screen,
+                                 struct pipe_texture** texture)
+{
+    if (!*texture) {
+        return;
+    }
+
+    (*texture)->refcount--;
+
+    if ((*texture)->refcount <= 0) {
+        struct r300_texture* tex = (struct r300_texture*)*texture;
+
+        pipe_buffer_reference(screen, &tex->buffer, NULL);
+
+        FREE(tex);
+    }
+
+    *texture = NULL;
+}
+
+/* Build a pipe_surface describing mip level `level` of `texture`. The
+ * surface takes a reference on the texture. `face` and `zslice` are not
+ * consulted yet. Returns NULL if the surface cannot be allocated. */
+static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
+                                                 struct pipe_texture* texture,
+                                                 unsigned face,
+                                                 unsigned level,
+                                                 unsigned zslice,
+                                                 unsigned flags)
+{
+    struct r300_texture* tex = (struct r300_texture*)texture;
+    struct pipe_surface* surface = CALLOC_STRUCT(pipe_surface);
+    unsigned offset;
+
+    /* XXX this is certainly dependent on tex target */
+    offset = tex->offset[level];
+
+    if (surface) {
+        surface->refcount = 1;
+        pipe_texture_reference(&surface->texture, texture);
+        surface->format = texture->format;
+        surface->width = texture->width[level];
+        surface->height = texture->height[level];
+        surface->offset = offset;
+        surface->usage = flags;
+        surface->status = PIPE_SURFACE_STATUS_DEFINED;
+    }
+
+    return surface;
+}
+
+/* Drop one reference to *surface. On the last reference the texture
+ * reference is released and the surface freed. The caller's pointer is
+ * always cleared. */
+static void r300_tex_surface_release(struct pipe_screen* screen,
+                                     struct pipe_surface** surface)
+{
+    struct pipe_surface* s = *surface;
+
+    /* Match r300_texture_release: releasing a NULL surface is a no-op
+     * instead of a NULL dereference. */
+    if (!s) {
+        return;
+    }
+
+    s->refcount--;
+
+    if (s->refcount <= 0) {
+        pipe_texture_reference(&s->texture, NULL);
+        FREE(s);
+    }
+
+    *surface = NULL;
+}
+
+static struct
pipe_texture* + r300_texture_blanket(struct pipe_screen* screen, + const struct pipe_texture* base, + const unsigned* stride, + struct pipe_buffer* buffer) +{ + struct r300_texture* tex; + + if (base->target != PIPE_TEXTURE_2D || + base->last_level != 0 || + base->depth[0] != 1) { + return NULL; + } + + tex = CALLOC_STRUCT(r300_texture); + if (!tex) { + return NULL; + } + + tex->tex = *base; + tex->tex.refcount = 1; + tex->tex.screen = screen; + + tex->stride = *stride; + + pipe_buffer_reference(screen, &tex->buffer, buffer); + + return (struct pipe_texture*)tex; +} + +void r300_init_screen_texture_functions(struct pipe_screen* screen) +{ + screen->texture_create = r300_texture_create; + screen->texture_release = r300_texture_release; + screen->get_tex_surface = r300_get_tex_surface; + screen->tex_surface_release = r300_tex_surface_release; + screen->texture_blanket = r300_texture_blanket; +} diff --git a/src/gallium/drivers/r300/r300_texture.h b/src/gallium/drivers/r300/r300_texture.h new file mode 100644 index 0000000000..7964229a94 --- /dev/null +++ b/src/gallium/drivers/r300/r300_texture.h @@ -0,0 +1,34 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_TEXTURE_H +#define R300_TEXTURE_H + +#include "pipe/p_screen.h" + +#include "util/u_math.h" + +#include "r300_context.h" + +void r300_init_screen_texture_functions(struct pipe_screen* screen); + +#endif /* R300_TEXTURE_H */ diff --git a/src/gallium/drivers/r300/r300_winsys.h b/src/gallium/drivers/r300/r300_winsys.h new file mode 100644 index 0000000000..5a3a212892 --- /dev/null +++ b/src/gallium/drivers/r300/r300_winsys.h @@ -0,0 +1,94 @@ +/* + * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef R300_WINSYS_H +#define R300_WINSYS_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* The public interface header for the r300 pipe driver. + * Any winsys hosting this pipe needs to implement r300_winsys and then + * call r300_create_context to start things. */ + +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +struct radeon_cs; + +struct r300_winsys { + + /* PCI ID */ + uint32_t pci_id; + + /* GB pipe count */ + uint32_t gb_pipes; + + /* CS object. This is very much like Intel's batchbuffer. + * Fill it full of dwords and relocs and then submit. + * Repeat as needed. */ + /* Note: Unlike Mesa's version of this, we don't keep a copy of the CSM + * that was used to create this CS. Is this a good idea? */ + /* Note: The pipe driver doesn't know how to use this. This is purely + * for the winsys. */ + struct radeon_cs* cs; + + /* Check to see if there's room for commands. */ + boolean (*check_cs)(struct radeon_cs* cs, int size); + + /* Start a command emit. */ + void (*begin_cs)(struct radeon_cs* cs, + int size, + const char* file, + const char* function, + int line); + + /* Write a dword to the command buffer. */ + void (*write_cs_dword)(struct radeon_cs* cs, uint32_t dword); + + /* Write a relocated dword to the command buffer. */ + void (*write_cs_reloc)(struct radeon_cs* cs, + struct pipe_buffer* bo, + uint32_t rd, + uint32_t wd, + uint32_t flags); + + /* Finish a command emit. */ + void (*end_cs)(struct radeon_cs* cs, + const char* file, + const char* function, + int line); + + /* Flush the CS. 
*/ + void (*flush_cs)(struct radeon_cs* cs); +}; + +struct pipe_context* r300_create_context(struct pipe_screen* screen, + struct pipe_winsys* winsys, + struct r300_winsys* r300_winsys); + +#ifdef __cplusplus +} +#endif + +#endif /* R300_WINSYS_H */ diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 120bdfd9dd..516e3992fd 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -14,7 +14,7 @@ C_SOURCES = \ sp_draw_arrays.c \ sp_prim_setup.c \ sp_prim_vbuf.c \ - sp_quad.c \ + sp_quad_pipe.c \ sp_quad_alpha_test.c \ sp_quad_blend.c \ sp_quad_colormask.c \ @@ -42,6 +42,3 @@ C_SOURCES = \ sp_surface.c include ../../Makefile.template - -symlinks: - diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index c1f7daa8ab..f8720638a7 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -17,7 +17,7 @@ softpipe = env.ConvenienceLibrary( 'sp_setup.c', 'sp_quad_alpha_test.c', 'sp_quad_blend.c', - 'sp_quad.c', + 'sp_quad_pipe.c', 'sp_quad_colormask.c', 'sp_quad_coverage.c', 'sp_quad_depth_test.c', diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index dfa46c9fb7..ad108ec446 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -85,7 +85,7 @@ softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, #endif } - for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { + for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { if (ps == sp_tile_cache_get_surface(softpipe->cbuf_cache[i])) { unsigned cv; if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) { diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index cd1e6663d8..ff5d1b54a4 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -2,6 +2,7 @@ * * Copyright 2007 
Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -52,15 +53,15 @@ * Map any drawing surfaces which aren't already mapped */ void -softpipe_map_surfaces(struct softpipe_context *sp) +softpipe_map_transfers(struct softpipe_context *sp) { unsigned i; - for (i = 0; i < sp->framebuffer.num_cbufs; i++) { - sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); + for (i = 0; i < sp->framebuffer.nr_cbufs; i++) { + sp_tile_cache_map_transfers(sp->cbuf_cache[i]); } - sp_tile_cache_map_surfaces(sp->zsbuf_cache); + sp_tile_cache_map_transfers(sp->zsbuf_cache); } @@ -68,25 +69,25 @@ softpipe_map_surfaces(struct softpipe_context *sp) * Unmap any mapped drawing surfaces */ void -softpipe_unmap_surfaces(struct softpipe_context *sp) +softpipe_unmap_transfers(struct softpipe_context *sp) { uint i; - for (i = 0; i < sp->framebuffer.num_cbufs; i++) + for (i = 0; i < sp->framebuffer.nr_cbufs; i++) sp_flush_tile_cache(sp, sp->cbuf_cache[i]); sp_flush_tile_cache(sp, sp->zsbuf_cache); - for (i = 0; i < sp->framebuffer.num_cbufs; i++) { - sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); + for (i = 0; i < sp->framebuffer.nr_cbufs; i++) { + sp_tile_cache_unmap_transfers(sp->cbuf_cache[i]); } - sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); + sp_tile_cache_unmap_transfers(sp->zsbuf_cache); } static void softpipe_destroy( struct pipe_context *pipe ) { struct softpipe_context *softpipe = softpipe_context( pipe ); - struct pipe_winsys *ws = pipe->winsys; + struct pipe_screen *screen = pipe->screen; uint i; if (softpipe->draw) @@ -115,7 +116,7 @@ static void softpipe_destroy( struct pipe_context *pipe ) for (i = 0; i < Elements(softpipe->constants); i++) { if (softpipe->constants[i].buffer) { - winsys_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); + pipe_buffer_reference(screen, 
&softpipe->constants[i].buffer, NULL); } } @@ -221,6 +222,24 @@ softpipe_create( struct pipe_screen *screen, softpipe->quad[i].output = sp_quad_output_stage(softpipe); } + /* vertex shader samplers */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + softpipe->tgsi.vert_samplers[i].base.get_samples = sp_get_samples_vertex; + softpipe->tgsi.vert_samplers[i].unit = i; + softpipe->tgsi.vert_samplers[i].sp = softpipe; + softpipe->tgsi.vert_samplers[i].cache = softpipe->tex_cache[i]; + softpipe->tgsi.vert_samplers_list[i] = &softpipe->tgsi.vert_samplers[i]; + } + + /* fragment shader samplers */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + softpipe->tgsi.frag_samplers[i].base.get_samples = sp_get_samples_fragment; + softpipe->tgsi.frag_samplers[i].unit = i; + softpipe->tgsi.frag_samplers[i].sp = softpipe; + softpipe->tgsi.frag_samplers[i].cache = softpipe->tex_cache[i]; + softpipe->tgsi.frag_samplers_list[i] = &softpipe->tgsi.frag_samplers[i]; + } + /* * Create drawing context and plug our rendering stage into it. 
*/ @@ -228,6 +247,11 @@ softpipe_create( struct pipe_screen *screen, if (!softpipe->draw) goto fail; + draw_texture_samplers(softpipe->draw, + PIPE_MAX_SAMPLERS, + (struct tgsi_sampler **) + softpipe->tgsi.vert_samplers_list); + softpipe->setup = sp_draw_render_stage(softpipe); if (!softpipe->setup) goto fail; diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 2b9a2a8ee5..59d6df8f2d 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -32,11 +32,11 @@ #define SP_CONTEXT_H #include "pipe/p_context.h" -#include "pipe/p_defines.h" #include "draw/draw_vertex.h" -#include "sp_quad.h" +#include "sp_quad_pipe.h" +#include "sp_tex_sample.h" /** @@ -50,7 +50,6 @@ */ #define SP_NUM_QUAD_THREADS 1 -struct softpipe_winsys; struct softpipe_vbuf_render; struct draw_context; struct draw_stage; @@ -62,15 +61,15 @@ struct sp_vertex_shader; struct softpipe_context { struct pipe_context pipe; /**< base class */ - /* The most recent drawing state as set by the driver: - */ - const struct pipe_blend_state *blend; + /** Constant state objects */ + const struct pipe_blend_state *blend; const struct pipe_sampler_state *sampler[PIPE_MAX_SAMPLERS]; - const struct pipe_depth_stencil_alpha_state *depth_stencil; + const struct pipe_depth_stencil_alpha_state *depth_stencil; const struct pipe_rasterizer_state *rasterizer; const struct sp_fragment_shader *fs; const struct sp_vertex_shader *vs; + /** Other rendering state */ struct pipe_blend_color blend_color; struct pipe_clip_state clip; struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; @@ -81,23 +80,20 @@ struct softpipe_context { struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - unsigned dirty; unsigned num_samplers; unsigned num_textures; unsigned num_vertex_elements; unsigned num_vertex_buffers; - boolean no_rast; + 
unsigned dirty; /**< Mask of SP_NEW_x flags */ /* Counter for occlusion queries. Note this supports overlapping * queries. */ - uint64 occlusion_count; + uint64_t occlusion_count; - /* - * Mapped vertex buffers - */ + /** Mapped vertex buffers */ ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; /** Mapped constant buffers */ @@ -107,16 +103,11 @@ struct softpipe_context { struct vertex_info vertex_info; struct vertex_info vertex_info_vbuf; + /** Which vertex shader output slot contains point size */ int psize_slot; unsigned reduced_api_prim; /**< PIPE_PRIM_POINTS, _LINES or _TRIANGLES */ -#if 0 - /* Stipple derived state: - */ - ubyte stipple_masks[16][16]; -#endif - /** Derived from scissor and surface bounds: */ struct pipe_scissor_state cliprect; @@ -139,6 +130,14 @@ struct softpipe_context { struct quad_stage *first; /**< points to one of the above stages */ } quad[SP_NUM_QUAD_THREADS]; + /** TGSI exec things */ + struct { + struct sp_shader_sampler vert_samplers[PIPE_MAX_SAMPLERS]; + struct sp_shader_sampler *vert_samplers_list[PIPE_MAX_SAMPLERS]; + struct sp_shader_sampler frag_samplers[PIPE_MAX_SAMPLERS]; + struct sp_shader_sampler *frag_samplers_list[PIPE_MAX_SAMPLERS]; + } tgsi; + /** The primitive drawing context */ struct draw_context *draw; struct draw_stage *setup; @@ -150,8 +149,9 @@ struct softpipe_context { struct softpipe_tile_cache *tex_cache[PIPE_MAX_SAMPLERS]; - int use_sse : 1; - int dump_fs : 1; + unsigned use_sse : 1; + unsigned dump_fs : 1; + unsigned no_rast : 1; }; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 424bd56846..f117096bf7 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "sp_context.h" @@ -47,16 +47,22 @@ static void 
softpipe_map_constant_buffers(struct softpipe_context *sp) { struct pipe_winsys *ws = sp->pipe.winsys; - uint i; + uint i, size; + for (i = 0; i < PIPE_SHADER_TYPES; i++) { - if (sp->constants[i].size) + if (sp->constants[i].buffer && sp->constants[i].buffer->size) sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); } + if (sp->constants[PIPE_SHADER_VERTEX].buffer) + size = sp->constants[PIPE_SHADER_VERTEX].buffer->size; + else + size = 0; + draw_set_mapped_constant_buffer(sp->draw, sp->mapped_constants[PIPE_SHADER_VERTEX], - sp->constants[PIPE_SHADER_VERTEX].size); + size); } static void @@ -73,7 +79,7 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) draw_set_mapped_constant_buffer(sp->draw, NULL, 0); for (i = 0; i < 2; i++) { - if (sp->constants[i].size) + if (sp->constants[i].buffer && sp->constants[i].buffer->size) ws->buffer_unmap(ws, sp->constants[i].buffer); sp->mapped_constants[i] = NULL; } @@ -128,7 +134,7 @@ softpipe_draw_range_elements(struct pipe_context *pipe, if (sp->dirty) softpipe_update_derived( sp ); - softpipe_map_surfaces(sp); + softpipe_map_transfers(sp); softpipe_map_constant_buffers(sp); /* diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 401764bb43..035f4b963e 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -57,7 +57,7 @@ softpipe_flush( struct pipe_context *pipe, } if (flags & PIPE_FLUSH_RENDER_CACHE) { - for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) + for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) if (softpipe->cbuf_cache[i]) sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]); @@ -70,7 +70,7 @@ softpipe_flush( struct pipe_context *pipe, * that's called before swapbuffers because we don't always want * to unmap surfaces when flushing. 
*/ - softpipe_unmap_surfaces(softpipe); + softpipe_unmap_transfers(softpipe); } /* Enable to dump BMPs of the color/depth buffers each frame */ diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 701ee4c72f..3c7ba565d6 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -29,7 +29,7 @@ #include "sp_context.h" #include "sp_state.h" #include "sp_fs.h" -#include "sp_headers.h" +#include "sp_quad.h" #include "pipe/p_state.h" @@ -39,11 +39,19 @@ #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_parse.h" -struct sp_exec_fragment_shader { +struct sp_exec_fragment_shader +{ struct sp_fragment_shader base; }; +/** cast wrapper */ +static INLINE struct sp_exec_fragment_shader * +sp_exec_fragment_shader(const struct sp_fragment_shader *base) +{ + return (struct sp_exec_fragment_shader *) base; +} + /** * Compute quad X,Y,Z,W for the four fragments in a quad. @@ -84,12 +92,18 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef, static void exec_prepare( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, - struct tgsi_sampler *samplers ) + struct tgsi_sampler **samplers ) { - tgsi_exec_machine_bind_shader( machine, - base->shader.tokens, - PIPE_MAX_SAMPLERS, - samplers ); + /* + * Bind tokens/shader to the interpreter's machine state. + * Avoid redundant binding. 
+ */ + if (machine->Tokens != base->shader.tokens) { + tgsi_exec_machine_bind_shader( machine, + base->shader.tokens, + PIPE_MAX_SAMPLERS, + samplers ); + } } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 50eb2c07bc..7e22081132 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -29,7 +29,7 @@ #include "sp_context.h" #include "sp_state.h" #include "sp_fs.h" -#include "sp_headers.h" +#include "sp_quad.h" #include "pipe/p_state.h" @@ -40,7 +40,7 @@ #include "tgsi/tgsi_sse2.h" -#ifdef PIPE_ARCH_X86 +#if defined(PIPE_ARCH_X86) #include "rtasm/rtasm_x86sse.h" @@ -69,7 +69,7 @@ struct sp_sse_fragment_shader { static void fs_sse_prepare( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, - struct tgsi_sampler *samplers ) + struct tgsi_sampler **samplers ) { } diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h deleted file mode 100644 index 4a42cb3c19..0000000000 --- a/src/gallium/drivers/softpipe/sp_headers.h +++ /dev/null @@ -1,95 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell <keith@tungstengraphics.com> - */ - -#ifndef SP_HEADERS_H -#define SP_HEADERS_H - -#include "pipe/p_state.h" -#include "tgsi/tgsi_exec.h" - -#define PRIM_POINT 1 -#define PRIM_LINE 2 -#define PRIM_TRI 3 - - -/* The rasterizer generates 2x2 quads of fragment and feeds them to - * the current fp_machine (see below). - * Remember that Y=0=top with Y increasing down the window. - */ -#define QUAD_TOP_LEFT 0 -#define QUAD_TOP_RIGHT 1 -#define QUAD_BOTTOM_LEFT 2 -#define QUAD_BOTTOM_RIGHT 3 - -#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) -#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) -#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) -#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) -#define MASK_ALL 0xf - - -/** - * Encodes everything we need to know about a 2x2 pixel block. Uses - * "Channel-Serial" or "SoA" layout. - */ -struct quad_header_input -{ - int x0; - int y0; - float coverage[QUAD_SIZE]; /** fragment coverage for antialiasing */ - unsigned facing:1; /**< Front (0) or back (1) facing? 
*/ - unsigned prim:2; /**< PRIM_POINT, LINE, TRI */ -}; - -struct quad_header_inout -{ - unsigned mask:4; -}; - -struct quad_header_output -{ - /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ - float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; - float depth[QUAD_SIZE]; -}; - -struct quad_header { - struct quad_header_input input; - struct quad_header_inout inout; - struct quad_header_output output; - - const struct tgsi_interp_coef *coef; - const struct tgsi_interp_coef *posCoef; - - unsigned nr_attrs; -}; - -#endif /* SP_HEADERS_H */ - diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 425e13cd28..d56eed80a4 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -26,10 +26,10 @@ **************************************************************************/ /** - * Post-transform vertex buffering. This is an optional part of the - * softpipe rendering pipeline. - * Probably not desired in general, but useful for testing/debuggin. - * Enabled/Disabled with SP_VBUF env var. + * Interface between 'draw' module's output and the softpipe rasterizer/setup + * code. When the 'draw' module has finished filling a vertex buffer, the + * draw_arrays() functions below will be called. Loop over the vertices and + * call the point/line/tri setup functions. 
* * Authors * Brian Paul @@ -60,6 +60,7 @@ struct softpipe_vbuf_render struct softpipe_context *softpipe; uint prim; uint vertex_size; + uint vertex_buffer_size; void *vertex_buffer; }; @@ -80,26 +81,44 @@ sp_vbuf_get_vertex_info(struct vbuf_render *vbr) } -static void * +static boolean sp_vbuf_allocate_vertices(struct vbuf_render *vbr, - ushort vertex_size, ushort nr_vertices) + ushort vertex_size, ushort nr_vertices) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); - assert(!cvbr->vertex_buffer); - cvbr->vertex_buffer = align_malloc(vertex_size * nr_vertices, 16); + unsigned size = vertex_size * nr_vertices; + + if (cvbr->vertex_buffer_size < size) { + align_free(cvbr->vertex_buffer); + cvbr->vertex_buffer = align_malloc(size, 16); + cvbr->vertex_buffer_size = size; + } + cvbr->vertex_size = vertex_size; - return cvbr->vertex_buffer; + return cvbr->vertex_buffer != NULL; } - static void -sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, - unsigned vertex_size, unsigned vertices_used) +sp_vbuf_release_vertices(struct vbuf_render *vbr) +{ + /* keep the old allocation for next time */ +} + +static void * +sp_vbuf_map_vertices(struct vbuf_render *vbr) +{ + struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); + return cvbr->vertex_buffer; +} + +static void +sp_vbuf_unmap_vertices(struct vbuf_render *vbr, + ushort min_index, + ushort max_index ) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); - align_free(vertices); - assert(vertices == cvbr->vertex_buffer); - cvbr->vertex_buffer = NULL; + assert( cvbr->vertex_buffer_size >= (max_index+1) * cvbr->vertex_size ); + /* do nothing */ } @@ -115,8 +134,6 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) setup_prepare( setup_ctx ); - - cvbr->prim = prim; return TRUE; @@ -131,21 +148,23 @@ static INLINE cptrf4 get_vert( const void *vertex_buffer, } +/** + * draw elements / indexed primitives + */ static void sp_vbuf_draw(struct vbuf_render *vbr, const 
ushort *indices, uint nr) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; - unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); - unsigned i; + const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); const void *vertex_buffer = cvbr->vertex_buffer; + unsigned i; /* XXX: break this dependency - make setup_context live under * softpipe, rename the old "setup" draw stage to something else. */ struct draw_stage *setup = softpipe->setup; - struct setup_context *setup_ctx = sp_draw_setup_context(softpipe->setup); - + struct setup_context *setup_ctx = sp_draw_setup_context(setup); switch (cvbr->prim) { case PIPE_PRIM_POINTS: @@ -258,13 +277,16 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; - struct draw_stage *setup = softpipe->setup; - const void *vertex_buffer = NULL; const unsigned stride = softpipe->vertex_info_vbuf.size * sizeof(float); + const void *vertex_buffer = + (void *) get_vert(cvbr->vertex_buffer, start, stride); unsigned i; - struct setup_context *setup_ctx = sp_draw_setup_context(setup); - vertex_buffer = (void *)get_vert(cvbr->vertex_buffer, start, stride); + /* XXX: break this dependency - make setup_context live under + * softpipe, rename the old "setup" draw stage to something else. 
+ */ + struct draw_stage *setup = softpipe->setup; + struct setup_context *setup_ctx = sp_draw_setup_context(setup); switch (cvbr->prim) { case PIPE_PRIM_POINTS: @@ -389,6 +411,8 @@ sp_init_vbuf(struct softpipe_context *sp) sp->vbuf_render->base.get_vertex_info = sp_vbuf_get_vertex_info; sp->vbuf_render->base.allocate_vertices = sp_vbuf_allocate_vertices; + sp->vbuf_render->base.map_vertices = sp_vbuf_map_vertices; + sp->vbuf_render->base.unmap_vertices = sp_vbuf_unmap_vertices; sp->vbuf_render->base.set_primitive = sp_vbuf_set_primitive; sp->vbuf_render->base.draw = sp_vbuf_draw; sp->vbuf_render->base.draw_arrays = sp_vbuf_draw_arrays; diff --git a/src/gallium/drivers/softpipe/sp_quad.h b/src/gallium/drivers/softpipe/sp_quad.h index 08513cb95f..bd6c6cb912 100644 --- a/src/gallium/drivers/softpipe/sp_quad.h +++ b/src/gallium/drivers/softpipe/sp_quad.h @@ -31,39 +31,76 @@ #ifndef SP_QUAD_H #define SP_QUAD_H +#include "pipe/p_state.h" +#include "tgsi/tgsi_exec.h" -struct softpipe_context; -struct quad_header; +#define QUAD_PRIM_POINT 1 +#define QUAD_PRIM_LINE 2 +#define QUAD_PRIM_TRI 3 -struct quad_stage { - struct softpipe_context *softpipe; - struct quad_stage *next; +/* The rasterizer generates 2x2 quads of fragment and feeds them to + * the current fp_machine (see below). + * Remember that Y=0=top with Y increasing down the window. + */ +#define QUAD_TOP_LEFT 0 +#define QUAD_TOP_RIGHT 1 +#define QUAD_BOTTOM_LEFT 2 +#define QUAD_BOTTOM_RIGHT 3 + +#define MASK_TOP_LEFT (1 << QUAD_TOP_LEFT) +#define MASK_TOP_RIGHT (1 << QUAD_TOP_RIGHT) +#define MASK_BOTTOM_LEFT (1 << QUAD_BOTTOM_LEFT) +#define MASK_BOTTOM_RIGHT (1 << QUAD_BOTTOM_RIGHT) +#define MASK_ALL 0xf + + +/** + * Quad stage inputs (pos, coverage, front/back face, etc) + */ +struct quad_header_input +{ + int x0, y0; /**< quad window pos, always even */ + float coverage[QUAD_SIZE]; /**< fragment coverage for antialiasing */ + unsigned facing:1; /**< Front (0) or back (1) facing? 
*/ + unsigned prim:2; /**< QUAD_PRIM_POINT, LINE, TRI */ +}; - void (*begin)(struct quad_stage *qs); - /** the stage action */ - void (*run)(struct quad_stage *qs, struct quad_header *quad); +/** + * Quad stage inputs/outputs. + */ +struct quad_header_inout +{ + unsigned mask:4; +}; + - void (*destroy)(struct quad_stage *qs); +/** + * Quad stage outputs (color & depth). + */ +struct quad_header_output +{ + /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */ + float color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE]; + float depth[QUAD_SIZE]; }; -struct quad_stage *sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe ); -struct quad_stage *sp_quad_earlyz_stage( struct softpipe_context *softpipe ); -struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ); -struct quad_stage *sp_quad_alpha_test_stage( struct softpipe_context *softpipe ); -struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ); -struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ); -struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ); -struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ); -struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ); -struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ); -struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ); +/** + * Encodes everything we need to know about a 2x2 pixel block. Uses + * "Channel-Serial" or "SoA" layout. 
+ */ +struct quad_header { + struct quad_header_input input; + struct quad_header_inout inout; + struct quad_header_output output; -void sp_build_quad_pipeline(struct softpipe_context *sp); + const struct tgsi_interp_coef *coef; + const struct tgsi_interp_coef *posCoef; -void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); + unsigned nr_attrs; +}; #endif /* SP_QUAD_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c index 5bebd141e9..0845bae0e6 100644 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -4,8 +4,8 @@ */ #include "sp_context.h" -#include "sp_headers.h" #include "sp_quad.h" +#include "sp_quad_pipe.h" #include "pipe/p_defines.h" #include "util/u_memory.h" @@ -14,7 +14,7 @@ static void alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; - const float ref = softpipe->depth_stencil->alpha.ref; + const float ref = softpipe->depth_stencil->alpha.ref_value; unsigned passMask = 0x0, j; const uint cbuf = 0; /* only output[0].alpha is tested */ const float *aaaa = quad->output.color[cbuf][3]; diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 6f64c6e584..e134e44337 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -34,10 +34,10 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "sp_context.h" -#include "sp_headers.h" +#include "sp_quad.h" #include "sp_surface.h" #include "sp_tile_cache.h" -#include "sp_quad.h" +#include "sp_quad_pipe.h" #define VEC4_COPY(DST, SRC) \ @@ -105,7 +105,7 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) uint cbuf; /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; 
cbuf++) { float dest[4][QUAD_SIZE]; ubyte src[4][4], dst[4][4], res[4][4]; uint *src4 = (uint *) src; @@ -239,7 +239,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) } /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe, diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c index 92e9af09c1..953d8516b9 100644 --- a/src/gallium/drivers/softpipe/sp_quad_bufloop.c +++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c @@ -1,9 +1,9 @@ #include "util/u_memory.h" #include "sp_context.h" -#include "sp_headers.h" -#include "sp_surface.h" #include "sp_quad.h" +#include "sp_surface.h" +#include "sp_quad_pipe.h" /** @@ -17,7 +17,7 @@ cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) unsigned i; assert(sizeof(quad->outputs.color) == sizeof(tmp)); - assert(softpipe->framebuffer.num_cbufs <= PIPE_MAX_COLOR_BUFS); + assert(softpipe->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS); /* make copy of original colors since they can get modified * by blending and masking. 
@@ -28,7 +28,7 @@ cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) */ memcpy(tmp, quad->outputs.color, sizeof(tmp)); - for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { + for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { /* set current cbuffer */ #if 0 /* obsolete & going away */ softpipe->current_cbuf = i; diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c index f32bdfab78..dc90e5d5e9 100644 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -34,9 +34,9 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "sp_context.h" -#include "sp_headers.h" -#include "sp_surface.h" #include "sp_quad.h" +#include "sp_surface.h" +#include "sp_quad_pipe.h" #include "sp_tile_cache.h" @@ -51,7 +51,7 @@ colormask_quad(struct quad_stage *qs, struct quad_header *quad) uint cbuf; /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { float dest[4][QUAD_SIZE]; struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe, diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c index ee29aa7dfe..4aeee85870 100644 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -35,8 +35,8 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "sp_context.h" -#include "sp_headers.h" #include "sp_quad.h" +#include "sp_quad_pipe.h" /** @@ -46,14 +46,15 @@ static void coverage_quad(struct quad_stage *qs, struct quad_header *quad) { struct softpipe_context *softpipe = qs->softpipe; + const uint prim = quad->input.prim; - if ((softpipe->rasterizer->poly_smooth && quad->input.prim == PRIM_TRI) || - (softpipe->rasterizer->line_smooth && quad->input.prim == PRIM_LINE) || - (softpipe->rasterizer->point_smooth && 
quad->input.prim == PRIM_POINT)) { + if ((softpipe->rasterizer->poly_smooth && prim == QUAD_PRIM_TRI) || + (softpipe->rasterizer->line_smooth && prim == QUAD_PRIM_LINE) || + (softpipe->rasterizer->point_smooth && prim == QUAD_PRIM_POINT)) { uint cbuf; /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { float (*quadColor)[4] = quad->output.color[cbuf]; unsigned j; for (j = 0; j < QUAD_SIZE; j++) { diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 523bd3e080..d463930bae 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -32,9 +32,9 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "sp_context.h" -#include "sp_headers.h" -#include "sp_surface.h" #include "sp_quad.h" +#include "sp_surface.h" +#include "sp_quad_pipe.h" #include "sp_tile_cache.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c index 6e2dde304e..496fd39ed1 100644 --- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c +++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c @@ -31,8 +31,8 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" -#include "sp_headers.h" #include "sp_quad.h" +#include "sp_quad_pipe.h" /** diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 1f0cb3e035..adca5df73d 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -2,6 +2,7 @@ * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -42,16 +43,15 @@ #include "sp_context.h" #include "sp_state.h" -#include "sp_headers.h" #include "sp_quad.h" +#include "sp_quad_pipe.h" #include "sp_texture.h" #include "sp_tex_sample.h" struct quad_shade_stage { - struct quad_stage stage; - struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; + struct quad_stage stage; /**< base class */ struct tgsi_exec_machine machine; struct tgsi_exec_vector *inputs, *outputs; }; @@ -147,18 +147,11 @@ static void shade_begin(struct quad_stage *qs) { struct quad_shade_stage *qss = quad_shade_stage(qs); struct softpipe_context *softpipe = qs->softpipe; - unsigned i; - unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers); - - /* set TGSI sampler state that varies */ - for (i = 0; i < num; i++) { - qss->samplers[i].state = softpipe->sampler[i]; - qss->samplers[i].texture = softpipe->texture[i]; - } softpipe->fs->prepare( softpipe->fs, &qss->machine, - qss->samplers ); + (struct tgsi_sampler **) + softpipe->tgsi.frag_samplers_list ); qs->next->begin(qs->next); } @@ -178,7 +171,6 @@ static void shade_destroy(struct quad_stage *qs) struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) { struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); - uint i; /* allocate storage for program inputs/outputs, aligned to 16 bytes */ qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16); @@ -191,14 +183,6 @@ struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) qss->stage.run = shade_quad; qss->stage.destroy = shade_destroy; - /* set TGSI sampler state that's constant */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - assert(softpipe->tex_cache[i]); - qss->samplers[i].get_samples = sp_get_samples; - qss->samplers[i].pipe = &softpipe->pipe; - qss->samplers[i].cache = softpipe->tex_cache[i]; - } - tgsi_exec_machine_init( &qss->machine 
); return &qss->stage; diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c index 169bd82876..dfa7ff3b1d 100644 --- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c +++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c @@ -35,9 +35,9 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "sp_context.h" -#include "sp_headers.h" -#include "sp_surface.h" #include "sp_quad.h" +#include "sp_surface.h" +#include "sp_quad_pipe.h" static unsigned count_bits( unsigned val ) { diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c index b7aac7f84a..92d5f9f3c1 100644 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -27,9 +27,9 @@ #include "util/u_memory.h" #include "sp_context.h" -#include "sp_headers.h" -#include "sp_surface.h" #include "sp_quad.h" +#include "sp_surface.h" +#include "sp_quad_pipe.h" #include "sp_tile_cache.h" @@ -48,7 +48,7 @@ output_quad(struct quad_stage *qs, struct quad_header *quad) uint cbuf; /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe, softpipe->cbuf_cache[cbuf], diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad_pipe.c index 892ef87ee9..892ef87ee9 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.c diff --git a/src/gallium/drivers/softpipe/sp_quad_pipe.h b/src/gallium/drivers/softpipe/sp_quad_pipe.h new file mode 100644 index 0000000000..0e40586ffc --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_quad_pipe.h @@ -0,0 +1,74 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifndef SP_QUAD_PIPE_H +#define SP_QUAD_PIPE_H + + +struct softpipe_context; +struct quad_header; + + +/** + * Fragment processing is performed on 2x2 blocks of pixels called "quads". + * Quad processing is performed with a pipeline of stages represented by + * this type. 
+ */ +struct quad_stage { + struct softpipe_context *softpipe; + + struct quad_stage *next; + + void (*begin)(struct quad_stage *qs); + + /** the stage action */ + void (*run)(struct quad_stage *qs, struct quad_header *quad); + + void (*destroy)(struct quad_stage *qs); +}; + + +struct quad_stage *sp_quad_polygon_stipple_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_earlyz_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_alpha_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_stencil_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_depth_test_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_occlusion_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_coverage_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_colormask_stage( struct softpipe_context *softpipe ); +struct quad_stage *sp_quad_output_stage( struct softpipe_context *softpipe ); + +void sp_build_quad_pipeline(struct softpipe_context *sp); + +void sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad); + +#endif /* SP_QUAD_PIPE_H */ diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c index abb5487748..5e9d447737 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -5,10 +5,10 @@ #include "sp_context.h" -#include "sp_headers.h" +#include "sp_quad.h" #include "sp_surface.h" #include "sp_tile_cache.h" -#include "sp_quad.h" +#include "sp_quad_pipe.h" #include "pipe/p_defines.h" #include "util/u_memory.h" @@ -222,8 +222,8 @@ stencil_test_quad(struct quad_stage *qs, struct quad_header *quad) zFailOp = softpipe->depth_stencil->stencil[face].zfail_op; zPassOp = 
softpipe->depth_stencil->stencil[face].zpass_op; ref = softpipe->depth_stencil->stencil[face].ref_value; - wrtMask = softpipe->depth_stencil->stencil[face].write_mask; - valMask = softpipe->depth_stencil->stencil[face].value_mask; + wrtMask = softpipe->depth_stencil->stencil[face].writemask; + valMask = softpipe->depth_stencil->stencil[face].valuemask; assert(ps); /* shouldn't get here if there's no stencil buffer */ diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index ccf37f6be5..05e862f097 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -4,8 +4,8 @@ */ #include "sp_context.h" -#include "sp_headers.h" #include "sp_quad.h" +#include "sp_quad_pipe.h" #include "pipe/p_defines.h" #include "util/u_memory.h" @@ -19,11 +19,13 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) static const uint bit31 = 1 << 31; static const uint bit30 = 1 << 30; - if (quad->input.prim == PRIM_TRI) { + if (quad->input.prim == QUAD_PRIM_TRI) { struct softpipe_context *softpipe = qs->softpipe; /* need to invert Y to index into OpenGL's stipple pattern */ int y0, y1; uint stipple0, stipple1; + const int col0 = quad->input.x0 % 32; + if (softpipe->rasterizer->origin_lower_left) { y0 = softpipe->framebuffer.height - 1 - quad->input.y0; y1 = y0 - 1; @@ -32,12 +34,11 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) y0 = quad->input.y0; y1 = y0 + 1; } + stipple0 = softpipe->poly_stipple.stipple[y0 % 32]; stipple1 = softpipe->poly_stipple.stipple[y1 % 32]; -#if 1 - { - const int col0 = quad->input.x0 % 32; + /* turn off quad mask bits that fail the stipple test */ if ((stipple0 & (bit31 >> col0)) == 0) quad->inout.mask &= ~MASK_TOP_LEFT; @@ -49,19 +50,11 @@ stipple_quad(struct quad_stage *qs, struct quad_header *quad) if ((stipple1 & (bit30 >> col0)) == 0) quad->inout.mask &= ~MASK_BOTTOM_RIGHT; - } -#else - /* We'd like to use this code, 
but we'd need to redefine - * MASK_TOP_LEFT to be (1 << 1) and MASK_TOP_RIGHT to be (1 << 0), - * and similarly for the BOTTOM bits. But that may have undesirable - * side effects elsewhere. - */ - const int col0 = 30 - (quad->input.x0 % 32); - quad->inout.mask &= (((stipple0 >> col0) & 0x3) | - (((stipple1 >> col0) & 0x3) << 2)); -#endif - if (!quad->inout.mask) + + if (!quad->inout.mask) { + /* all fragments failed stipple test, end of quad pipeline */ return; + } } qs->next->run(qs->next, quad); diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index 2106ee1d23..b0d8e01426 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -37,8 +37,8 @@ #include "sp_query.h" struct softpipe_query { - uint64 start; - uint64 end; + uint64_t start; + uint64_t end; }; @@ -87,7 +87,7 @@ static boolean softpipe_get_query_result(struct pipe_context *pipe, struct pipe_query *q, boolean wait, - uint64 *result ) + uint64_t *result ) { struct softpipe_query *sq = softpipe_query(q); *result = sq->end - sq->start; diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 9644dbd168..7380a6ae2b 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -27,7 +27,8 @@ #include "util/u_memory.h" -#include "pipe/p_winsys.h" +#include "util/u_simple_screen.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -55,7 +56,9 @@ softpipe_get_param(struct pipe_screen *screen, int param) { switch (param) { case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS: - return 8; + return PIPE_MAX_SAMPLERS; + case PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS: + return PIPE_MAX_SAMPLERS; case PIPE_CAP_NPOT_TEXTURES: return 1; case PIPE_CAP_TWO_SIDED_STENCIL: @@ -172,6 +175,7 @@ softpipe_create_screen(struct pipe_winsys *winsys) screen->base.is_format_supported = softpipe_is_format_supported; 
softpipe_init_screen_texture_funcs(&screen->base); + u_simple_screen_init(&screen->base); return &screen->base; } diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 13d8017393..0925653b5d 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -32,13 +32,12 @@ * \author Brian Paul */ -#include "sp_setup.h" - #include "sp_context.h" -#include "sp_headers.h" +#include "sp_prim_setup.h" #include "sp_quad.h" +#include "sp_quad_pipe.h" +#include "sp_setup.h" #include "sp_state.h" -#include "sp_prim_setup.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vertex.h" @@ -265,17 +264,20 @@ is_inf_or_nan(float x) } -static boolean cull_tri( struct setup_context *setup, - float det ) +/** + * Do triangle cull test using tri determinant (sign indicates orientation) + * \return true if triangle is to be culled. + */ +static INLINE boolean +cull_tri(const struct setup_context *setup, float det) { - if (det != 0) - { + if (det != 0) { /* if (det < 0 then Z points toward camera and triangle is * counter-clockwise winding. */ unsigned winding = (det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; - - if ((winding & setup->winding) == 0) + + if ((winding & setup->winding) == 0) return FALSE; } @@ -968,7 +970,7 @@ void setup_tri( struct setup_context *setup, setup_tri_coefficients( setup ); setup_tri_edges( setup ); - setup->quad.input.prim = PRIM_TRI; + setup->quad.input.prim = QUAD_PRIM_TRI; setup->span.y = 0; setup->span.y_flags = 0; @@ -1009,7 +1011,7 @@ void setup_tri( struct setup_context *setup, * for a line. */ static void -line_linear_coeff(struct setup_context *setup, +line_linear_coeff(const struct setup_context *setup, struct tgsi_interp_coef *coef, uint vertSlot, uint i) { @@ -1029,9 +1031,9 @@ line_linear_coeff(struct setup_context *setup, * for a line. 
*/ static void -line_persp_coeff(struct setup_context *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) +line_persp_coeff(const struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) { /* XXX double-check/verify this arithmetic */ const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; @@ -1206,7 +1208,7 @@ setup_line(struct setup_context *setup, setup->quad.input.x0 = setup->quad.input.y0 = -1; setup->quad.inout.mask = 0x0; - setup->quad.input.prim = PRIM_LINE; + setup->quad.input.prim = QUAD_PRIM_LINE; /* XXX temporary: set coverage to 1.0 so the line appears * if AA mode happens to be enabled. */ @@ -1266,7 +1268,7 @@ setup_line(struct setup_context *setup, static void -point_persp_coeff(struct setup_context *setup, +point_persp_coeff(const struct setup_context *setup, const float (*vert)[4], struct tgsi_interp_coef *coef, uint vertSlot, uint i) @@ -1361,7 +1363,7 @@ setup_point( struct setup_context *setup, } } - setup->quad.input.prim = PRIM_POINT; + setup->quad.input.prim = QUAD_PRIM_POINT; if (halfSize <= 0.5 && !round) { /* special case for 1-pixel points */ @@ -1497,7 +1499,7 @@ void setup_prepare( struct setup_context *setup ) } /* Mark surfaces as defined now */ - for (i = 0; i < sp->framebuffer.num_cbufs; i++){ + for (i = 0; i < sp->framebuffer.nr_cbufs; i++){ if (sp->framebuffer.cbufs[i]) { sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; } diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 476ef3dc8f..6f558e6da5 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -69,7 +69,7 @@ struct sp_fragment_shader { void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, - struct tgsi_sampler *samplers); + struct tgsi_sampler **samplers); /* Run the shader - this interface will get cleaned up in the * future: @@ -184,10 +184,10 @@ 
softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); void -softpipe_map_surfaces(struct softpipe_context *sp); +softpipe_map_transfers(struct softpipe_context *sp); void -softpipe_unmap_surfaces(struct softpipe_context *sp); +softpipe_unmap_transfers(struct softpipe_context *sp); void softpipe_map_texture_surfaces(struct softpipe_context *sp); diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index e5b609cf6c..4d01a9dbe1 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -32,7 +32,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "tgsi/tgsi_dump.h" @@ -146,16 +146,15 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, const struct pipe_constant_buffer *buf) { struct softpipe_context *softpipe = softpipe_context(pipe); - struct pipe_winsys *ws = pipe->winsys; + struct pipe_screen *screen = pipe->screen; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); /* note: reference counting */ - winsys_buffer_reference(ws, + pipe_buffer_reference(screen, &softpipe->constants[shader].buffer, buf ? buf->buffer : NULL); - softpipe->constants[shader].size = buf ? buf->size : 0; softpipe->dirty |= SP_NEW_CONSTANTS; } diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index b5376e522d..1493c65884 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -64,7 +64,7 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, } } - sp->framebuffer.num_cbufs = fb->num_cbufs; + sp->framebuffer.nr_cbufs = fb->nr_cbufs; /* zbuf changing? 
*/ if (sp->framebuffer.zsbuf != fb->zsbuf) { diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 49250ec084..adbd0cb7f0 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -2,6 +2,7 @@ * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -33,17 +34,18 @@ */ #include "sp_context.h" -#include "sp_headers.h" +#include "sp_quad.h" #include "sp_surface.h" +#include "sp_texture.h" #include "sp_tex_sample.h" #include "sp_tile_cache.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "tgsi/tgsi_exec.h" #include "util/u_math.h" #include "util/u_memory.h" + /* * Note, the FRAC macro has to work perfectly. Otherwise you'll sometimes * see 1-pixel bands of improperly weighted linear-filtered textures. @@ -57,7 +59,11 @@ /** * Linear interpolation macro */ -#define LERP(T, A, B) ( (A) + (T) * ((B) - (A)) ) +static INLINE float +lerp(float a, float v0, float v1) +{ + return v0 + a * (v1 - v0); +} /** @@ -72,13 +78,28 @@ static INLINE float lerp_2d(float a, float b, float v00, float v10, float v01, float v11) { - const float temp0 = LERP(a, v00, v10); - const float temp1 = LERP(a, v01, v11); - return LERP(b, temp0, temp1); + const float temp0 = lerp(a, v00, v10); + const float temp1 = lerp(a, v01, v11); + return lerp(b, temp0, temp1); } /** + * As above, but 3D interpolation of 8 values. 
+ */ +static INLINE float +lerp_3d(float a, float b, float c, + float v000, float v100, float v010, float v110, + float v001, float v101, float v011, float v111) +{ + const float temp0 = lerp_2d(a, b, v000, v100, v010, v110); + const float temp1 = lerp_2d(a, b, v001, v101, v011, v111); + return lerp(c, temp0, temp1); +} + + + +/** * If A is a signed integer, A % B doesn't give the right value for A < 0 * (in terms of texture repeat). Just casting to unsigned fixes that. */ @@ -86,250 +107,275 @@ lerp_2d(float a, float b, /** - * Apply texture coord wrapping mode and return integer texture index. + * Apply texture coord wrapping mode and return integer texture indexes + * for a vector of four texcoords (S or T or P). * \param wrapMode PIPE_TEX_WRAP_x - * \param s the texcoord + * \param s the incoming texcoords * \param size the texture image size + * \param icoord returns the integer texcoords * \return integer texture index */ -static INLINE int -nearest_texcoord(unsigned wrapMode, float s, unsigned size) +static INLINE void +nearest_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord[4]) { - int i; + uint ch; switch (wrapMode) { case PIPE_TEX_WRAP_REPEAT: /* s limited to [0,1) */ /* i limited to [0,size-1] */ - i = util_ifloor(s * size); - i = REMAINDER(i, size); - return i; + for (ch = 0; ch < 4; ch++) { + int i = util_ifloor(s[ch] * size); + icoord[ch] = REMAINDER(i, size); + } + return; case PIPE_TEX_WRAP_CLAMP: /* s limited to [0,1] */ /* i limited to [0,size-1] */ - if (s <= 0.0F) - i = 0; - else if (s >= 1.0F) - i = size - 1; - else - i = util_ifloor(s * size); - return i; + for (ch = 0; ch < 4; ch++) { + if (s[ch] <= 0.0F) + icoord[ch] = 0; + else if (s[ch] >= 1.0F) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(s[ch] * size); + } + return; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: { /* s limited to [min,max] */ /* i limited to [0, size-1] */ const float min = 1.0F / (2.0F * size); const float max = 1.0F - min; - if (s < min) - 
i = 0; - else if (s > max) - i = size - 1; - else - i = util_ifloor(s * size); + for (ch = 0; ch < 4; ch++) { + if (s[ch] < min) + icoord[ch] = 0; + else if (s[ch] > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(s[ch] * size); + } } - return i; + return; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: { /* s limited to [min,max] */ /* i limited to [-1, size] */ const float min = -1.0F / (2.0F * size); const float max = 1.0F - min; - if (s <= min) - i = -1; - else if (s >= max) - i = size; - else - i = util_ifloor(s * size); + for (ch = 0; ch < 4; ch++) { + if (s[ch] <= min) + icoord[ch] = -1; + else if (s[ch] >= max) + icoord[ch] = size; + else + icoord[ch] = util_ifloor(s[ch] * size); + } } - return i; + return; case PIPE_TEX_WRAP_MIRROR_REPEAT: { const float min = 1.0F / (2.0F * size); const float max = 1.0F - min; - const int flr = util_ifloor(s); - float u; - if (flr & 1) - u = 1.0F - (s - (float) flr); - else - u = s - (float) flr; - if (u < min) - i = 0; - else if (u > max) - i = size - 1; - else - i = util_ifloor(u * size); + for (ch = 0; ch < 4; ch++) { + const int flr = util_ifloor(s[ch]); + float u; + if (flr & 1) + u = 1.0F - (s[ch] - (float) flr); + else + u = s[ch] - (float) flr; + if (u < min) + icoord[ch] = 0; + else if (u > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } } - return i; + return; case PIPE_TEX_WRAP_MIRROR_CLAMP: - { + for (ch = 0; ch < 4; ch++) { /* s limited to [0,1] */ /* i limited to [0,size-1] */ - const float u = fabsf(s); + const float u = fabsf(s[ch]); if (u <= 0.0F) - i = 0; + icoord[ch] = 0; else if (u >= 1.0F) - i = size - 1; + icoord[ch] = size - 1; else - i = util_ifloor(u * size); + icoord[ch] = util_ifloor(u * size); } - return i; + return; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: { /* s limited to [min,max] */ /* i limited to [0, size-1] */ const float min = 1.0F / (2.0F * size); const float max = 1.0F - min; - const float u = fabsf(s); - if (u < min) - i = 0; - else if (u > max) - i 
= size - 1; - else - i = util_ifloor(u * size); + for (ch = 0; ch < 4; ch++) { + const float u = fabsf(s[ch]); + if (u < min) + icoord[ch] = 0; + else if (u > max) + icoord[ch] = size - 1; + else + icoord[ch] = util_ifloor(u * size); + } } - return i; + return; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: { /* s limited to [min,max] */ /* i limited to [0, size-1] */ const float min = -1.0F / (2.0F * size); const float max = 1.0F - min; - const float u = fabsf(s); - if (u < min) - i = -1; - else if (u > max) - i = size; - else - i = util_ifloor(u * size); + for (ch = 0; ch < 4; ch++) { + const float u = fabsf(s[ch]); + if (u < min) + icoord[ch] = -1; + else if (u > max) + icoord[ch] = size; + else + icoord[ch] = util_ifloor(u * size); + } } - return i; + return; default: assert(0); - return 0; } } /** - * Used to compute texel locations for linear sampling. + * Used to compute texel locations for linear sampling for four texcoords. * \param wrapMode PIPE_TEX_WRAP_x - * \param s the texcoord + * \param s the texcoords * \param size the texture image size - * \param i0 returns first texture index - * \param i1 returns second texture index (usually *i0 + 1) - * \param a returns blend factor/weight between texture indexes + * \param icoord0 returns first texture indexes + * \param icoord1 returns second texture indexes (usually icoord0 + 1) + * \param w returns blend factor/weight between texture indexes + * \param icoord returns the computed integer texture coords */ static INLINE void -linear_texcoord(unsigned wrapMode, float s, unsigned size, - int *i0, int *i1, float *a) +linear_texcoord_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) { - float u; + uint ch; + switch (wrapMode) { case PIPE_TEX_WRAP_REPEAT: - u = s * size - 0.5F; - *i0 = REMAINDER(util_ifloor(u), size); - *i1 = REMAINDER(*i0 + 1, size); - break; + for (ch = 0; ch < 4; ch++) { + float u = s[ch] * size - 0.5F; + icoord0[ch] = REMAINDER(util_ifloor(u), 
size); + icoord1[ch] = REMAINDER(icoord0[ch] + 1, size); + w[ch] = FRAC(u); + } + break;; case PIPE_TEX_WRAP_CLAMP: - if (s <= 0.0F) - u = 0.0F; - else if (s >= 1.0F) - u = (float) size; - else - u = s * size; - u -= 0.5F; - *i0 = util_ifloor(u); - *i1 = *i0 + 1; - break; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.0F, 1.0F); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + break;; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: - if (s <= 0.0F) - u = 0.0F; - else if (s >= 1.0F) - u = (float) size; - else - u = s * size; - u -= 0.5F; - *i0 = util_ifloor(u); - *i1 = *i0 + 1; - if (*i0 < 0) - *i0 = 0; - if (*i1 >= (int) size) - *i1 = size - 1; - break; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.0F, 1.0F); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } + break;; case PIPE_TEX_WRAP_CLAMP_TO_BORDER: { const float min = -1.0F / (2.0F * size); const float max = 1.0F - min; - if (s <= min) - u = min * size; - else if (s >= max) - u = max * size; - else - u = s * size; - u -= 0.5F; - *i0 = util_ifloor(u); - *i1 = *i0 + 1; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], min, max); + u = u * size - 0.5f; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } } - break; + break;; case PIPE_TEX_WRAP_MIRROR_REPEAT: - { - const int flr = util_ifloor(s); + for (ch = 0; ch < 4; ch++) { + const int flr = util_ifloor(s[ch]); + float u; if (flr & 1) - u = 1.0F - (s - (float) flr); + u = 1.0F - (s[ch] - (float) flr); else - u = s - (float) flr; - u = (u * size) - 0.5F; - *i0 = util_ifloor(u); - *i1 = *i0 + 1; - if (*i0 < 0) - *i0 = 0; - if (*i1 >= (int) size) - *i1 = size - 1; + u = s[ch] - (float) flr; + u = u * size - 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if 
(icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); } - break; + break;; case PIPE_TEX_WRAP_MIRROR_CLAMP: - u = fabsf(s); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - *i0 = util_ifloor(u); - *i1 = *i0 + 1; - break; + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + break;; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: - u = fabsf(s); - if (u >= 1.0F) - u = (float) size; - else - u *= size; - u -= 0.5F; - *i0 = util_ifloor(u); - *i1 = *i0 + 1; - if (*i0 < 0) - *i0 = 0; - if (*i1 >= (int) size) - *i1 = size - 1; - break; + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u >= 1.0F) + u = (float) size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + if (icoord0[ch] < 0) + icoord0[ch] = 0; + if (icoord1[ch] >= (int) size) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } + break;; case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: { const float min = -1.0F / (2.0F * size); const float max = 1.0F - min; - u = fabsf(s); - if (u <= min) - u = min * size; - else if (u >= max) - u = max * size; - else - u *= size; - u -= 0.5F; - *i0 = util_ifloor(u); - *i1 = *i0 + 1; + for (ch = 0; ch < 4; ch++) { + float u = fabsf(s[ch]); + if (u <= min) + u = min * size; + else if (u >= max) + u = max * size; + else + u *= size; + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } } - break; + break;; default: assert(0); } - *a = FRAC(u); } @@ -337,21 +383,27 @@ linear_texcoord(unsigned wrapMode, float s, unsigned size, * For RECT textures / unnormalized texcoords * Only a subset of wrap modes supported. 
*/ -static INLINE int -nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size) +static INLINE void +nearest_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord[4]) { - int i; + uint ch; switch (wrapMode) { case PIPE_TEX_WRAP_CLAMP: - i = util_ifloor(s); - return CLAMP(i, 0, (int) size-1); + for (ch = 0; ch < 4; ch++) { + int i = util_ifloor(s[ch]); + icoord[ch]= CLAMP(i, 0, (int) size-1); + } + return; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: /* fall-through */ case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - return util_ifloor( CLAMP(s, 0.5F, (float) size - 0.5F) ); + for (ch = 0; ch < 4; ch++) { + icoord[ch]= util_ifloor( CLAMP(s[ch], 0.5F, (float) size - 0.5F) ); + } + return; default: assert(0); - return 0; } } @@ -361,30 +413,36 @@ nearest_texcoord_unnorm(unsigned wrapMode, float s, unsigned size) * Only a subset of wrap modes supported. */ static INLINE void -linear_texcoord_unnorm(unsigned wrapMode, float s, unsigned size, - int *i0, int *i1, float *a) +linear_texcoord_unnorm_4(unsigned wrapMode, const float s[4], unsigned size, + int icoord0[4], int icoord1[4], float w[4]) { + uint ch; switch (wrapMode) { case PIPE_TEX_WRAP_CLAMP: - /* Not exactly what the spec says, but it matches NVIDIA output */ - s = CLAMP(s - 0.5F, 0.0f, (float) size - 1.0f); - *i0 = util_ifloor(s); - *i1 = *i0 + 1; - break; + for (ch = 0; ch < 4; ch++) { + /* Not exactly what the spec says, but it matches NVIDIA output */ + float u = CLAMP(s[ch] - 0.5F, 0.0f, (float) size - 1.0f); + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 1; + w[ch] = FRAC(u); + } + return; case PIPE_TEX_WRAP_CLAMP_TO_EDGE: /* fall-through */ case PIPE_TEX_WRAP_CLAMP_TO_BORDER: - s = CLAMP(s, 0.5F, (float) size - 0.5F); - s -= 0.5F; - *i0 = util_ifloor(s); - *i1 = *i0 + 1; - if (*i1 > (int) size - 1) - *i1 = size - 1; + for (ch = 0; ch < 4; ch++) { + float u = CLAMP(s[ch], 0.5F, (float) size - 0.5F); + u -= 0.5F; + icoord0[ch] = util_ifloor(u); + icoord1[ch] = icoord0[ch] + 
1; + if (icoord1[ch] > (int) size - 1) + icoord1[ch] = size - 1; + w[ch] = FRAC(u); + } break; default: assert(0); } - *a = FRAC(s); } @@ -463,7 +521,8 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) * This is only done for fragment shaders, not vertex shaders. */ static float -compute_lambda(struct tgsi_sampler *sampler, +compute_lambda(const struct pipe_texture *tex, + const struct pipe_sampler_state *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], @@ -471,7 +530,7 @@ compute_lambda(struct tgsi_sampler *sampler, { float rho, lambda; - assert(sampler->state->normalized_coords); + assert(sampler->normalized_coords); assert(s); { @@ -479,7 +538,7 @@ compute_lambda(struct tgsi_sampler *sampler, float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; dsdx = fabsf(dsdx); dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * sampler->texture->width[0]; + rho = MAX2(dsdx, dsdy) * tex->width[0]; } if (t) { float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; @@ -487,7 +546,7 @@ compute_lambda(struct tgsi_sampler *sampler, float max; dtdx = fabsf(dtdx); dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * sampler->texture->height[0]; + max = MAX2(dtdx, dtdy) * tex->height[0]; rho = MAX2(rho, max); } if (p) { @@ -496,13 +555,13 @@ compute_lambda(struct tgsi_sampler *sampler, float max; dpdx = fabsf(dpdx); dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * sampler->texture->depth[0]; + max = MAX2(dpdx, dpdy) * tex->depth[0]; rho = MAX2(rho, max); } lambda = util_fast_log2(rho); - lambda += lodbias + sampler->state->lod_bias; - lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod); + lambda += lodbias + sampler->lod_bias; + lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); return lambda; } @@ -514,68 +573,74 @@ compute_lambda(struct tgsi_sampler *sampler, * 2. Determine if we're minifying or magnifying * 3. If minifying, choose mipmap levels * 4. 
Return image filter to use within mipmap images + * \param level0 Returns first mipmap level to sample from + * \param level1 Returns second mipmap level to sample from + * \param levelBlend Returns blend factor between levels, in [0,1] + * \param imgFilter Returns either the min or mag filter, depending on lambda */ static void -choose_mipmap_levels(struct tgsi_sampler *sampler, +choose_mipmap_levels(const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], + boolean computeLambda, float lodbias, unsigned *level0, unsigned *level1, float *levelBlend, unsigned *imgFilter) { - if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { /* no mipmap selection needed */ - *level0 = *level1 = CLAMP((int) sampler->state->min_lod, - 0, (int) sampler->texture->last_level); + *level0 = *level1 = CLAMP((int) sampler->min_lod, + 0, (int) texture->last_level); - if (sampler->state->min_img_filter != sampler->state->mag_img_filter) { + if (sampler->min_img_filter != sampler->mag_img_filter) { /* non-mipmapped texture, but still need to determine if doing * minification or magnification. 
*/ - float lambda = compute_lambda(sampler, s, t, p, lodbias); + float lambda = compute_lambda(texture, sampler, s, t, p, lodbias); if (lambda <= 0.0) { - *imgFilter = sampler->state->mag_img_filter; + *imgFilter = sampler->mag_img_filter; } else { - *imgFilter = sampler->state->min_img_filter; + *imgFilter = sampler->min_img_filter; } } else { - *imgFilter = sampler->state->mag_img_filter; + *imgFilter = sampler->mag_img_filter; } } else { float lambda; - if (1) + if (computeLambda) /* fragment shader */ - lambda = compute_lambda(sampler, s, t, p, lodbias); + lambda = compute_lambda(texture, sampler, s, t, p, lodbias); else /* vertex shader */ lambda = lodbias; /* not really a bias, but absolute LOD */ if (lambda <= 0.0) { /* XXX threshold depends on the filter */ /* magnifying */ - *imgFilter = sampler->state->mag_img_filter; + *imgFilter = sampler->mag_img_filter; *level0 = *level1 = 0; } else { /* minifying */ - *imgFilter = sampler->state->min_img_filter; + *imgFilter = sampler->min_img_filter; /* choose mipmap level(s) and compute the blend factor between them */ - if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { /* Nearest mipmap level */ const int lvl = (int) (lambda + 0.5); *level0 = - *level1 = CLAMP(lvl, 0, (int) sampler->texture->last_level); + *level1 = CLAMP(lvl, 0, (int) texture->last_level); } else { /* Linear interpolation between mipmap levels */ const int lvl = (int) lambda; - *level0 = CLAMP(lvl, 0, (int) sampler->texture->last_level); - *level1 = CLAMP(lvl + 1, 0, (int) sampler->texture->last_level); + *level0 = CLAMP(lvl, 0, (int) texture->last_level); + *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); *levelBlend = FRAC(lambda); /* blending weight between levels */ } } @@ -598,23 +663,29 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... 
*/ static void -get_texel(struct tgsi_sampler *sampler, +get_texel(const struct tgsi_sampler *tgsi_sampler, unsigned face, unsigned level, int x, int y, int z, float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) { - if (x < 0 || x >= (int) sampler->texture->width[level] || - y < 0 || y >= (int) sampler->texture->height[level] || - z < 0 || z >= (int) sampler->texture->depth[level]) { - rgba[0][j] = sampler->state->border_color[0]; - rgba[1][j] = sampler->state->border_color[1]; - rgba[2][j] = sampler->state->border_color[2]; - rgba[3][j] = sampler->state->border_color[3]; + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; + + if (x < 0 || x >= (int) texture->width[level] || + y < 0 || y >= (int) texture->height[level] || + z < 0 || z >= (int) texture->depth[level]) { + rgba[0][j] = sampler->border_color[0]; + rgba[1][j] = sampler->border_color[1]; + rgba[2][j] = sampler->border_color[2]; + rgba[3][j] = sampler->border_color[3]; } else { const int tx = x % TILE_SIZE; const int ty = y % TILE_SIZE; const struct softpipe_cached_tile *tile - = sp_get_cached_tile_tex(sampler->pipe, sampler->cache, + = sp_get_cached_tile_tex(sp, samp->cache, x, y, z, face, level); rgba[0][j] = tile->data.color[ty][tx][0]; rgba[1][j] = tile->data.color[ty][tx][1]; @@ -624,7 +695,7 @@ get_texel(struct tgsi_sampler *sampler, { debug_printf("Get texel %f %f %f %f from %s\n", rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], - pf_name(sampler->texture->format)); + pf_name(texture->format)); } } } @@ -682,103 +753,124 @@ shadow_compare(uint compare_func, * Could probably extend for 3D... 
*/ static void -sp_get_samples_2d_common(struct tgsi_sampler *sampler, +sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], + boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE], const unsigned faces[4]) { - const uint compare_func = sampler->state->compare_func; + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; + const uint compare_func = sampler->compare_func; unsigned level0, level1, j, imgFilter; int width, height; float levelBlend; - choose_mipmap_levels(sampler, s, t, p, lodbias, + choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, &level0, &level1, &levelBlend, &imgFilter); - assert(sampler->state->normalized_coords); + assert(sampler->normalized_coords); - width = sampler->texture->width[level0]; - height = sampler->texture->height[level0]; + width = texture->width[level0]; + height = texture->height[level0]; assert(width > 0); switch (imgFilter) { case PIPE_TEX_FILTER_NEAREST: - for (j = 0; j < QUAD_SIZE; j++) { - int x = nearest_texcoord(sampler->state->wrap_s, s[j], width); - int y = nearest_texcoord(sampler->state->wrap_t, t[j], height); - get_texel(sampler, faces[j], level0, x, y, 0, rgba, j); - if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(compare_func, rgba, p, j); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x = x / 2; - y = y / 2; - get_texel(sampler, faces[j], level1, x, y, 0, rgba2, j); - if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ - shadow_compare(compare_func, rgba2, p, j); + { + int x[4], y[4]; + nearest_texcoord_4(sampler->wrap_s, s, width, x); + 
nearest_texcoord_4(sampler->wrap_t, t, height, y); + + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, faces[j], level0, x[j], y[j], 0, rgba, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, rgba, p, j); } - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]); + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x[j] /= 2; + y[j] /= 2; + get_texel(tgsi_sampler, faces[j], level1, x[j], y[j], 0, + rgba2, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare(compare_func, rgba2, p, j); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); + } } } } break; case PIPE_TEX_FILTER_LINEAR: case PIPE_TEX_FILTER_ANISO: - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4], a, b; - int x0, y0, x1, y1, c; - linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &a); - linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &b); - get_texel(sampler, faces[j], level0, x0, y0, 0, tx, 0); - get_texel(sampler, faces[j], level0, x1, y0, 0, tx, 1); - get_texel(sampler, faces[j], level0, x0, y1, 0, tx, 2); - get_texel(sampler, faces[j], level0, x1, y1, 0, tx, 3); - if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(compare_func, tx, p, 0); - shadow_compare(compare_func, tx, p, 1); - shadow_compare(compare_func, tx, p, 2); - shadow_compare(compare_func, tx, p, 3); - } - - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(a, b, tx[c][0], tx[c][1], tx[c][2], tx[c][3]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - x0 = x0 / 2; - y0 = y0 / 2; - x1 = x1 / 2; - y1 = y1 / 2; - get_texel(sampler, faces[j], level1, x0, y0, 0, tx, 0); - get_texel(sampler, faces[j], level1, x1, y0, 0, tx, 1); - get_texel(sampler, faces[j], level1, x0, y1, 0, 
tx, 2); - get_texel(sampler, faces[j], level1, x1, y1, 0, tx, 3); - if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + { + int x0[4], y0[4], x1[4], y1[4]; + float xw[4], yw[4]; /* weights */ + + linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); + linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); + + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4]; /* texels */ + int c; + get_texel(tgsi_sampler, faces[j], level0, x0[j], y0[j], 0, tx, 0); + get_texel(tgsi_sampler, faces[j], level0, x1[j], y0[j], 0, tx, 1); + get_texel(tgsi_sampler, faces[j], level0, x0[j], y1[j], 0, tx, 2); + get_texel(tgsi_sampler, faces[j], level0, x1[j], y1[j], 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { shadow_compare(compare_func, tx, p, 0); shadow_compare(compare_func, tx, p, 1); shadow_compare(compare_func, tx, p, 2); shadow_compare(compare_func, tx, p, 3); } + /* interpolate R, G, B, A */ for (c = 0; c < 4; c++) { - rgba2[c][j] = lerp_2d(a, b, - tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx[c][0], tx[c][1], + tx[c][2], tx[c][3]); } - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]); + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + x0[j] /= 2; + y0[j] /= 2; + x1[j] /= 2; + y1[j] /= 2; + get_texel(tgsi_sampler, faces[j], level1, x0[j], y0[j], 0, tx, 0); + get_texel(tgsi_sampler, faces[j], level1, x1[j], y0[j], 0, tx, 1); + get_texel(tgsi_sampler, faces[j], level1, x0[j], y1[j], 0, tx, 2); + get_texel(tgsi_sampler, faces[j], level1, x1[j], y1[j], 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba2[c][j] = lerp_2d(xw[j], yw[j], + 
tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } + + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); + } } } } @@ -789,55 +881,65 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler, } -static void -sp_get_samples_1d(struct tgsi_sampler *sampler, +static INLINE void +sp_get_samples_1d(const struct tgsi_sampler *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], + boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { static const unsigned faces[4] = {0, 0, 0, 0}; static const float tzero[4] = {0, 0, 0, 0}; - sp_get_samples_2d_common(sampler, s, tzero, NULL, lodbias, rgba, faces); + sp_get_samples_2d_common(sampler, s, tzero, NULL, + computeLambda, lodbias, rgba, faces); } -static void -sp_get_samples_2d(struct tgsi_sampler *sampler, +static INLINE void +sp_get_samples_2d(const struct tgsi_sampler *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], + boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { static const unsigned faces[4] = {0, 0, 0, 0}; - sp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, faces); + sp_get_samples_2d_common(sampler, s, t, p, + computeLambda, lodbias, rgba, faces); } -static void -sp_get_samples_3d(struct tgsi_sampler *sampler, +static INLINE void +sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], + boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; /* get/map pipe_surfaces corresponding to 3D tex slices */ unsigned level0, level1, j, imgFilter; int width, height, depth; float levelBlend; 
const uint face = 0; - choose_mipmap_levels(sampler, s, t, p, lodbias, + choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, &level0, &level1, &levelBlend, &imgFilter); - assert(sampler->state->normalized_coords); + assert(sampler->normalized_coords); - width = sampler->texture->width[level0]; - height = sampler->texture->height[level0]; - depth = sampler->texture->depth[level0]; + width = texture->width[level0]; + height = texture->height[level0]; + depth = texture->depth[level0]; assert(width > 0); assert(height > 0); @@ -845,89 +947,89 @@ sp_get_samples_3d(struct tgsi_sampler *sampler, switch (imgFilter) { case PIPE_TEX_FILTER_NEAREST: - for (j = 0; j < QUAD_SIZE; j++) { - int x = nearest_texcoord(sampler->state->wrap_s, s[j], width); - int y = nearest_texcoord(sampler->state->wrap_t, t[j], height); - int z = nearest_texcoord(sampler->state->wrap_r, p[j], depth); - get_texel(sampler, face, level0, x, y, z, rgba, j); - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - unsigned c; - x /= 2; - y /= 2; - z /= 2; - get_texel(sampler, face, level1, x, y, z, rgba2, j); - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = LERP(levelBlend, rgba2[c][j], rgba[c][j]); + { + int x[4], y[4], z[4]; + nearest_texcoord_4(sampler->wrap_s, s, width, x); + nearest_texcoord_4(sampler->wrap_t, t, height, y); + nearest_texcoord_4(sampler->wrap_r, p, depth, z); + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, face, level0, x[j], y[j], z[j], rgba, j); + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + unsigned c; + x[j] /= 2; + y[j] /= 2; + z[j] /= 2; + get_texel(tgsi_sampler, face, level1, x[j], y[j], z[j], rgba2, j); + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba2[c][j], rgba[c][j]); + } } } } break; case PIPE_TEX_FILTER_LINEAR: case PIPE_TEX_FILTER_ANISO: - for (j = 0; j < QUAD_SIZE; j++) { - float texel0[4][4], 
texel1[4][4]; - float xw, yw, zw; /* interpolation weights */ - int x0, x1, y0, y1, z0, z1, c; - linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &xw); - linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &yw); - linear_texcoord(sampler->state->wrap_r, p[j], depth, &z0, &z1, &zw); - get_texel(sampler, face, level0, x0, y0, z0, texel0, 0); - get_texel(sampler, face, level0, x1, y0, z0, texel0, 1); - get_texel(sampler, face, level0, x0, y1, z0, texel0, 2); - get_texel(sampler, face, level0, x1, y1, z0, texel0, 3); - get_texel(sampler, face, level0, x0, y0, z1, texel1, 0); - get_texel(sampler, face, level0, x1, y0, z1, texel1, 1); - get_texel(sampler, face, level0, x0, y1, z1, texel1, 2); - get_texel(sampler, face, level0, x1, y1, z1, texel1, 3); - - /* 3D lerp */ - for (c = 0; c < 4; c++) { - float ctemp0[4][4], ctemp1[4][4]; - ctemp0[c][j] = lerp_2d(xw, yw, - texel0[c][0], texel0[c][1], - texel0[c][2], texel0[c][3]); - ctemp1[c][j] = lerp_2d(xw, yw, - texel1[c][0], texel1[c][1], - texel1[c][2], texel1[c][3]); - rgba[c][j] = LERP(zw, ctemp0[c][j], ctemp1[c][j]); - } - - if (level0 != level1) { - /* get texels from second mipmap level and blend */ - float rgba2[4][4]; - x0 /= 2; - y0 /= 2; - z0 /= 2; - x1 /= 2; - y1 /= 2; - z1 /= 2; - get_texel(sampler, face, level1, x0, y0, z0, texel0, 0); - get_texel(sampler, face, level1, x1, y0, z0, texel0, 1); - get_texel(sampler, face, level1, x0, y1, z0, texel0, 2); - get_texel(sampler, face, level1, x1, y1, z0, texel0, 3); - get_texel(sampler, face, level1, x0, y0, z1, texel1, 0); - get_texel(sampler, face, level1, x1, y0, z1, texel1, 1); - get_texel(sampler, face, level1, x0, y1, z1, texel1, 2); - get_texel(sampler, face, level1, x1, y1, z1, texel1, 3); - - /* 3D lerp */ + { + int x0[4], x1[4], y0[4], y1[4], z0[4], z1[4]; + float xw[4], yw[4], zw[4]; /* interpolation weights */ + linear_texcoord_4(sampler->wrap_s, s, width, x0, x1, xw); + linear_texcoord_4(sampler->wrap_t, t, height, y0, y1, yw); + 
linear_texcoord_4(sampler->wrap_r, p, depth, z0, z1, zw); + + for (j = 0; j < QUAD_SIZE; j++) { + int c; + float tx0[4][4], tx1[4][4]; + get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z0[j], tx0, 0); + get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z0[j], tx0, 1); + get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z0[j], tx0, 2); + get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z0[j], tx0, 3); + get_texel(tgsi_sampler, face, level0, x0[j], y0[j], z1[j], tx1, 0); + get_texel(tgsi_sampler, face, level0, x1[j], y0[j], z1[j], tx1, 1); + get_texel(tgsi_sampler, face, level0, x0[j], y1[j], z1[j], tx1, 2); + get_texel(tgsi_sampler, face, level0, x1[j], y1[j], z1[j], tx1, 3); + + /* interpolate R, G, B, A */ for (c = 0; c < 4; c++) { - float ctemp0[4][4], ctemp1[4][4]; - ctemp0[c][j] = lerp_2d(xw, yw, - texel0[c][0], texel0[c][1], - texel0[c][2], texel0[c][3]); - ctemp1[c][j] = lerp_2d(xw, yw, - texel1[c][0], texel1[c][1], - texel1[c][2], texel1[c][3]); - rgba2[c][j] = LERP(zw, ctemp0[c][j], ctemp1[c][j]); + rgba[c][j] = lerp_3d(xw[j], yw[j], zw[j], + tx0[c][0], tx0[c][1], + tx0[c][2], tx0[c][3], + tx1[c][0], tx1[c][1], + tx1[c][2], tx1[c][3]); } - /* blend mipmap levels */ - for (c = 0; c < NUM_CHANNELS; c++) { - rgba[c][j] = LERP(levelBlend, rgba[c][j], rgba2[c][j]); + if (level0 != level1) { + /* get texels from second mipmap level and blend */ + float rgba2[4][4]; + x0[j] /= 2; + y0[j] /= 2; + z0[j] /= 2; + x1[j] /= 2; + y1[j] /= 2; + z1[j] /= 2; + get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z0[j], tx0, 0); + get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z0[j], tx0, 1); + get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z0[j], tx0, 2); + get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z0[j], tx0, 3); + get_texel(tgsi_sampler, face, level1, x0[j], y0[j], z1[j], tx1, 0); + get_texel(tgsi_sampler, face, level1, x1[j], y0[j], z1[j], tx1, 1); + get_texel(tgsi_sampler, face, level1, x0[j], y1[j], z1[j], tx1, 2); + 
get_texel(tgsi_sampler, face, level1, x1[j], y1[j], z1[j], tx1, 3); + + /* interpolate R, G, B, A */ + for (c = 0; c < 4; c++) { + rgba2[c][j] = lerp_3d(xw[j], yw[j], zw[j], + tx0[c][0], tx0[c][1], + tx0[c][2], tx0[c][3], + tx1[c][0], tx1[c][1], + tx1[c][2], tx1[c][3]); + } + + /* blend mipmap levels */ + for (c = 0; c < NUM_CHANNELS; c++) { + rgba[c][j] = lerp(levelBlend, rgba[c][j], rgba2[c][j]); + } } } } @@ -939,10 +1041,11 @@ sp_get_samples_3d(struct tgsi_sampler *sampler, static void -sp_get_samples_cube(struct tgsi_sampler *sampler, +sp_get_samples_cube(const struct tgsi_sampler *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], + boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { @@ -951,67 +1054,80 @@ sp_get_samples_cube(struct tgsi_sampler *sampler, for (j = 0; j < QUAD_SIZE; j++) { faces[j] = choose_cube_face(s[j], t[j], p[j], ssss + j, tttt + j); } - sp_get_samples_2d_common(sampler, ssss, tttt, NULL, lodbias, rgba, faces); + sp_get_samples_2d_common(sampler, ssss, tttt, NULL, + computeLambda, lodbias, rgba, faces); } static void -sp_get_samples_rect(struct tgsi_sampler *sampler, +sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], + boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - //sp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, faces); - static const uint face = 0; - const uint compare_func = sampler->state->compare_func; + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; + const uint face = 0; + const uint compare_func = sampler->compare_func; unsigned level0, level1, j, imgFilter; int width, height; float levelBlend; - 
choose_mipmap_levels(sampler, s, t, p, lodbias, + choose_mipmap_levels(texture, sampler, s, t, p, computeLambda, lodbias, &level0, &level1, &levelBlend, &imgFilter); /* texture RECTS cannot be mipmapped */ assert(level0 == level1); - width = sampler->texture->width[level0]; - height = sampler->texture->height[level0]; + width = texture->width[level0]; + height = texture->height[level0]; assert(width > 0); switch (imgFilter) { case PIPE_TEX_FILTER_NEAREST: - for (j = 0; j < QUAD_SIZE; j++) { - int x = nearest_texcoord_unnorm(sampler->state->wrap_s, s[j], width); - int y = nearest_texcoord_unnorm(sampler->state->wrap_t, t[j], height); - get_texel(sampler, face, level0, x, y, 0, rgba, j); - if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(compare_func, rgba, p, j); + { + int x[4], y[4]; + nearest_texcoord_unnorm_4(sampler->wrap_s, s, width, x); + nearest_texcoord_unnorm_4(sampler->wrap_t, t, height, y); + for (j = 0; j < QUAD_SIZE; j++) { + get_texel(tgsi_sampler, face, level0, x[j], y[j], 0, rgba, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, rgba, p, j); + } } } break; case PIPE_TEX_FILTER_LINEAR: case PIPE_TEX_FILTER_ANISO: - for (j = 0; j < QUAD_SIZE; j++) { - float tx[4][4], a, b; - int x0, y0, x1, y1, c; - linear_texcoord_unnorm(sampler->state->wrap_s, s[j], width, &x0, &x1, &a); - linear_texcoord_unnorm(sampler->state->wrap_t, t[j], height, &y0, &y1, &b); - get_texel(sampler, face, level0, x0, y0, 0, tx, 0); - get_texel(sampler, face, level0, x1, y0, 0, tx, 1); - get_texel(sampler, face, level0, x0, y1, 0, tx, 2); - get_texel(sampler, face, level0, x1, y1, 0, tx, 3); - if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { - shadow_compare(compare_func, tx, p, 0); - shadow_compare(compare_func, tx, p, 1); - shadow_compare(compare_func, tx, p, 2); - shadow_compare(compare_func, tx, p, 3); - } - - for (c = 0; c < 4; c++) { - rgba[c][j] = lerp_2d(a, b, tx[c][0], 
tx[c][1], tx[c][2], tx[c][3]); + { + int x0[4], y0[4], x1[4], y1[4]; + float xw[4], yw[4]; /* weights */ + linear_texcoord_unnorm_4(sampler->wrap_s, s, width, x0, x1, xw); + linear_texcoord_unnorm_4(sampler->wrap_t, t, height, y0, y1, yw); + for (j = 0; j < QUAD_SIZE; j++) { + float tx[4][4]; /* texels */ + int c; + get_texel(tgsi_sampler, face, level0, x0[j], y0[j], 0, tx, 0); + get_texel(tgsi_sampler, face, level0, x1[j], y0[j], 0, tx, 1); + get_texel(tgsi_sampler, face, level0, x0[j], y1[j], 0, tx, 2); + get_texel(tgsi_sampler, face, level0, x1[j], y1[j], 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + shadow_compare(compare_func, tx, p, 0); + shadow_compare(compare_func, tx, p, 1); + shadow_compare(compare_func, tx, p, 2); + shadow_compare(compare_func, tx, p, 3); + } + for (c = 0; c < 4; c++) { + rgba[c][j] = lerp_2d(xw[j], yw[j], + tx[c][0], tx[c][1], tx[c][2], tx[c][3]); + } } } break; @@ -1021,49 +1137,45 @@ sp_get_samples_rect(struct tgsi_sampler *sampler, } - - /** - * Called via tgsi_sampler::get_samples() - * Use the sampler's state setting to get a filtered RGBA value - * from the sampler's texture. - * - * XXX we can implement many versions of this function, each - * tightly coded for a specific combination of sampler state - * (nearest + repeat), (bilinear mipmap + clamp), etc. - * - * The update_samplers() function in st_atom_sampler.c could create - * a new tgsi_sampler object for each state combo it finds.... + * Common code for vertex/fragment program texture sampling. 
*/ -void -sp_get_samples(struct tgsi_sampler *sampler, +static INLINE void +sp_get_samples(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], + boolean computeLambda, float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - if (!sampler->texture) + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; + + if (!texture) return; - switch (sampler->texture->target) { + switch (texture->target) { case PIPE_TEXTURE_1D: - assert(sampler->state->normalized_coords); - sp_get_samples_1d(sampler, s, t, p, lodbias, rgba); + assert(sampler->normalized_coords); + sp_get_samples_1d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); break; case PIPE_TEXTURE_2D: - if (sampler->state->normalized_coords) - sp_get_samples_2d(sampler, s, t, p, lodbias, rgba); + if (sampler->normalized_coords) + sp_get_samples_2d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); else - sp_get_samples_rect(sampler, s, t, p, lodbias, rgba); + sp_get_samples_rect(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); break; case PIPE_TEXTURE_3D: - assert(sampler->state->normalized_coords); - sp_get_samples_3d(sampler, s, t, p, lodbias, rgba); + assert(sampler->normalized_coords); + sp_get_samples_3d(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); break; case PIPE_TEXTURE_CUBE: - assert(sampler->state->normalized_coords); - sp_get_samples_cube(sampler, s, t, p, lodbias, rgba); + assert(sampler->normalized_coords); + sp_get_samples_cube(tgsi_sampler, s, t, p, computeLambda, lodbias, rgba); break; default: assert(0); @@ -1084,3 +1196,34 @@ sp_get_samples(struct tgsi_sampler *sampler, #endif } + +/** + * Called via tgsi_sampler::get_samples() when running a fragment shader. 
+ * Get four filtered RGBA values from the sampler's texture. + */ +void +sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + sp_get_samples(tgsi_sampler, s, t, p, TRUE, lodbias, rgba); +} + + +/** + * Called via tgsi_sampler::get_samples() when running a vertex shader. + * Get four filtered RGBA values from the sampler's texture. + */ +void +sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]) +{ + sp_get_samples(tgsi_sampler, s, t, p, FALSE, lodbias, rgba); +} diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 404bfd0c36..40d8eb2c2a 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -1,17 +1,73 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef SP_TEX_SAMPLE_H #define SP_TEX_SAMPLE_H -struct tgsi_sampler; +#include "tgsi/tgsi_exec.h" + + +/** + * Subclass of tgsi_sampler + */ +struct sp_shader_sampler +{ + struct tgsi_sampler base; /**< base class */ + + uint unit; + struct softpipe_context *sp; + struct softpipe_tile_cache *cache; +}; + +static INLINE const struct sp_shader_sampler * +sp_shader_sampler(const struct tgsi_sampler *sampler) +{ + return (const struct sp_shader_sampler *) sampler; +} + + +extern void +sp_get_samples_fragment(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + extern void -sp_get_samples(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); +sp_get_samples_vertex(struct tgsi_sampler *tgsi_sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); #endif /* SP_TEX_SAMPLE_H */ diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index a64dc89f43..28a9784b16 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -33,7 +33,7 @@ #include "pipe/p_context.h" #include 
"pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -94,49 +94,23 @@ softpipe_texture_layout(struct pipe_screen *screen, return spt->buffer != NULL; } -/* Hack it up to use the old winsys->surface_alloc_storage() - * method for now: - */ static boolean softpipe_displaytarget_layout(struct pipe_screen *screen, struct softpipe_texture * spt) { struct pipe_winsys *ws = screen->winsys; - struct pipe_surface surf; - unsigned flags = (PIPE_BUFFER_USAGE_CPU_READ | - PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_GPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE); - int ret; - - - memset(&surf, 0, sizeof(surf)); - - ret =ws->surface_alloc_storage( ws, - &surf, - spt->base.width[0], - spt->base.height[0], - spt->base.format, - flags, - spt->base.tex_usage); - if(ret != 0) - return FALSE; - - if (!surf.buffer) { - /* allocation failed */ - return FALSE; - } + unsigned usage = (PIPE_BUFFER_USAGE_CPU_READ_WRITE | + PIPE_BUFFER_USAGE_GPU_READ_WRITE); - /* Now extract the goodies: - */ spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); - spt->stride[0] = surf.stride; - /* Transfer the reference: - */ - spt->buffer = surf.buffer; - surf.buffer = NULL; + spt->buffer = ws->surface_buffer_create( ws, + spt->base.width[0], + spt->base.height[0], + spt->base.format, + usage, + &spt->stride[0]); return spt->buffer != NULL; } @@ -231,28 +205,21 @@ softpipe_get_tex_surface(struct pipe_screen *screen, unsigned face, unsigned level, unsigned zslice, unsigned usage) { - struct pipe_winsys *ws = screen->winsys; struct softpipe_texture *spt = softpipe_texture(pt); struct pipe_surface *ps; assert(level <= pt->last_level); ps = CALLOC_STRUCT(pipe_surface); - ps->refcount = 1; if (ps) { - assert(ps->refcount); + ps->refcount = 1; pipe_texture_reference(&ps->texture, pt); - 
pipe_buffer_reference(screen, &ps->buffer, spt->buffer); ps->format = pt->format; - ps->block = pt->block; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->nblocksx = pt->nblocksx[level]; - ps->nblocksy = pt->nblocksy[level]; - ps->stride = spt->stride[level]; ps->offset = spt->level_offset[level]; ps->usage = usage; - + /* Because we are softpipe, anything that the state tracker * thought was going to be done with the GPU will actually get * done with the CPU. Let's adjust the flags to take that into @@ -278,8 +245,7 @@ softpipe_get_tex_surface(struct pipe_screen *screen, if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * - ps->nblocksy * - ps->stride; + pt->nblocksy[level] * spt->stride[level]; } else { assert(face == 0); @@ -299,37 +265,108 @@ softpipe_tex_surface_release(struct pipe_screen *screen, * needed post-processing to put them into hardware layout, this is * where it would happen. For softpipe, nothing to do. 
*/ - assert ((*s)->texture); + assert(surf->texture); if (--surf->refcount == 0) { - pipe_texture_reference(&surf->texture, NULL); - pipe_buffer_reference(screen, &surf->buffer, NULL); + pipe_texture_reference(&surf->texture, NULL); FREE(surf); } *s = NULL; } +static struct pipe_transfer * +softpipe_get_tex_transfer(struct pipe_screen *screen, + struct pipe_texture *texture, + unsigned face, unsigned level, unsigned zslice, + enum pipe_transfer_usage usage, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + struct softpipe_texture *sptex = softpipe_texture(texture); + struct softpipe_transfer *spt; + struct pipe_transfer *pt; + + assert(texture); + assert(level <= texture->last_level); + + spt = CALLOC_STRUCT(softpipe_transfer); + pt = &spt->base; + if (spt) { + pt->refcount = 1; + pipe_texture_reference(&pt->texture, texture); + pt->format = texture->format; + pt->block = texture->block; + pt->x = x; + pt->y = y; + pt->width = w; + pt->height = h; + pt->nblocksx = texture->nblocksx[level]; + pt->nblocksy = texture->nblocksy[level]; + pt->stride = sptex->stride[level]; + spt->offset = sptex->level_offset[level]; + pt->usage = usage; + pt->face = face; + pt->level = level; + pt->zslice = zslice; + + if (texture->target == PIPE_TEXTURE_CUBE || + texture->target == PIPE_TEXTURE_3D) { + spt->offset += ((texture->target == PIPE_TEXTURE_CUBE) ? face : + zslice) * pt->nblocksy * pt->stride; + } + else { + assert(face == 0); + assert(zslice == 0); + } + } + return pt; +} + + +static void +softpipe_tex_transfer_release(struct pipe_screen *screen, + struct pipe_transfer **t) +{ + struct softpipe_transfer *transfer = softpipe_transfer(*t); + /* Effectively do the texture_update work here - if texture images + * needed post-processing to put them into hardware layout, this is + * where it would happen. For softpipe, nothing to do. 
+ */ + assert (transfer->base.texture); + if (--transfer->base.refcount == 0) { + pipe_texture_reference(&transfer->base.texture, NULL); + FREE(transfer); + } + *t = NULL; +} + + static void * -softpipe_surface_map( struct pipe_screen *screen, - struct pipe_surface *surface, - unsigned flags ) +softpipe_transfer_map( struct pipe_screen *screen, + struct pipe_transfer *transfer ) { ubyte *map; + struct softpipe_texture *spt; + unsigned flags = 0; - if (flags & ~surface->usage) { - assert(0); - return NULL; + assert(transfer->texture); + spt = softpipe_texture(transfer->texture); + + if (transfer->usage != PIPE_TRANSFER_READ) { + flags |= PIPE_BUFFER_USAGE_CPU_WRITE; + } + + if (transfer->usage != PIPE_TRANSFER_WRITE) { + flags |= PIPE_BUFFER_USAGE_CPU_READ; } - map = pipe_buffer_map( screen, surface->buffer, flags ); + map = pipe_buffer_map(screen, spt->buffer, flags); if (map == NULL) return NULL; /* May want to different things here depending on read/write nature * of the map: */ - if (surface->texture && - (flags & PIPE_BUFFER_USAGE_CPU_WRITE)) + if (transfer->texture && transfer->usage != PIPE_TRANSFER_READ) { /* Do something to notify sharing contexts of a texture change. * In softpipe, that would mean flushing the texture cache. 
@@ -337,15 +374,22 @@ softpipe_surface_map( struct pipe_screen *screen, softpipe_screen(screen)->timestamp++; } - return map + surface->offset; + return map + softpipe_transfer(transfer)->offset + + transfer->y / transfer->block.height * transfer->stride + + transfer->x / transfer->block.width * transfer->block.size; } static void -softpipe_surface_unmap(struct pipe_screen *screen, - struct pipe_surface *surface) +softpipe_transfer_unmap(struct pipe_screen *screen, + struct pipe_transfer *transfer) { - pipe_buffer_unmap( screen, surface->buffer ); + struct softpipe_texture *spt; + + assert(transfer->texture); + spt = softpipe_texture(transfer->texture); + + pipe_buffer_unmap( screen, spt->buffer ); } @@ -365,6 +409,8 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen) screen->get_tex_surface = softpipe_get_tex_surface; screen->tex_surface_release = softpipe_tex_surface_release; - screen->surface_map = softpipe_surface_map; - screen->surface_unmap = softpipe_surface_unmap; + screen->get_tex_transfer = softpipe_get_tex_transfer; + screen->tex_transfer_release = softpipe_tex_transfer_release; + screen->transfer_map = softpipe_transfer_map; + screen->transfer_unmap = softpipe_transfer_unmap; } diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index bf437a7c61..893aa7d11d 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -42,7 +42,7 @@ struct softpipe_texture struct pipe_texture base; unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned stride[PIPE_MAX_TEXTURE_LEVELS]; /* The data is held here: */ @@ -51,14 +51,27 @@ struct softpipe_texture boolean modified; }; +struct softpipe_transfer +{ + struct pipe_transfer base; + + unsigned long offset; +}; + -/** cast wrapper */ +/** cast wrappers */ static INLINE struct softpipe_texture * softpipe_texture(struct pipe_texture *pt) { return (struct 
softpipe_texture *) pt; } +static INLINE struct softpipe_transfer * +softpipe_transfer(struct pipe_transfer *pt) +{ + return (struct softpipe_transfer *) pt; +} + extern void softpipe_init_texture_funcs( struct softpipe_context *softpipe ); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index b50c984513..593360aab0 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -26,7 +26,7 @@ **************************************************************************/ /** - * Framebuffer/surface tile caching. + * Texture tile caching. * * Author: * Brian Paul @@ -40,7 +40,7 @@ #include "sp_texture.h" #include "sp_tile_cache.h" -#define NUM_ENTRIES 32 +#define NUM_ENTRIES 50 /** XXX move these */ @@ -52,7 +52,8 @@ struct softpipe_tile_cache { struct pipe_screen *screen; struct pipe_surface *surface; /**< the surface we're caching */ - void *surface_map; + struct pipe_transfer *transfer; + void *transfer_map; struct pipe_texture *texture; /**< if caching a texture */ struct softpipe_cached_tile entries[NUM_ENTRIES]; uint clear_flags[(MAX_WIDTH / TILE_SIZE) * (MAX_HEIGHT / TILE_SIZE) / 32]; @@ -60,8 +61,8 @@ struct softpipe_tile_cache uint clear_val; boolean depth_stencil; /** Is the surface a depth/stencil format? 
*/ - struct pipe_surface *tex_surf; - void *tex_surf_map; + struct pipe_transfer *tex_trans; + void *tex_trans_map; int tex_face, tex_level, tex_z; struct softpipe_cached_tile tile; /**< scratch tile for clears */ @@ -131,16 +132,19 @@ sp_create_tile_cache( struct pipe_screen *screen ) void sp_destroy_tile_cache(struct softpipe_tile_cache *tc) { + struct pipe_screen *screen; uint pos; for (pos = 0; pos < NUM_ENTRIES; pos++) { /*assert(tc->entries[pos].x < 0);*/ } - if (tc->surface) { - pipe_surface_reference(&tc->surface, NULL); + if (tc->transfer) { + screen = tc->transfer->texture->screen; + screen->tex_transfer_release(screen, &tc->transfer); } - if (tc->tex_surf) { - pipe_surface_reference(&tc->tex_surf, NULL); + if (tc->tex_trans) { + screen = tc->tex_trans->texture->screen; + screen->tex_transfer_release(screen, &tc->tex_trans); } FREE( tc ); @@ -156,18 +160,29 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, { assert(!tc->texture); - if (tc->surface_map) { - tc->screen->surface_unmap(tc->screen, tc->surface); - tc->surface_map = NULL; + if (tc->transfer) { + struct pipe_screen *screen = tc->transfer->texture->screen; + + if (ps == tc->surface) + return; + + if (tc->transfer_map) { + tc->screen->transfer_unmap(tc->screen, tc->transfer); + tc->transfer_map = NULL; + } + + screen->tex_transfer_release(screen, &tc->transfer); } - pipe_surface_reference(&tc->surface, ps); + tc->surface = ps; + + if (ps) { + struct pipe_screen *screen = ps->texture->screen; - if (tc->surface) { - if (tc->surface_map) /* XXX: this is always NULL!? 
*/ - tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface, - PIPE_BUFFER_USAGE_CPU_READ | - PIPE_BUFFER_USAGE_CPU_WRITE); + tc->transfer = screen->get_tex_transfer(screen, ps->texture, ps->face, + ps->level, ps->zslice, + PIPE_TRANSFER_READ_WRITE, + 0, 0, ps->width, ps->height); tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM || ps->format == PIPE_FORMAT_X8Z24_UNORM || @@ -181,7 +196,7 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, /** - * Return the surface being cached. + * Return the transfer being cached. */ struct pipe_surface * sp_tile_cache_get_surface(struct softpipe_tile_cache *tc) @@ -191,30 +206,27 @@ sp_tile_cache_get_surface(struct softpipe_tile_cache *tc) void -sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc) +sp_tile_cache_map_transfers(struct softpipe_tile_cache *tc) { - if (tc->surface && !tc->surface_map) - tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface, - PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_CPU_READ); - - if (tc->tex_surf && !tc->tex_surf_map) - tc->tex_surf_map = tc->screen->surface_map(tc->screen, tc->tex_surf, - PIPE_BUFFER_USAGE_CPU_READ); + if (tc->transfer && !tc->transfer_map) + tc->transfer_map = tc->screen->transfer_map(tc->screen, tc->transfer); + + if (tc->tex_trans && !tc->tex_trans_map) + tc->tex_trans_map = tc->screen->transfer_map(tc->screen, tc->tex_trans); } void -sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc) +sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc) { - if (tc->surface_map) { - tc->screen->surface_unmap(tc->screen, tc->surface); - tc->surface_map = NULL; + if (tc->transfer_map) { + tc->screen->transfer_unmap(tc->screen, tc->transfer); + tc->transfer_map = NULL; } - if (tc->tex_surf_map) { - tc->screen->surface_unmap(tc->screen, tc->tex_surf); - tc->tex_surf_map = NULL; + if (tc->tex_trans_map) { + tc->screen->transfer_unmap(tc->screen, tc->tex_trans); + tc->tex_trans_map = NULL; } } @@ -229,15 +241,20 @@ 
sp_tile_cache_set_texture(struct pipe_context *pipe, { uint i; - assert(!tc->surface); + assert(!tc->transfer); pipe_texture_reference(&tc->texture, texture); - if (tc->tex_surf_map) { - tc->screen->surface_unmap(tc->screen, tc->tex_surf); - tc->tex_surf_map = NULL; + if (tc->transfer) { + struct pipe_screen *screen = tc->transfer->texture->screen; + + if (tc->tex_trans_map) { + tc->screen->transfer_unmap(tc->screen, tc->tex_trans); + tc->tex_trans_map = NULL; + } + + screen->tex_transfer_release(screen, &tc->tex_trans); } - pipe_surface_reference(&tc->tex_surf, NULL); /* mark as entries as invalid/empty */ /* XXX we should try to avoid this when the teximage hasn't changed */ @@ -328,20 +345,20 @@ static void sp_tile_cache_flush_clear(struct pipe_context *pipe, struct softpipe_tile_cache *tc) { - struct pipe_surface *ps = tc->surface; - const uint w = tc->surface->width; - const uint h = tc->surface->height; + struct pipe_transfer *pt = tc->transfer; + const uint w = tc->transfer->width; + const uint h = tc->transfer->height; uint x, y; uint numCleared = 0; /* clear the scratch tile to the clear value */ - clear_tile(&tc->tile, ps->format, tc->clear_val); + clear_tile(&tc->tile, pt->format, tc->clear_val); /* push the tile to all positions marked as clear */ for (y = 0; y < h; y += TILE_SIZE) { for (x = 0; x < w; x += TILE_SIZE) { if (is_clear_flag_set(tc->clear_flags, x, y)) { - pipe_put_tile_raw(ps, + pipe_put_tile_raw(pt, x, y, TILE_SIZE, TILE_SIZE, tc->tile.data.color32, 0/*STRIDE*/); @@ -359,28 +376,28 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe, /** - * Flush the tile cache: write all dirty tiles back to the surface. + * Flush the tile cache: write all dirty tiles back to the transfer. * any tiles "flagged" as cleared will be "really" cleared. 
*/ void sp_flush_tile_cache(struct softpipe_context *softpipe, struct softpipe_tile_cache *tc) { - struct pipe_surface *ps = tc->surface; + struct pipe_transfer *pt = tc->transfer; int inuse = 0, pos; - if (ps && ps->buffer) { - /* caching a drawing surface */ + if (pt) { + /* caching a drawing transfer */ for (pos = 0; pos < NUM_ENTRIES; pos++) { struct softpipe_cached_tile *tile = tc->entries + pos; if (tile->x >= 0) { if (tc->depth_stencil) { - pipe_put_tile_raw(ps, + pipe_put_tile_raw(pt, tile->x, tile->y, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(ps, + pipe_put_tile_rgba(pt, tile->x, tile->y, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } @@ -415,7 +432,7 @@ struct softpipe_cached_tile * sp_get_cached_tile(struct softpipe_context *softpipe, struct softpipe_tile_cache *tc, int x, int y) { - struct pipe_surface *ps = tc->surface; + struct pipe_transfer *pt = tc->transfer; /* tile pos in framebuffer: */ const int tile_x = x & ~(TILE_SIZE - 1); @@ -431,12 +448,12 @@ sp_get_cached_tile(struct softpipe_context *softpipe, if (tile->x != -1) { /* put dirty tile back in framebuffer */ if (tc->depth_stencil) { - pipe_put_tile_raw(ps, + pipe_put_tile_raw(pt, tile->x, tile->y, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(ps, + pipe_put_tile_rgba(pt, tile->x, tile->y, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } @@ -448,22 +465,22 @@ sp_get_cached_tile(struct softpipe_context *softpipe, if (is_clear_flag_set(tc->clear_flags, x, y)) { /* don't get tile from framebuffer, just clear it */ if (tc->depth_stencil) { - clear_tile(tile, ps->format, tc->clear_val); + clear_tile(tile, pt->format, tc->clear_val); } else { - clear_tile_rgba(tile, ps->format, tc->clear_color); + clear_tile_rgba(tile, pt->format, tc->clear_color); } clear_clear_flag(tc->clear_flags, x, y); } else { - /* get new tile data from surface */ + /* get new tile data from transfer */ if (tc->depth_stencil) { - 
pipe_get_tile_raw(ps, + pipe_get_tile_raw(pt, tile->x, tile->y, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_get_tile_rgba(ps, + pipe_get_tile_rgba(pt, tile->x, tile->y, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } @@ -484,7 +501,7 @@ sp_get_cached_tile(struct softpipe_context *softpipe, static INLINE uint tex_cache_pos(int x, int y, int z, int face, int level) { - uint entry = x + y * 5 + z * 4 + face + level; + uint entry = x + y * 9 + z * 3 + face + level * 7; return entry % NUM_ENTRIES; } @@ -494,11 +511,11 @@ tex_cache_pos(int x, int y, int z, int face, int level) * Tiles are read-only and indexed with more params. */ const struct softpipe_cached_tile * -sp_get_cached_tile_tex(struct pipe_context *pipe, +sp_get_cached_tile_tex(struct softpipe_context *sp, struct softpipe_tile_cache *tc, int x, int y, int z, int face, int level) { - struct pipe_screen *screen = pipe->screen; + struct pipe_screen *screen = sp->pipe.screen; /* tile pos in framebuffer: */ const int tile_x = x & ~(TILE_SIZE - 1); const int tile_y = y & ~(TILE_SIZE - 1); @@ -510,8 +527,12 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, if (tc->texture) { struct softpipe_texture *spt = softpipe_texture(tc->texture); if (spt->modified) { - /* texture was modified, force a cache reload */ - tile->x = -1; + /* texture was modified, invalidate all cached tiles */ + uint p; + for (p = 0; p < NUM_ENTRIES; p++) { + tile = tc->entries + p; + tile->x = -1; + } spt->modified = FALSE; } } @@ -523,28 +544,37 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, level != tile->level) { /* cache miss */ - /* check if we need to get a new surface */ - if (!tc->tex_surf || +#if 0 + printf("miss at %u x=%d y=%d z=%d face=%d level=%d\n", pos, + x/TILE_SIZE, y/TILE_SIZE, z, face, level); +#endif + /* check if we need to get a new transfer */ + if (!tc->tex_trans || tc->tex_face != face || tc->tex_level != level || tc->tex_z != z) { - /* get new surface (view into texture) */ + /* 
get new transfer (view into texture) */ + + if (tc->transfer) { + if (tc->tex_trans_map) + tc->screen->transfer_unmap(tc->screen, tc->tex_trans); - if (tc->tex_surf_map) - tc->screen->surface_unmap(tc->screen, tc->tex_surf); + screen->tex_transfer_release(screen, &tc->tex_trans); + } - tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z, - PIPE_BUFFER_USAGE_CPU_READ); - tc->tex_surf_map = screen->surface_map(screen, tc->tex_surf, - PIPE_BUFFER_USAGE_CPU_READ); + tc->tex_trans = screen->get_tex_transfer(screen, tc->texture, face, level, z, + PIPE_TRANSFER_READ, 0, 0, + tc->texture->width[level], + tc->texture->height[level]); + tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans); tc->tex_face = face; tc->tex_level = level; tc->tex_z = z; } - /* get tile from the surface (view into texture) */ - pipe_get_tile_rgba(tc->tex_surf, + /* get tile from the transfer (view into texture) */ + pipe_get_tile_rgba(tc->tex_trans, tile_x, tile_y, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); tile->x = tile_x; @@ -571,7 +601,7 @@ sp_tile_cache_clear(struct softpipe_tile_cache *tc, uint clearValue) tc->clear_val = clearValue; - switch (tc->surface->format) { + switch (tc->transfer->format) { case PIPE_FORMAT_R8G8B8A8_UNORM: r = (clearValue >> 24) & 0xff; g = (clearValue >> 16) & 0xff; diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index bc96c941f6..9ac3fdda94 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -74,10 +74,10 @@ extern struct pipe_surface * sp_tile_cache_get_surface(struct softpipe_tile_cache *tc); extern void -sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc); +sp_tile_cache_map_transfers(struct softpipe_tile_cache *tc); extern void -sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc); +sp_tile_cache_unmap_transfers(struct softpipe_tile_cache *tc); extern void sp_tile_cache_set_texture(struct 
pipe_context *pipe, @@ -96,7 +96,7 @@ sp_get_cached_tile(struct softpipe_context *softpipe, struct softpipe_tile_cache *tc, int x, int y); extern const struct softpipe_cached_tile * -sp_get_cached_tile_tex(struct pipe_context *pipe, +sp_get_cached_tile_tex(struct softpipe_context *softpipe, struct softpipe_tile_cache *tc, int x, int y, int z, int face, int level); diff --git a/src/gallium/drivers/trace/Makefile b/src/gallium/drivers/trace/Makefile new file mode 100644 index 0000000000..e1bd970937 --- /dev/null +++ b/src/gallium/drivers/trace/Makefile @@ -0,0 +1,14 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = trace + +C_SOURCES = \ + tr_context.c \ + tr_dump.c \ + tr_screen.c \ + tr_state.c \ + tr_texture.c \ + tr_winsys.c + +include ../../Makefile.template diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index 1dd7719379..ec8be27077 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -274,11 +274,11 @@ static INLINE boolean trace_context_get_query_result(struct pipe_context *_pipe, struct pipe_query *query, boolean wait, - uint64 *presult) + uint64_t *presult) { struct trace_context *tr_ctx = trace_context(_pipe); struct pipe_context *pipe = tr_ctx->pipe; - uint64 result; + uint64_t result; boolean _result; trace_dump_call_begin("pipe_context", "get_query_result"); @@ -722,9 +722,9 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe, /* Unwrap the input state */ memcpy(&unwrapped_state, state, sizeof(unwrapped_state)); - for(i = 0; i < state->num_cbufs; ++i) + for(i = 0; i < state->nr_cbufs; ++i) unwrapped_state.cbufs[i] = trace_surface_unwrap(tr_ctx, state->cbufs[i]); - for(i = state->num_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i) + for(i = state->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i) unwrapped_state.cbufs[i] = NULL; unwrapped_state.zsbuf = trace_surface_unwrap(tr_ctx, state->zsbuf); state = &unwrapped_state; diff --git 
a/src/gallium/drivers/trace/tr_context.h b/src/gallium/drivers/trace/tr_context.h index 7831900ec2..6704175964 100644 --- a/src/gallium/drivers/trace/tr_context.h +++ b/src/gallium/drivers/trace/tr_context.h @@ -30,7 +30,7 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "pipe/p_context.h" diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index a0ead0ded3..d98cef221b 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -45,7 +45,7 @@ #endif #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include "util/u_memory.h" #include "util/u_string.h" #include "util/u_stream.h" diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index 8789f86b1a..164c6bbc4d 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -315,26 +315,101 @@ trace_screen_tex_surface_release(struct pipe_screen *_screen, } +static struct pipe_transfer * +trace_screen_get_tex_transfer(struct pipe_screen *_screen, + struct pipe_texture *texture, + unsigned face, unsigned level, + unsigned zslice, + enum pipe_transfer_usage usage, + unsigned x, unsigned y, unsigned w, unsigned h) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct trace_texture *tr_tex; + struct pipe_transfer *result; + + assert(texture); + tr_tex = trace_texture(tr_scr, texture); + texture = tr_tex->texture; + assert(texture->screen == screen); + + trace_dump_call_begin("pipe_screen", "get_tex_transfer"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, texture); + trace_dump_arg(uint, face); + trace_dump_arg(uint, level); + trace_dump_arg(uint, zslice); + trace_dump_arg(uint, usage); + + result = screen->get_tex_transfer(screen, texture, face, level, zslice, usage, + x, y, w, h); + + trace_dump_ret(ptr, result); + + 
trace_dump_call_end(); + + result = trace_transfer_create(tr_tex, result); + + return result; +} + + +static void +trace_screen_tex_transfer_release(struct pipe_screen *_screen, + struct pipe_transfer **ptransfer) +{ + struct trace_screen *tr_scr = trace_screen(_screen); + struct pipe_screen *screen = tr_scr->screen; + struct trace_texture *tr_tex; + struct trace_transfer *tr_trans; + struct pipe_transfer *transfer; + + assert(ptransfer); + if(*ptransfer) { + tr_tex = trace_texture(tr_scr, (*ptransfer)->texture); + tr_trans = trace_transfer(tr_tex, *ptransfer); + transfer = tr_trans->transfer; + } + else + transfer = NULL; + + if (*ptransfer) { + if (!--(*ptransfer)->refcount) { + trace_dump_call_begin("pipe_screen", "tex_transfer_destroy"); + + trace_dump_arg(ptr, screen); + trace_dump_arg(ptr, transfer); + + trace_transfer_destroy(tr_tex, *ptransfer); + + trace_dump_call_end(); + } + + *ptransfer = NULL; + } +} + + static void * -trace_screen_surface_map(struct pipe_screen *_screen, - struct pipe_surface *surface, - unsigned flags) +trace_screen_transfer_map(struct pipe_screen *_screen, + struct pipe_transfer *transfer) { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; struct trace_texture *tr_tex; - struct trace_surface *tr_surf; + struct trace_transfer *tr_trans; void *map; - tr_tex = trace_texture(tr_scr, surface->texture); - tr_surf = trace_surface(tr_tex, surface); - surface = tr_surf->surface; + tr_tex = trace_texture(tr_scr, transfer->texture); + tr_trans = trace_transfer(tr_tex, transfer); + transfer = tr_trans->transfer; - map = screen->surface_map(screen, surface, flags); + map = screen->transfer_map(screen, transfer); if(map) { - if(flags & PIPE_BUFFER_USAGE_CPU_WRITE) { - assert(!tr_surf->map); - tr_surf->map = map; + if(transfer->usage != PIPE_TRANSFER_READ) { + assert(!tr_trans->map); + tr_trans->map = map; } } @@ -343,33 +418,33 @@ trace_screen_surface_map(struct pipe_screen *_screen, static void 
-trace_screen_surface_unmap(struct pipe_screen *_screen, - struct pipe_surface *surface) +trace_screen_transfer_unmap(struct pipe_screen *_screen, + struct pipe_transfer *transfer) { struct trace_screen *tr_scr = trace_screen(_screen); struct pipe_screen *screen = tr_scr->screen; struct trace_texture *tr_tex; - struct trace_surface *tr_surf; + struct trace_transfer *tr_trans; - tr_tex = trace_texture(tr_scr, surface->texture); - tr_surf = trace_surface(tr_tex, surface); - surface = tr_surf->surface; + tr_tex = trace_texture(tr_scr, transfer->texture); + tr_trans = trace_transfer(tr_tex, transfer); + transfer = tr_trans->transfer; - if(tr_surf->map) { - size_t size = surface->nblocksy * surface->stride; + if(tr_trans->map) { + size_t size = transfer->nblocksy * transfer->stride; - trace_dump_call_begin("pipe_winsys", "surface_write"); + trace_dump_call_begin("pipe_winsys", "transfer_write"); trace_dump_arg(ptr, screen); - trace_dump_arg(ptr, surface); + trace_dump_arg(ptr, transfer); trace_dump_arg_begin("data"); - trace_dump_bytes(tr_surf->map, size); + trace_dump_bytes(tr_trans->map, size); trace_dump_arg_end(); trace_dump_arg_begin("stride"); - trace_dump_uint(surface->stride); + trace_dump_uint(transfer->stride); trace_dump_arg_end(); trace_dump_arg_begin("size"); @@ -378,10 +453,10 @@ trace_screen_surface_unmap(struct pipe_screen *_screen, trace_dump_call_end(); - tr_surf->map = NULL; + tr_trans->map = NULL; } - screen->surface_unmap(screen, surface); + screen->transfer_unmap(screen, transfer); } @@ -437,8 +512,10 @@ trace_screen_create(struct pipe_screen *screen) tr_scr->base.texture_release = trace_screen_texture_release; tr_scr->base.get_tex_surface = trace_screen_get_tex_surface; tr_scr->base.tex_surface_release = trace_screen_tex_surface_release; - tr_scr->base.surface_map = trace_screen_surface_map; - tr_scr->base.surface_unmap = trace_screen_surface_unmap; + tr_scr->base.get_tex_transfer = trace_screen_get_tex_transfer; + 
tr_scr->base.tex_transfer_release = trace_screen_tex_transfer_release; + tr_scr->base.transfer_map = trace_screen_transfer_map; + tr_scr->base.transfer_unmap = trace_screen_transfer_unmap; tr_scr->screen = screen; diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index 986d939e0c..81a9e2376e 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -223,7 +223,6 @@ void trace_dump_constant_buffer(const struct pipe_constant_buffer *state) trace_dump_struct_begin("pipe_constant_buffer"); trace_dump_member(ptr, state, buffer); - trace_dump_member(uint, state, size); trace_dump_struct_end(); } @@ -280,9 +279,9 @@ void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_ trace_dump_member(uint, &state->stencil[i], fail_op); trace_dump_member(uint, &state->stencil[i], zpass_op); trace_dump_member(uint, &state->stencil[i], zfail_op); - trace_dump_member(uint, &state->stencil[i], ref_value); - trace_dump_member(uint, &state->stencil[i], value_mask); - trace_dump_member(uint, &state->stencil[i], write_mask); + trace_dump_member(uint, &state->stencil[i], ref_value); + trace_dump_member(uint, &state->stencil[i], valuemask); + trace_dump_member(uint, &state->stencil[i], writemask); trace_dump_struct_end(); trace_dump_elem_end(); } @@ -293,7 +292,7 @@ void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_ trace_dump_struct_begin("pipe_alpha_state"); trace_dump_member(bool, &state->alpha, enabled); trace_dump_member(uint, &state->alpha, func); - trace_dump_member(float, &state->alpha, ref); + trace_dump_member(float, &state->alpha, ref_value); trace_dump_struct_end(); trace_dump_member_end(); @@ -351,7 +350,7 @@ void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state) trace_dump_member(uint, state, width); trace_dump_member(uint, state, height); - trace_dump_member(uint, state, num_cbufs); + trace_dump_member(uint, state, nr_cbufs); 
trace_dump_member_array(ptr, state, cbufs); trace_dump_member(ptr, state, zsbuf); @@ -398,13 +397,39 @@ void trace_dump_surface(const struct pipe_surface *state) trace_dump_struct_begin("pipe_surface"); - trace_dump_member(ptr, state, buffer); trace_dump_member(format, state, format); trace_dump_member(uint, state, status); trace_dump_member(uint, state, clear_value); trace_dump_member(uint, state, width); trace_dump_member(uint, state, height); + trace_dump_member(uint, state, layout); + trace_dump_member(uint, state, offset); + trace_dump_member(uint, state, refcount); + trace_dump_member(uint, state, usage); + + trace_dump_member(ptr, state, texture); + trace_dump_member(uint, state, face); + trace_dump_member(uint, state, level); + trace_dump_member(uint, state, zslice); + + trace_dump_struct_end(); +} + + +void trace_dump_transfer(const struct pipe_transfer *state) +{ + if(!state) { + trace_dump_null(); + return; + } + + trace_dump_struct_begin("pipe_transfer"); + + trace_dump_member(format, state, format); + trace_dump_member(uint, state, width); + trace_dump_member(uint, state, height); + trace_dump_member_begin("block"); trace_dump_block(&state->block); trace_dump_member_end(); @@ -412,8 +437,6 @@ void trace_dump_surface(const struct pipe_surface *state) trace_dump_member(uint, state, nblocksx); trace_dump_member(uint, state, nblocksy); trace_dump_member(uint, state, stride); - trace_dump_member(uint, state, layout); - trace_dump_member(uint, state, offset); trace_dump_member(uint, state, refcount); trace_dump_member(uint, state, usage); @@ -435,7 +458,7 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) trace_dump_struct_begin("pipe_vertex_buffer"); - trace_dump_member(uint, state, pitch); + trace_dump_member(uint, state, stride); trace_dump_member(uint, state, max_index); trace_dump_member(uint, state, buffer_offset); trace_dump_member(ptr, state, buffer); diff --git a/src/gallium/drivers/trace/tr_state.h 
b/src/gallium/drivers/trace/tr_state.h index 5ae533dc66..513ed0ac98 100644 --- a/src/gallium/drivers/trace/tr_state.h +++ b/src/gallium/drivers/trace/tr_state.h @@ -68,6 +68,8 @@ void trace_dump_sampler_state(const struct pipe_sampler_state *state); void trace_dump_surface(const struct pipe_surface *state); +void trace_dump_transfer(const struct pipe_transfer *state); + void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state); void trace_dump_vertex_element(const struct pipe_vertex_element *state); diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c index 440a78704a..120ba0dd31 100644 --- a/src/gallium/drivers/trace/tr_texture.c +++ b/src/gallium/drivers/trace/tr_texture.c @@ -87,7 +87,6 @@ trace_surface_create(struct trace_texture *tr_tex, memcpy(&tr_surf->base, surface, sizeof(struct pipe_surface)); - tr_surf->base.winsys = tr_tex->base.screen->winsys; tr_surf->base.texture = NULL; pipe_texture_reference(&tr_surf->base.texture, &tr_tex->base); tr_surf->surface = surface; @@ -110,3 +109,43 @@ trace_surface_destroy(struct trace_texture *tr_tex, FREE(tr_surf); } + +struct pipe_transfer * +trace_transfer_create(struct trace_texture *tr_tex, + struct pipe_transfer *transfer) +{ + struct trace_transfer *tr_trans; + + if(!transfer) + goto error; + + assert(transfer->texture == tr_tex->texture); + + tr_trans = CALLOC_STRUCT(trace_transfer); + if(!tr_trans) + goto error; + + memcpy(&tr_trans->base, transfer, sizeof(struct pipe_transfer)); + + tr_trans->base.texture = NULL; + pipe_texture_reference(&tr_trans->base.texture, &tr_tex->base); + tr_trans->transfer = transfer; + + return &tr_trans->base; + +error: + pipe_transfer_reference(&transfer, NULL); + return NULL; +} + + +void +trace_transfer_destroy(struct trace_texture *tr_tex, + struct pipe_transfer *transfer) +{ + struct trace_transfer *tr_trans = trace_transfer(tr_tex, transfer); + pipe_texture_reference(&tr_trans->base.texture, NULL); + 
pipe_transfer_reference(&tr_trans->transfer, NULL); + FREE(tr_trans); +} + diff --git a/src/gallium/drivers/trace/tr_texture.h b/src/gallium/drivers/trace/tr_texture.h index 9e72edb8a3..168cefd53d 100644 --- a/src/gallium/drivers/trace/tr_texture.h +++ b/src/gallium/drivers/trace/tr_texture.h @@ -48,6 +48,14 @@ struct trace_surface struct pipe_surface base; struct pipe_surface *surface; +}; + + +struct trace_transfer +{ + struct pipe_transfer base; + + struct pipe_transfer *transfer; void *map; }; @@ -75,6 +83,17 @@ trace_surface(struct trace_texture *tr_tex, } +static INLINE struct trace_transfer * +trace_transfer(struct trace_texture *tr_tex, + struct pipe_transfer *transfer) +{ + if(!transfer) + return NULL; + assert(transfer->texture == &tr_tex->base); + return (struct trace_transfer *)transfer; +} + + struct pipe_texture * trace_texture_create(struct trace_screen *tr_scr, struct pipe_texture *texture); @@ -91,5 +110,13 @@ void trace_surface_destroy(struct trace_texture *tr_tex, struct pipe_surface *surface); +struct pipe_transfer * +trace_transfer_create(struct trace_texture *tr_tex, + struct pipe_transfer *transfer); + +void +trace_transfer_destroy(struct trace_texture *tr_tex, + struct pipe_transfer *transfer); + #endif /* TR_TEXTURE_H_ */ diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index 177835854e..c4148fe810 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -98,86 +98,41 @@ trace_winsys_flush_frontbuffer(struct pipe_winsys *_winsys, } -static struct pipe_surface * -trace_winsys_surface_alloc(struct pipe_winsys *_winsys) -{ - struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct pipe_winsys *winsys = tr_ws->winsys; - struct pipe_surface *result; - - trace_dump_call_begin("pipe_winsys", "surface_alloc"); - - trace_dump_arg(ptr, winsys); - - result = winsys->surface_alloc(winsys); - - trace_dump_ret(ptr, result); - - trace_dump_call_end(); - - assert(!result 
|| !result->texture); - - return result; -} - - -static int -trace_winsys_surface_alloc_storage(struct pipe_winsys *_winsys, - struct pipe_surface *surface, +static struct pipe_buffer * +trace_winsys_surface_buffer_create(struct pipe_winsys *_winsys, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage) + unsigned usage, + unsigned *pstride) { struct trace_winsys *tr_ws = trace_winsys(_winsys); struct pipe_winsys *winsys = tr_ws->winsys; - int result; + unsigned stride; + struct pipe_buffer *result; - assert(surface && !surface->texture); - - trace_dump_call_begin("pipe_winsys", "surface_alloc_storage"); + trace_dump_call_begin("pipe_winsys", "surface_buffer_create"); trace_dump_arg(ptr, winsys); - trace_dump_arg(ptr, surface); trace_dump_arg(uint, width); trace_dump_arg(uint, height); trace_dump_arg(format, format); - trace_dump_arg(uint, flags); - trace_dump_arg(uint, tex_usage); + trace_dump_arg(uint, usage); - result = winsys->surface_alloc_storage(winsys, - surface, + result = winsys->surface_buffer_create(winsys, width, height, format, - flags, - tex_usage); + usage, + pstride); - trace_dump_ret(int, result); + stride = *pstride; - trace_dump_call_end(); + trace_dump_arg(uint, stride); - return result; -} - - -static void -trace_winsys_surface_release(struct pipe_winsys *_winsys, - struct pipe_surface **psurface) -{ - struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct pipe_winsys *winsys = tr_ws->winsys; - struct pipe_surface *surface = *psurface; - - assert(psurface && *psurface && !(*psurface)->texture); - - trace_dump_call_begin("pipe_winsys", "surface_release"); - - trace_dump_arg(ptr, winsys); - trace_dump_arg(ptr, surface); - - winsys->surface_release(winsys, psurface); + trace_dump_ret(ptr, result); trace_dump_call_end(); + + return result; } @@ -465,9 +420,7 @@ trace_winsys_create(struct pipe_winsys *winsys) tr_ws->base.destroy = trace_winsys_destroy; tr_ws->base.get_name = trace_winsys_get_name; 
tr_ws->base.flush_frontbuffer = trace_winsys_flush_frontbuffer; - tr_ws->base.surface_alloc = trace_winsys_surface_alloc; - tr_ws->base.surface_alloc_storage = trace_winsys_surface_alloc_storage; - tr_ws->base.surface_release = trace_winsys_surface_release; + tr_ws->base.surface_buffer_create = trace_winsys_surface_buffer_create; tr_ws->base.buffer_create = trace_winsys_buffer_create; tr_ws->base.user_buffer_create = trace_winsys_user_buffer_create; tr_ws->base.buffer_map = trace_winsys_buffer_map; diff --git a/src/gallium/drivers/trace/tr_winsys.h b/src/gallium/drivers/trace/tr_winsys.h index 062ddf66a0..3670cb915e 100644 --- a/src/gallium/drivers/trace/tr_winsys.h +++ b/src/gallium/drivers/trace/tr_winsys.h @@ -30,8 +30,8 @@ #include "pipe/p_compiler.h" -#include "pipe/p_debug.h" -#include "pipe/p_winsys.h" +#include "util/u_debug.h" +#include "pipe/internal/p_winsys_screen.h" /** diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/internal/p_winsys_screen.h index 5d18291dc6..ee835578b2 100644 --- a/src/gallium/include/pipe/p_winsys.h +++ b/src/gallium/include/pipe/internal/p_winsys_screen.h @@ -36,7 +36,7 @@ #define P_WINSYS_H -#include "p_format.h" +#include "pipe/p_format.h" #ifdef __cplusplus @@ -76,24 +76,6 @@ struct pipe_winsys void *context_private ); - /** allocate a new surface (no context dependency) */ - struct pipe_surface *(*surface_alloc)(struct pipe_winsys *ws); - - /** - * Allocate storage for a pipe_surface. - * \param flags XXX unused, remove someday - * \return 0 if succeeds. - */ - int (*surface_alloc_storage)(struct pipe_winsys *ws, - struct pipe_surface *surf, - unsigned width, unsigned height, - enum pipe_format format, - unsigned flags, - unsigned tex_usage); - - void (*surface_release)(struct pipe_winsys *ws, struct pipe_surface **s); - - /** * Buffer management. Buffer attributes are mostly fixed over its lifetime. 
* @@ -138,6 +120,24 @@ struct pipe_winsys void *ptr, unsigned bytes); + /** + * Allocate storage for a display target surface. + * + * Often surfaces which are meant to be blitted to the front screen (i.e., + * display targets) must be allocated with special characteristics, memory + * pools, or obtained directly from the windowing system. + * + * This callback is invoked by the pipe_screen when creating a texture marked + * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying + * buffer storage. + */ + struct pipe_buffer *(*surface_buffer_create)(struct pipe_winsys *ws, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned *stride); + + /** * Map the entire data store of a buffer object into the client's address. * flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags. @@ -178,7 +178,6 @@ struct pipe_winsys }; - #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 4d64c74a4a..bc2a0a7ef3 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -96,7 +96,6 @@ typedef int _Bool; typedef unsigned int uint; typedef unsigned char ubyte; typedef unsigned short ushort; -typedef uint64_t uint64; #if 0 #define boolean bool @@ -112,20 +111,22 @@ typedef unsigned char boolean; /* Function inlining */ -#ifdef __cplusplus -# define INLINE inline -#elif defined(__GNUC__) -# define INLINE __inline__ -#elif defined(_MSC_VER) -# define INLINE __inline -#elif defined(__ICL) -# define INLINE __inline -#elif defined(__INTEL_COMPILER) -# define INLINE inline -#elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) -# define INLINE __inline -#else -# define INLINE +#ifndef INLINE +# ifdef __cplusplus +# define INLINE inline +# elif defined(__GNUC__) +# define INLINE __inline__ +# elif defined(_MSC_VER) +# define INLINE __inline +# elif defined(__ICL) +# define INLINE __inline +# elif defined(__INTEL_COMPILER) +# define
INLINE inline +# elif defined(__WATCOMC__) && (__WATCOMC__ >= 1100) +# define INLINE __inline +# else +# define INLINE +# endif #endif @@ -144,10 +145,12 @@ typedef unsigned char boolean; #define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___aligned[SIZE] __attribute__(( aligned( 16 ) )) #define ALIGN16_ASSIGN(NAME) NAME##___aligned #define ALIGN16_ATTRIB __attribute__(( aligned( 16 ) )) +#define ALIGN8_ATTRIB __attribute__(( aligned( 8 ) )) #else #define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME##___unaligned[SIZE + 1] #define ALIGN16_ASSIGN(NAME) align16(NAME##___unaligned) #define ALIGN16_ATTRIB +#define ALIGN8_ATTRIB #endif diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index af3746c026..05cbd2fc4d 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -85,8 +85,19 @@ #define PIPE_ARCH_X86_64 #endif -#if 0 /* FIXME */ +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if defined(PIPE_CC_GCC) && !defined(__SSE2__) +/* #warning SSE2 support requires -msse -msse2 compiler options */ +#else +#define PIPE_ARCH_SSE +#endif +#endif + +#if defined(__PPC__) #define PIPE_ARCH_PPC +#if defined(__PPC64__) +#define PIPE_ARCH_PPC_64 +#endif #endif diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 2646706ff2..9454cc87db 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -40,6 +40,7 @@ struct pipe_screen; struct pipe_fence_handle; struct pipe_state_cache; struct pipe_query; +struct pipe_winsys; /** @@ -109,7 +110,7 @@ struct pipe_context { boolean (*get_query_result)(struct pipe_context *pipe, struct pipe_query *q, boolean wait, - uint64 *result); + uint64_t *result); /*@}*/ /** diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index dc8a92dccb..3cbc93d12b 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -171,6 +171,9 @@ 
enum pipe_texture_target { #define PIPE_TEXTURE_USAGE_PRIMARY 0x4 /* ie a frontbuffer */ #define PIPE_TEXTURE_USAGE_DEPTH_STENCIL 0x8 #define PIPE_TEXTURE_USAGE_SAMPLER 0x10 +#define PIPE_TEXTURE_USAGE_DYNAMIC 0x20 +/** Pipe driver custom usage flags should be greater or equal to this value */ +#define PIPE_TEXTURE_USAGE_CUSTOM (1 << 16) #define PIPE_TEXTURE_GEOM_NON_SQUARE 0x1 #define PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO 0x2 @@ -191,6 +194,16 @@ enum pipe_texture_target { /** + * Transfer object usage flags + */ +enum pipe_transfer_usage { + PIPE_TRANSFER_READ, + PIPE_TRANSFER_WRITE, + PIPE_TRANSFER_READ_WRITE /**< Read/modify/write */ +}; + + +/** * Buffer usage flags */ #define PIPE_BUFFER_USAGE_CPU_READ (1 << 0) @@ -201,6 +214,7 @@ enum pipe_texture_target { #define PIPE_BUFFER_USAGE_VERTEX (1 << 5) #define PIPE_BUFFER_USAGE_INDEX (1 << 6) #define PIPE_BUFFER_USAGE_CONSTANT (1 << 7) +#define PIPE_BUFFER_USAGE_DISCARD (1 << 8) /** Pipe driver custom usage flags should be greater or equal to this value */ #define PIPE_BUFFER_USAGE_CUSTOM (1 << 16) @@ -243,6 +257,7 @@ enum pipe_texture_target { #define PIPE_PRIM_QUADS 7 #define PIPE_PRIM_QUAD_STRIP 8 #define PIPE_PRIM_POLYGON 9 +#define PIPE_PRIM_MAX 10 /** @@ -292,6 +307,7 @@ enum pipe_texture_target { #define PIPE_CAP_GUARD_BAND_BOTTOM 23 /*< float */ #define PIPE_CAP_TEXTURE_MIRROR_CLAMP 24 #define PIPE_CAP_TEXTURE_MIRROR_REPEAT 25 +#define PIPE_CAP_MAX_VERTEX_TEXTURE_UNITS 26 diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 546cf5d9b4..3f65a60436 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright (c) 2008 VMware, Inc. * All Rights Reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -29,8 +30,9 @@ #define PIPE_FORMAT_H #include "p_compiler.h" -#include "p_debug.h" +/* FIXME: remove these header dependencies */ +#include "util/u_debug.h" #include "util/u_string.h" #ifdef __cplusplus @@ -245,13 +247,14 @@ static INLINE uint pf_rev(pipe_format_ycbcr_t f) /** * Compresssed format layouts (this will probably change) */ -#define _PIPE_FORMAT_DXT( LEVEL, RSIZE, GSIZE, BSIZE, ASIZE ) \ +#define _PIPE_FORMAT_DXT( LEVEL, RSIZE, GSIZE, BSIZE, ASIZE, TYPE ) \ ((PIPE_FORMAT_LAYOUT_DXT << 0) | \ ((LEVEL) << 2) | \ ((RSIZE) << 5) | \ ((GSIZE) << 8) | \ ((BSIZE) << 11) | \ - ((ASIZE) << 14) ) + ((ASIZE) << 14) | \ + ((TYPE) << 29)) @@ -360,20 +363,30 @@ enum pipe_format { PIPE_FORMAT_R32G32B32A32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_FIXED ), /* sRGB formats */ PIPE_FORMAT_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_A8_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_A8L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), PIPE_FORMAT_R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ), PIPE_FORMAT_R8G8B8A8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), PIPE_FORMAT_R8G8B8X8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_A8R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_ARGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_X8R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_1RGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_B8G8R8A8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGRA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_B8G8R8X8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGR1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), /* mixed formats */ 
PIPE_FORMAT_X8UB8UG8SR8S_NORM = _PIPE_FORMAT_MIXED( _PIPE_FORMAT_1BGR, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1 ), PIPE_FORMAT_B6UG5SR5S_NORM = _PIPE_FORMAT_MIXED( _PIPE_FORMAT_BGR1, 6, 5, 5, 0, 0, 1, 1, 0, 1, 0 ), /* compressed formats */ - PIPE_FORMAT_DXT1_RGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0 ), - PIPE_FORMAT_DXT1_RGBA = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 8 ), - PIPE_FORMAT_DXT3_RGBA = _PIPE_FORMAT_DXT( 3, 8, 8, 8, 8 ), - PIPE_FORMAT_DXT5_RGBA = _PIPE_FORMAT_DXT( 5, 8, 8, 8, 8 ) + PIPE_FORMAT_DXT1_RGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_DXT1_RGBA = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 8, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_DXT3_RGBA = _PIPE_FORMAT_DXT( 3, 8, 8, 8, 8, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_DXT5_RGBA = _PIPE_FORMAT_DXT( 5, 8, 8, 8, 8, PIPE_FORMAT_TYPE_UNORM ), + + /* sRGB, compressed */ + PIPE_FORMAT_DXT1_SRGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_DXT1_SRGBA = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 8, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_DXT3_SRGBA = _PIPE_FORMAT_DXT( 3, 8, 8, 8, 8, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_DXT5_SRGBA = _PIPE_FORMAT_DXT( 5, 8, 8, 8, 8, PIPE_FORMAT_TYPE_SRGB ) }; /** @@ -477,12 +490,16 @@ pf_get_block(enum pipe_format format, struct pipe_format_block *block) switch(format) { case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_SRGBA: + case PIPE_FORMAT_DXT1_SRGB: block->size = 8; block->width = 4; block->height = 4; break; case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT3_SRGBA: + case PIPE_FORMAT_DXT5_SRGBA: block->size = 16; block->width = 4; block->height = 4; @@ -540,7 +557,7 @@ pf_has_alpha( enum pipe_format format ) /* FIXME: pf_get_component_bits( PIPE_FORMAT_A8L8_UNORM, PIPE_FORMAT_COMP_A ) should not return 0 right? 
*/ if(format == PIPE_FORMAT_A8_UNORM || format == PIPE_FORMAT_A8L8_UNORM || - format == PIPE_FORMAT_A8_L8_SRGB) + format == PIPE_FORMAT_A8L8_SRGB) return TRUE; return pf_get_component_bits( format, PIPE_FORMAT_COMP_A ) ? TRUE : FALSE; case PIPE_FORMAT_LAYOUT_YCBCR: @@ -550,6 +567,9 @@ pf_has_alpha( enum pipe_format format ) case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT1_SRGBA: + case PIPE_FORMAT_DXT3_SRGBA: + case PIPE_FORMAT_DXT5_SRGBA: return TRUE; default: return FALSE; diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index d70de8e301..ffbe2d7612 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -31,7 +31,6 @@ #include "p_context.h" #include "p_defines.h" #include "p_screen.h" -#include "p_winsys.h" #ifdef __cplusplus @@ -39,40 +38,6 @@ extern "C" { #endif -/* XXX: these are a kludge. will fix when all surfaces are views into - * textures, and free-floating winsys surfaces go away. - */ -static INLINE void * -pipe_surface_map( struct pipe_surface *surf, unsigned flags ) -{ - if (surf->texture) { - struct pipe_screen *screen = surf->texture->screen; - return surf->texture->screen->surface_map( screen, surf, flags ); - } - else { - struct pipe_winsys *winsys = surf->winsys; - char *map = (char *)winsys->buffer_map( winsys, surf->buffer, flags ); - if (map == NULL) - return NULL; - return (void *)(map + surf->offset); - } -} - -static INLINE void -pipe_surface_unmap( struct pipe_surface *surf ) -{ - if (surf->texture) { - struct pipe_screen *screen = surf->texture->screen; - surf->texture->screen->surface_unmap( screen, surf ); - } - else { - struct pipe_winsys *winsys = surf->winsys; - winsys->buffer_unmap( winsys, surf->buffer ); - } -} - - - /** * Set 'ptr' to point to 'surf' and update reference counting. * The old thing pointed to, if any, will be unreferenced first. 
@@ -82,23 +47,17 @@ static INLINE void pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) { /* bump the refcount first */ - if (surf) + if (surf) { + assert(surf->refcount); surf->refcount++; + } if (*ptr) { - - /* There are currently two sorts of surfaces... This needs to be - * fixed so that all surfaces are views into a texture. - */ - if ((*ptr)->texture) { - struct pipe_screen *screen = (*ptr)->texture->screen; - screen->tex_surface_release( screen, ptr ); - } - else { - struct pipe_winsys *winsys = (*ptr)->winsys; - winsys->surface_release(winsys, ptr); - } - + struct pipe_screen *screen; + assert((*ptr)->refcount); + assert((*ptr)->texture); + screen = (*ptr)->texture->screen; + screen->tex_surface_release( screen, ptr ); assert(!*ptr); } @@ -106,24 +65,31 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) } -/* XXX: thread safety issues! +/** + * \sa pipe_surface_reference */ static INLINE void -winsys_buffer_reference(struct pipe_winsys *winsys, - struct pipe_buffer **ptr, - struct pipe_buffer *buf) +pipe_transfer_reference(struct pipe_transfer **ptr, struct pipe_transfer *trans) { - if (buf) - buf->refcount++; + /* bump the refcount first */ + if (trans) { + assert(trans->refcount); + trans->refcount++; + } - if (*ptr && --(*ptr)->refcount == 0) - winsys->buffer_destroy( winsys, *ptr ); + if (*ptr) { + struct pipe_screen *screen; + assert((*ptr)->refcount); + assert((*ptr)->texture); + screen = (*ptr)->texture->screen; + screen->tex_transfer_release( screen, ptr ); + assert(!*ptr); + } - *ptr = buf; + *ptr = trans; } - /** * \sa pipe_surface_reference */ @@ -133,12 +99,15 @@ pipe_texture_reference(struct pipe_texture **ptr, { assert(ptr); - if (pt) + if (pt) { + assert(pt->refcount); pt->refcount++; + } if (*ptr) { struct pipe_screen *screen = (*ptr)->screen; assert(screen); + assert((*ptr)->refcount); screen->texture_release(screen, ptr); assert(!*ptr); @@ -154,32 +123,27 @@ 
pipe_texture_release(struct pipe_texture **ptr) struct pipe_screen *screen; assert(ptr); screen = (*ptr)->screen; + assert((*ptr)->refcount); screen->texture_release(screen, ptr); *ptr = NULL; } /** - * Convenience wrappers for winsys buffer functions. + * Convenience wrappers for screen buffer functions. */ static INLINE struct pipe_buffer * pipe_buffer_create( struct pipe_screen *screen, unsigned alignment, unsigned usage, unsigned size ) { - return screen->winsys->buffer_create(screen->winsys, alignment, usage, size); + return screen->buffer_create(screen, alignment, usage, size); } static INLINE struct pipe_buffer * pipe_user_buffer_create( struct pipe_screen *screen, void *ptr, unsigned size ) { - return screen->winsys->user_buffer_create(screen->winsys, ptr, size); -} - -static INLINE void -pipe_buffer_destroy( struct pipe_screen *screen, struct pipe_buffer *buf ) -{ - screen->winsys->buffer_destroy(screen->winsys, buf); + return screen->user_buffer_create(screen, ptr, size); } static INLINE void * @@ -187,25 +151,36 @@ pipe_buffer_map(struct pipe_screen *screen, struct pipe_buffer *buf, unsigned usage) { - return screen->winsys->buffer_map(screen->winsys, buf, usage); + return screen->buffer_map(screen, buf, usage); } static INLINE void pipe_buffer_unmap(struct pipe_screen *screen, struct pipe_buffer *buf) { - screen->winsys->buffer_unmap(screen->winsys, buf); + screen->buffer_unmap(screen, buf); } -/* XXX when we're using this everywhere, get rid of - * winsys_buffer_reference() above. +/* XXX: thread safety issues! 
*/ static INLINE void pipe_buffer_reference(struct pipe_screen *screen, struct pipe_buffer **ptr, struct pipe_buffer *buf) { - winsys_buffer_reference(screen->winsys, ptr, buf); + if (buf) { + assert(buf->refcount); + buf->refcount++; + } + + if (*ptr) { + assert((*ptr)->refcount); + if(--(*ptr)->refcount == 0) { + screen->buffer_destroy( screen, *ptr ); + } + } + + *ptr = buf; } diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index b15affef7a..341d1caea0 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -26,6 +26,8 @@ **************************************************************************/ /** + * @file + * * Screen, Adapter or GPU * * These are driver functions/facilities that are context independent. @@ -37,7 +39,8 @@ #include "pipe/p_compiler.h" -#include "pipe/p_state.h" +#include "pipe/p_format.h" +#include "pipe/p_defines.h" @@ -46,6 +49,12 @@ extern "C" { #endif +/** Opaque type */ +struct pipe_fence_handle; +struct pipe_winsys; +struct pipe_buffer; + + /** * Gallium screen/adapter context. 
Basically everything @@ -101,7 +110,7 @@ struct pipe_screen { */ struct pipe_texture * (*texture_blanket)(struct pipe_screen *, const struct pipe_texture *templat, - const unsigned *pitch, + const unsigned *stride, struct pipe_buffer *buffer); void (*texture_release)(struct pipe_screen *, @@ -120,13 +129,127 @@ struct pipe_screen { struct pipe_surface ** ); - void *(*surface_map)( struct pipe_screen *, - struct pipe_surface *surface, - unsigned flags ); + /** Get a transfer object for transferring data to/from a texture */ + struct pipe_transfer *(*get_tex_transfer)(struct pipe_screen *, + struct pipe_texture *texture, + unsigned face, unsigned level, + unsigned zslice, + enum pipe_transfer_usage usage, + unsigned x, unsigned y, + unsigned w, unsigned h); - void (*surface_unmap)( struct pipe_screen *, - struct pipe_surface *surface ); + /* Transfer objects allocated by the above must be released here: + */ + void (*tex_transfer_release)( struct pipe_screen *, + struct pipe_transfer ** ); + void *(*transfer_map)( struct pipe_screen *, + struct pipe_transfer *transfer ); + + void (*transfer_unmap)( struct pipe_screen *, + struct pipe_transfer *transfer ); + + + /** + * Buffer management. Buffer attributes are mostly fixed over its lifetime. + * + */ + struct pipe_buffer *(*buffer_create)( struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size ); + + /** + * Create a buffer that wraps user-space data. + * + * Effectively this schedules a delayed call to buffer_create + * followed by an upload of the data at *some point in the future*, + * or perhaps never. Basically the allocate/upload is delayed + * until the buffer is actually passed to hardware. + * + * The intention is to provide a quick way to turn regular data + * into a buffer, and secondly to avoid a copy operation if that + * data subsequently turns out to be only accessed by the CPU. 
+ * + * Common example is OpenGL vertex buffers that are subsequently + * processed either by software TNL in the driver or by passing to + * hardware. + * + * XXX: What happens if the delayed call to buffer_create() fails? + * + * Note that ptr may be accessed at any time upto the time when the + * buffer is destroyed, so the data must not be freed before then. + */ + struct pipe_buffer *(*user_buffer_create)(struct pipe_screen *screen, + void *ptr, + unsigned bytes); + + /** + * Allocate storage for a display target surface. + * + * Often surfaces which are meant to be blitted to the front screen (i.e., + * display targets) must be allocated with special characteristics, memory + * pools, or obtained directly from the windowing system. + * + * This callback is invoked by the pipe_screenwhen creating a texture marked + * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying + * buffer storage. + */ + struct pipe_buffer *(*surface_buffer_create)(struct pipe_screen *screen, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned *stride); + + + /** + * Map the entire data store of a buffer object into the client's address. + * flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags. + */ + void *(*buffer_map)( struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned usage ); + + void (*buffer_unmap)( struct pipe_screen *screen, + struct pipe_buffer *buf ); + + void (*buffer_destroy)( struct pipe_screen *screen, + struct pipe_buffer *buf ); + + + /** + * Do any special operations to ensure frontbuffer contents are + * displayed, eg copy fake frontbuffer. + */ + void (*flush_frontbuffer)( struct pipe_screen *screen, + struct pipe_surface *surf, + void *context_private ); + + + + /** Set ptr = fence, with reference counting */ + void (*fence_reference)( struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence ); + + /** + * Checks whether the fence has been signalled. 
+ * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_signalled)( struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag ); + + /** + * Wait for the fence to finish. + * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_finish)( struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag ); + }; diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index d591f046fb..35df70e7b7 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -1,4 +1,31 @@ -#if !defined TGSI_TOKEN_H +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef TGSI_TOKEN_H #define TGSI_TOKEN_H #ifdef __cplusplus @@ -36,10 +63,10 @@ struct tgsi_processor struct tgsi_token { - unsigned Type : 4; /* TGSI_TOKEN_TYPE_ */ - unsigned Size : 8; /* UINT */ + unsigned Type : 4; /**< TGSI_TOKEN_TYPE_x */ + unsigned NrTokens : 8; /**< UINT */ unsigned Padding : 19; - unsigned Extended : 1; /* BOOL */ + unsigned Extended : 1; /**< BOOL */ }; enum tgsi_file_type { @@ -79,22 +106,22 @@ enum tgsi_file_type { struct tgsi_declaration { - unsigned Type : 4; /* TGSI_TOKEN_TYPE_DECLARATION */ - unsigned Size : 8; /* UINT */ - unsigned File : 4; /* one of TGSI_FILE_x */ - unsigned UsageMask : 4; /* bitmask of TGSI_WRITEMASK_x flags */ - unsigned Interpolate : 4; /* TGSI_INTERPOLATE_ */ - unsigned Semantic : 1; /* BOOL, any semantic info? */ - unsigned Centroid : 1; /* centroid sampling */ - unsigned Invariant : 1; /* invariant optimization */ + unsigned Type : 4; /**< TGSI_TOKEN_TYPE_DECLARATION */ + unsigned NrTokens : 8; /**< UINT */ + unsigned File : 4; /**< one of TGSI_FILE_x */ + unsigned UsageMask : 4; /**< bitmask of TGSI_WRITEMASK_x flags */ + unsigned Interpolate : 4; /**< one of TGSI_INTERPOLATE_x */ + unsigned Semantic : 1; /**< BOOL, any semantic info? */ + unsigned Centroid : 1; /**< centroid sampling? */ + unsigned Invariant : 1; /**< invariant optimization? 
*/ unsigned Padding : 4; - unsigned Extended : 1; /* BOOL */ + unsigned Extended : 1; /**< BOOL */ }; struct tgsi_declaration_range { - unsigned First : 16; /* UINT */ - unsigned Last : 16; /* UINT */ + unsigned First : 16; /**< UINT */ + unsigned Last : 16; /**< UINT */ }; #define TGSI_SEMANTIC_POSITION 0 @@ -108,8 +135,8 @@ struct tgsi_declaration_range struct tgsi_declaration_semantic { - unsigned SemanticName : 8; /* one of TGSI_SEMANTIC_ */ - unsigned SemanticIndex : 16; /* UINT */ + unsigned SemanticName : 8; /**< one of TGSI_SEMANTIC_x */ + unsigned SemanticIndex : 16; /**< UINT */ unsigned Padding : 8; }; @@ -117,11 +144,11 @@ struct tgsi_declaration_semantic struct tgsi_immediate { - unsigned Type : 4; /* TGSI_TOKEN_TYPE_IMMEDIATE */ - unsigned Size : 8; /* UINT */ - unsigned DataType : 4; /* TGSI_IMM_ */ + unsigned Type : 4; /**< TGSI_TOKEN_TYPE_IMMEDIATE */ + unsigned NrTokens : 8; /**< UINT */ + unsigned DataType : 4; /**< one of TGSI_IMM_x */ unsigned Padding : 15; - unsigned Extended : 1; /* BOOL */ + unsigned Extended : 1; /**< BOOL */ }; struct tgsi_immediate_float32 @@ -398,7 +425,7 @@ struct tgsi_immediate_float32 #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ #define TGSI_SAT_MINUS_PLUS_ONE 2 /* clamp to [-1,1] */ -/* +/** * Opcode is the operation code to execute. A given operation defines the * semantics how the source registers (if any) are interpreted and what is * written to the destination registers (if any) as a result of execution. @@ -415,7 +442,7 @@ struct tgsi_immediate_float32 struct tgsi_instruction { unsigned Type : 4; /* TGSI_TOKEN_TYPE_INSTRUCTION */ - unsigned Size : 8; /* UINT */ + unsigned NrTokens : 8; /* UINT */ unsigned Opcode : 8; /* TGSI_OPCODE_ */ unsigned Saturate : 2; /* TGSI_SAT_ */ unsigned NumDstRegs : 2; /* UINT */ @@ -431,7 +458,7 @@ struct tgsi_instruction * * Then, tgsi_instruction::NumSrcRegs of tgsi_src_register follow. 
* - * tgsi_instruction::Size contains the total number of words that make the + * tgsi_instruction::NrTokens contains the total number of words that make the * instruction, including the instruction word. */ @@ -483,7 +510,7 @@ struct tgsi_instruction_ext #define TGSI_SWIZZLE_Z 2 #define TGSI_SWIZZLE_W 3 -/* +/** * Precision controls the precision at which the operation should be executed. * * CondDstUpdate enables condition code register writes. When this field is @@ -550,7 +577,7 @@ struct tgsi_instruction_ext_predicate unsigned Extended : 1; /* BOOL */ }; -/* +/** * File specifies the register array to access. * * Index specifies the element number of a register in the register file. @@ -582,7 +609,7 @@ struct tgsi_src_register unsigned Extended : 1; /* BOOL */ }; -/* +/** * If tgsi_src_register::Extended is TRUE, tgsi_src_register_ext follows. * * Then, if tgsi_src_register::Indirect is TRUE, another tgsi_src_register @@ -601,7 +628,7 @@ struct tgsi_src_register_ext unsigned Extended : 1; /* BOOL */ }; -/* +/** * If tgsi_src_register_ext::Type is TGSI_SRC_REGISTER_EXT_TYPE_SWZ, * it should be cast to tgsi_src_register_ext_swz. * @@ -619,7 +646,7 @@ struct tgsi_src_register_ext #define TGSI_EXTSWIZZLE_ZERO 4 #define TGSI_EXTSWIZZLE_ONE 5 -/* +/** * ExtSwizzleX, ExtSwizzleY, ExtSwizzleZ and ExtSwizzleW swizzle the source * register in an extended manner. * diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index da783389da..a2e839da5c 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -27,6 +27,8 @@ /** + * @file + * * Abstract graphics pipe state objects. * * Basic notes: @@ -64,7 +66,6 @@ extern "C" { /* fwd decls */ struct pipe_screen; struct pipe_surface; -struct pipe_winsys; @@ -160,7 +161,6 @@ struct pipe_clip_state struct pipe_constant_buffer { struct pipe_buffer *buffer; - unsigned size; /** in bytes (XXX: redundant!) 
*/ }; @@ -170,7 +170,8 @@ struct pipe_shader_state }; -struct pipe_depth_state { +struct pipe_depth_state +{ unsigned enabled:1; /**< depth test enabled? */ unsigned writemask:1; /**< allow depth buffer writes? */ unsigned func:3; /**< depth test func (PIPE_FUNC_x) */ @@ -178,22 +179,24 @@ struct pipe_depth_state { }; -struct pipe_stencil_state { +struct pipe_stencil_state +{ unsigned enabled:1; /**< stencil[0]: stencil enabled, stencil[1]: two-side enabled */ unsigned func:3; /**< PIPE_FUNC_x */ unsigned fail_op:3; /**< PIPE_STENCIL_OP_x */ unsigned zpass_op:3; /**< PIPE_STENCIL_OP_x */ unsigned zfail_op:3; /**< PIPE_STENCIL_OP_x */ - ubyte ref_value; - ubyte value_mask; - ubyte write_mask; + ubyte ref_value; + ubyte valuemask; + ubyte writemask; }; -struct pipe_alpha_state { +struct pipe_alpha_state +{ unsigned enabled:1; unsigned func:3; /**< PIPE_FUNC_x */ - float ref; /**< reference value */ + float ref_value; /**< reference value */ }; @@ -236,7 +239,7 @@ struct pipe_framebuffer_state unsigned width, height; /** multiple colorbuffers for multiple render targets */ - unsigned num_cbufs; + unsigned nr_cbufs; struct pipe_surface *cbufs[PIPE_MAX_COLOR_BUFS]; struct pipe_surface *zsbuf; /**< Z/stencil buffer */ @@ -272,24 +275,41 @@ struct pipe_sampler_state */ struct pipe_surface { - struct pipe_buffer *buffer; /**< surface's buffer/memory */ enum pipe_format format; /**< PIPE_FORMAT_x */ unsigned status; /**< PIPE_SURFACE_STATUS_x */ unsigned clear_value; /**< XXX may be temporary */ unsigned width; /**< logical width in pixels */ unsigned height; /**< logical height in pixels */ - struct pipe_format_block block; - unsigned nblocksx; /**< allocated width in blocks */ - unsigned nblocksy; /**< allocated height in blocks */ - unsigned stride; /**< stride in bytes between rows of blocks */ unsigned layout; /**< PIPE_SURFACE_LAYOUT_x */ unsigned offset; /**< offset from start of buffer, in bytes */ unsigned refcount; unsigned usage; /**< PIPE_BUFFER_USAGE_* */ - 
struct pipe_winsys *winsys; /**< winsys which owns/created the surface */ + struct pipe_texture *texture; /**< texture into which this is a view */ + unsigned face; + unsigned level; + unsigned zslice; +}; + + +/** + * Transfer object. For data transfer to/from a texture. + */ +struct pipe_transfer +{ + enum pipe_format format; /**< PIPE_FORMAT_x */ + unsigned x; /**< x offset from start of texture image */ + unsigned y; /**< y offset from start of texture image */ + unsigned width; /**< logical width in pixels */ + unsigned height; /**< logical height in pixels */ + struct pipe_format_block block; + unsigned nblocksx; /**< allocated width in blocks */ + unsigned nblocksy; /**< allocated height in blocks */ + unsigned stride; /**< stride in bytes between rows of blocks */ + unsigned refcount; + unsigned usage; /**< PIPE_TRANSFER_* */ - struct pipe_texture *texture; /**< optional texture into which this is a view */ + struct pipe_texture *texture; /**< texture to transfer to/from */ unsigned face; unsigned level; unsigned zslice; @@ -315,9 +335,9 @@ struct pipe_texture unsigned last_level:8; /**< Index of last mipmap level present/defined */ unsigned compressed:1; - unsigned nr_samples:8; /**< for multisampled surfaces, nr of samples */ + unsigned nr_samples:8; /**< for multisampled surfaces, nr of samples */ - unsigned tex_usage; /* PIPE_TEXTURE_USAGE_* */ + unsigned tex_usage; /* PIPE_TEXTURE_USAGE_* */ /* These are also refcounted: */ @@ -334,7 +354,7 @@ struct pipe_texture */ struct pipe_vertex_buffer { - unsigned pitch; /**< stride to same attrib in next vertex, in bytes */ + unsigned stride; /**< stride to same attrib in next vertex, in bytes */ unsigned max_index; /**< number of vertices in this buffer */ unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */ struct pipe_buffer *buffer; /**< the actual buffer */ diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index e01d5a602b..8af3cd958b 100644 --- 
a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -25,6 +25,8 @@ /** + * @file + * * Thread, mutex, condition var and thread-specific data functions. */ diff --git a/src/gallium/include/state_tracker/drm_api.h b/src/gallium/include/state_tracker/drm_api.h new file mode 100644 index 0000000000..54480fa047 --- /dev/null +++ b/src/gallium/include/state_tracker/drm_api.h @@ -0,0 +1,33 @@ + +#ifndef _DRM_API_H_ +#define _DRM_API_H_ + +struct pipe_screen; +struct pipe_winsys; +struct pipe_context; + +struct drm_api +{ + /** + * Special buffer function + */ + /*@{*/ + struct pipe_screen* (*create_screen)(int drmFB, int pciID); + struct pipe_context* (*create_context)(struct pipe_screen *screen); + /*@}*/ + + /** + * Special buffer function + */ + /*@{*/ + struct pipe_buffer* (*buffer_from_handle)(struct pipe_winsys *winsys, const char *name, unsigned handle); + unsigned (*handle_from_buffer)(struct pipe_winsys *winsys, struct pipe_buffer *buffer); + /*@}*/ +}; + +/** + * A driver needs to export this symbol + */ +extern struct drm_api drm_api_hocks; + +#endif diff --git a/src/gallium/state_trackers/Makefile b/src/gallium/state_trackers/Makefile new file mode 100644 index 0000000000..265ca468c2 --- /dev/null +++ b/src/gallium/state_trackers/Makefile @@ -0,0 +1,25 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +SUBDIRS = $(GALLIUM_STATE_TRACKERS_DIRS) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` + rm -f `find . 
# src/gallium/state_trackers/egl/Makefile
#
# Builds libegldrm.a, a static archive made from every C file found in
# this directory.  Object files are produced by make's built-in %.o: %.c
# rule, which picks up the CFLAGS assembled below.

TARGET = libegldrm.a
# Simply-expanded so the directory is globbed once, at parse time.
CFILES := $(wildcard ./*.c)
OBJECTS := $(patsubst ./%.c,./%.o,$(CFILES))
GALLIUMDIR = ../..
TOP = ../../../..

include ${TOP}/configs/current

CFLAGS += -g -Wall -Werror-implicit-function-declaration -fPIC \
          -I${GALLIUMDIR}/include \
          -I${GALLIUMDIR}/auxiliary \
          -I${TOP}/src/mesa/drivers/dri/common \
          -I${TOP}/src/mesa \
          -I$(TOP)/include \
          -I$(TOP)/src/egl/main \
          ${LIBDRM_CFLAGS}

#############################################

# .PHONY is a special *target*, not a variable: it must be declared with
# a colon.  Written as an assignment it has no effect, and a stray file
# named "all" or "clean" would silently disable the rule of that name.
.PHONY: all clean

all: ${TARGET}

${TARGET}: ${OBJECTS}
	$(AR) rcs $@ $^

clean:
	$(RM) ${OBJECTS} ${TARGET}
need_GL_NV_vertex_program +#include "extension_helper.h" + +/** + * TODO HACK! FUGLY! + * Copied for intel extentions. + */ +const struct dri_extension card_extensions[] = { + {"GL_ARB_multisample", GL_ARB_multisample_functions}, + {"GL_ARB_multitexture", NULL}, + {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, + {"GL_ARB_texture_border_clamp", NULL}, + {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, + {"GL_ARB_texture_cube_map", NULL}, + {"GL_ARB_texture_env_add", NULL}, + {"GL_ARB_texture_env_combine", NULL}, + {"GL_ARB_texture_env_dot3", NULL}, + {"GL_ARB_texture_mirrored_repeat", NULL}, + {"GL_ARB_texture_rectangle", NULL}, + {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions}, + {"GL_ARB_pixel_buffer_object", NULL}, + {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, + {"GL_ARB_window_pos", GL_ARB_window_pos_functions}, + {"GL_EXT_blend_color", GL_EXT_blend_color_functions}, + {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, + {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, + {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, + {"GL_EXT_blend_subtract", NULL}, + {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions}, + {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions}, + {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions}, + {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, + {"GL_EXT_packed_depth_stencil", NULL}, + {"GL_EXT_pixel_buffer_object", NULL}, + {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, + {"GL_EXT_stencil_wrap", NULL}, + {"GL_EXT_texture_edge_clamp", NULL}, + {"GL_EXT_texture_env_combine", NULL}, + {"GL_EXT_texture_env_dot3", NULL}, + {"GL_EXT_texture_filter_anisotropic", NULL}, + {"GL_EXT_texture_lod_bias", NULL}, + {"GL_3DFX_texture_compression_FXT1", NULL}, + {"GL_APPLE_client_storage", NULL}, + {"GL_MESA_pack_invert", NULL}, + {"GL_MESA_ycbcr_texture", NULL}, + 
{"GL_NV_blend_square", NULL}, + {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, + {"GL_NV_vertex_program1_1", NULL}, + {"GL_SGIS_generate_mipmap", NULL }, + {NULL, NULL} +}; + +EGLContext +drm_create_context(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, EGLContext share_list, const EGLint *attrib_list) +{ + struct drm_device *dev = (struct drm_device *)drv; + struct drm_context *ctx; + struct drm_context *share = NULL; + struct st_context *st_share = NULL; + _EGLConfig *conf; + int i; + __GLcontextModes *visual; + + conf = _eglLookupConfig(drv, dpy, config); + if (!conf) { + _eglError(EGL_BAD_CONFIG, "eglCreateContext"); + return EGL_NO_CONTEXT; + } + + for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) { + switch (attrib_list[i]) { + /* no attribs defined for now */ + default: + _eglError(EGL_BAD_ATTRIBUTE, "eglCreateContext"); + return EGL_NO_CONTEXT; + } + } + + ctx = (struct drm_context *) calloc(1, sizeof(struct drm_context)); + if (!ctx) + goto err_c; + + _eglInitContext(drv, dpy, &ctx->base, config, attrib_list); + + ctx->pipe = drm_api_hocks.create_context(dev->screen); + if (!ctx->pipe) + goto err_pipe; + + if (share) + st_share = share->st; + + visual = drm_visual_from_config(conf); + ctx->st = st_create_context(ctx->pipe, visual, st_share); + drm_visual_modes_destroy(visual); + + if (!ctx->st) + goto err_gl; + + /* generate handle and insert into hash table */ + _eglSaveContext(&ctx->base); + assert(_eglGetContextHandle(&ctx->base)); + + return _eglGetContextHandle(&ctx->base); + +err_gl: + ctx->pipe->destroy(ctx->pipe); +err_pipe: + free(ctx); +err_c: + return EGL_NO_CONTEXT; +} + +EGLBoolean +drm_destroy_context(_EGLDriver *drv, EGLDisplay dpy, EGLContext context) +{ + struct drm_context *c = lookup_drm_context(context); + _eglRemoveContext(&c->base); + if (c->base.IsBound) { + c->base.DeletePending = EGL_TRUE; + } else { + st_destroy_context(c->st); + c->pipe->destroy(c->pipe); + free(c); + } + return EGL_TRUE; +} + +EGLBoolean 
+drm_make_current(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext context) +{ + struct drm_surface *readSurf = lookup_drm_surface(read); + struct drm_surface *drawSurf = lookup_drm_surface(draw); + struct drm_context *ctx = lookup_drm_context(context); + EGLBoolean b; + + b = _eglMakeCurrent(drv, dpy, draw, read, context); + if (!b) + return EGL_FALSE; + + if (ctx) { + if (!drawSurf || !readSurf) + return EGL_FALSE; + + drawSurf->user = ctx; + readSurf->user = ctx; + + st_make_current(ctx->st, drawSurf->stfb, readSurf->stfb); + + /* st_resize_framebuffer needs a bound context to work */ + st_resize_framebuffer(drawSurf->stfb, drawSurf->w, drawSurf->h); + st_resize_framebuffer(readSurf->stfb, readSurf->w, readSurf->h); + } else { + drawSurf->user = NULL; + readSurf->user = NULL; + + st_make_current(NULL, NULL, NULL); + } + + return EGL_TRUE; +} diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c new file mode 100644 index 0000000000..281dff9f8a --- /dev/null +++ b/src/gallium/state_trackers/egl/egl_surface.c @@ -0,0 +1,409 @@ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "egl_tracker.h" + +#include "egllog.h" + +#include "pipe/p_inlines.h" +#include "pipe/p_screen.h" +#include "pipe/p_context.h" + +#include "state_tracker/drm_api.h" + +/* + * Util functions + */ + +static drmModeModeInfoPtr +drm_find_mode(drmModeConnectorPtr connector, _EGLMode *mode) +{ + int i; + drmModeModeInfoPtr m = NULL; + + for (i = 0; i < connector->count_modes; i++) { + m = &connector->modes[i]; + if (m->hdisplay == mode->Width && m->vdisplay == mode->Height && m->vrefresh == mode->RefreshRate) + break; + m = &connector->modes[0]; /* if we can't find one, return first */ + } + + return m; +} + +static struct st_framebuffer * +drm_create_framebuffer(const __GLcontextModes *visual, + unsigned width, + unsigned height, + void *priv) +{ + enum pipe_format colorFormat, depthFormat, 
stencilFormat; + + if (visual->redBits == 5) + colorFormat = PIPE_FORMAT_R5G6B5_UNORM; + else + colorFormat = PIPE_FORMAT_A8R8G8B8_UNORM; + + if (visual->depthBits == 16) + depthFormat = PIPE_FORMAT_Z16_UNORM; + else if (visual->depthBits == 24) + depthFormat = PIPE_FORMAT_S8Z24_UNORM; + else + depthFormat = PIPE_FORMAT_NONE; + + if (visual->stencilBits == 8) + stencilFormat = PIPE_FORMAT_S8Z24_UNORM; + else + stencilFormat = PIPE_FORMAT_NONE; + + return st_create_framebuffer(visual, + colorFormat, + depthFormat, + stencilFormat, + width, + height, + priv); +} + +static void +drm_create_texture(_EGLDriver *drv, + struct drm_screen *scrn, + unsigned w, unsigned h) +{ + struct drm_device *dev = (struct drm_device *)drv; + struct pipe_screen *screen = dev->screen; + struct pipe_surface *surface; + struct pipe_texture *texture; + struct pipe_texture templat; + struct pipe_buffer *buf; + unsigned stride = 1024; + unsigned pitch = 0; + unsigned size = 0; + + /* ugly */ + if (stride < w) + stride = 2048; + + pitch = stride * 4; + size = h * 2 * pitch; + + buf = pipe_buffer_create(screen, + 0, /* alignment */ + PIPE_BUFFER_USAGE_GPU_READ_WRITE | + PIPE_BUFFER_USAGE_CPU_READ_WRITE, + size); + + if (!buf) + goto err_buf; + + memset(&templat, 0, sizeof(templat)); + templat.tex_usage |= PIPE_TEXTURE_USAGE_DISPLAY_TARGET; + templat.tex_usage |= PIPE_TEXTURE_USAGE_RENDER_TARGET; + templat.target = PIPE_TEXTURE_2D; + templat.last_level = 0; + templat.depth[0] = 1; + templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; + templat.width[0] = w; + templat.height[0] = h; + pf_get_block(templat.format, &templat.block); + + texture = screen->texture_blanket(dev->screen, + &templat, + &pitch, + buf); + if (!texture) + goto err_tex; + + surface = screen->get_tex_surface(screen, + texture, + 0, + 0, + 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + + if (!surface) + goto err_surf; + + + scrn->tex = texture; + scrn->surface = surface; + scrn->buffer = buf; + scrn->front.width = w; + scrn->front.height = h; + 
scrn->front.pitch = pitch; + scrn->front.handle = drm_api_hocks.handle_from_buffer(dev->winsys, scrn->buffer); + if (0) + goto err_handle; + + return; + +err_handle: + pipe_surface_reference(&surface, NULL); +err_surf: + pipe_texture_reference(&texture, NULL); +err_tex: + pipe_buffer_reference(screen, &buf, NULL); +err_buf: + return; +} + +/* + * Exported functions + */ + +void +drm_takedown_shown_screen(_EGLDriver *drv, struct drm_screen *screen) +{ + struct drm_device *dev = (struct drm_device *)drv; + + screen->surf = NULL; + + drmModeSetCrtc( + dev->drmFD, + screen->crtcID, + 0, // FD + 0, 0, + NULL, 0, // List of output ids + NULL); + + drmModeRmFB(dev->drmFD, screen->fbID); + drmModeFreeFB(screen->fb); + screen->fb = NULL; + + pipe_surface_reference(&screen->surface, NULL); + pipe_texture_reference(&screen->tex, NULL); + pipe_buffer_reference(dev->screen, &screen->buffer, NULL); + + screen->shown = 0; +} + +EGLSurface +drm_create_window_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, NativeWindowType window, const EGLint *attrib_list) +{ + return EGL_NO_SURFACE; +} + + +EGLSurface +drm_create_pixmap_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, NativePixmapType pixmap, const EGLint *attrib_list) +{ + return EGL_NO_SURFACE; +} + + +EGLSurface +drm_create_pbuffer_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, + const EGLint *attrib_list) +{ + int i; + int width = -1; + int height = -1; + struct drm_surface *surf = NULL; + __GLcontextModes *visual; + _EGLConfig *conf; + + conf = _eglLookupConfig(drv, dpy, config); + if (!conf) { + _eglError(EGL_BAD_CONFIG, "eglCreatePbufferSurface"); + return EGL_NO_CONTEXT; + } + + for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) { + switch (attrib_list[i]) { + case EGL_WIDTH: + width = attrib_list[++i]; + break; + case EGL_HEIGHT: + height = attrib_list[++i]; + break; + default: + _eglError(EGL_BAD_ATTRIBUTE, "eglCreatePbufferSurface"); + return EGL_NO_SURFACE; + } + } + + if 
(width < 1 || height < 1) { + _eglError(EGL_BAD_ATTRIBUTE, "eglCreatePbufferSurface"); + return EGL_NO_SURFACE; + } + + surf = (struct drm_surface *) calloc(1, sizeof(struct drm_surface)); + if (!surf) + goto err; + + if (!_eglInitSurface(drv, dpy, &surf->base, EGL_PBUFFER_BIT, config, attrib_list)) + goto err_surf; + + surf->w = width; + surf->h = height; + + visual = drm_visual_from_config(conf); + surf->stfb = drm_create_framebuffer(visual, + width, + height, + (void*)surf); + drm_visual_modes_destroy(visual); + + _eglSaveSurface(&surf->base); + return surf->base.Handle; + +err_surf: + free(surf); +err: + return EGL_NO_SURFACE; +} + +EGLSurface +drm_create_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy, EGLConfig cfg, + const EGLint *attrib_list) +{ + EGLSurface surf = drm_create_pbuffer_surface(drv, dpy, cfg, attrib_list); + + return surf; +} + +EGLBoolean +drm_show_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy, + EGLScreenMESA screen, + EGLSurface surface, EGLModeMESA m) +{ + struct drm_device *dev = (struct drm_device *)drv; + struct drm_surface *surf = lookup_drm_surface(surface); + struct drm_screen *scrn = lookup_drm_screen(dpy, screen); + struct pipe_context *pipe; + _EGLMode *mode = _eglLookupMode(dpy, m); + int ret; + unsigned int i, k; + + if (scrn->shown) + drm_takedown_shown_screen(drv, scrn); + + + drm_create_texture(drv, scrn, mode->Width, mode->Height); + if (!scrn->buffer) + return EGL_FALSE; + + ret = drmModeAddFB(dev->drmFD, + scrn->front.width, scrn->front.height, + 32, 32, scrn->front.pitch, + scrn->front.handle, + &scrn->fbID); + + if (ret) + goto err_bo; + + scrn->fb = drmModeGetFB(dev->drmFD, scrn->fbID); + if (!scrn->fb) + goto err_bo; + + /* find a fitting crtc */ + { + drmModeConnector *con = scrn->connector; + + scrn->mode = drm_find_mode(con, mode); + if (!scrn->mode) + goto err_fb; + + for (k = 0; k < con->count_encoders; k++) { + drmModeEncoder *enc = drmModeGetEncoder(dev->drmFD, con->encoders[k]); + for (i = 0; i < 
dev->res->count_crtcs; i++) {
+				/* enc is NULL when the encoder lookup failed; skip it. */
+				if (enc && (enc->possible_crtcs & (1 << i))) {
+					/* save the ID */
+					scrn->crtcID = dev->res->crtcs[i];
+
+					/* skip the rest: push both loop counters past their bounds */
+					i = dev->res->count_crtcs;
+					k = con->count_encoders;
+				}
+			}
+			drmModeFreeEncoder(enc);
+		}
+	}
+
+	ret = drmModeSetCrtc(dev->drmFD,
+	                     scrn->crtcID,
+	                     scrn->fbID,
+	                     0, 0,
+	                     &scrn->connectorID, 1,
+	                     scrn->mode);
+
+	if (ret)
+		goto err_crtc;
+
+	/* Link surface and screen to each other now that scanout is live. */
+	surf->screen = scrn;
+
+	scrn->surf = surf;
+	scrn->shown = 1;
+
+	return EGL_TRUE;
+
+err_crtc:
+	scrn->crtcID = 0;
+
+err_fb:
+	drmModeRmFB(dev->drmFD, scrn->fbID);
+	drmModeFreeFB(scrn->fb);
+	scrn->fb = NULL;
+
+err_bo:
+	pipe_surface_reference(&scrn->surface, NULL);
+	pipe_texture_reference(&scrn->tex, NULL);
+	pipe_buffer_reference(dev->screen, &scrn->buffer, NULL);
+
+	return EGL_FALSE;
+}
+
+/**
+ * Destroy a surface.  A surface that is still bound is only marked
+ * DeletePending; otherwise the screen it is shown on (if any) is taken
+ * down and the surface is freed.  Returns EGL_FALSE for an unknown handle.
+ */
+EGLBoolean
+drm_destroy_surface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface)
+{
+	struct drm_surface *surf = lookup_drm_surface(surface);
+
+	/* Same guard as drm_swap_buffers(): the lookup result may be NULL. */
+	if (!surf)
+		return EGL_FALSE;
+
+	_eglRemoveSurface(&surf->base);
+	if (surf->base.IsBound) {
+		surf->base.DeletePending = EGL_TRUE;
+	} else {
+		if (surf->screen)
+			drm_takedown_shown_screen(drv, surf->screen);
+		st_unreference_framebuffer(surf->stfb);
+		free(surf);
+	}
+	return EGL_TRUE;
+}
+
+/**
+ * Present the back buffer.  When the surface is shown on a screen, the
+ * back-left buffer is copied onto that screen's scanout surface.
+ */
+EGLBoolean
+drm_swap_buffers(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw)
+{
+	struct drm_surface *surf = lookup_drm_surface(draw);
+	struct pipe_surface *back_surf;
+
+	if (!surf)
+		return EGL_FALSE;
+
+	/* error checking */
+	if (!_eglSwapBuffers(drv, dpy, draw))
+		return EGL_FALSE;
+
+	st_get_framebuffer_surface(surf->stfb, ST_SURFACE_BACK_LEFT, &back_surf);
+
+	if (back_surf) {
+
+		st_notify_swapbuffers(surf->stfb);
+
+		/* The copy needs a context to run on; skip it when none is attached. */
+		if (surf->screen && surf->user) {
+			surf->user->pipe->flush(surf->user->pipe, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_TEXTURE_CACHE,
NULL); + /* TODO stuff here */ + } + + st_notify_swapbuffers_complete(surf->stfb); + } + + return EGL_TRUE; +} diff --git a/src/gallium/state_trackers/egl/egl_tracker.c b/src/gallium/state_trackers/egl/egl_tracker.c new file mode 100644 index 0000000000..2813bf4360 --- /dev/null +++ b/src/gallium/state_trackers/egl/egl_tracker.c @@ -0,0 +1,217 @@ + +#include "utils.h" + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "egl_tracker.h" + +#include "egllog.h" +#include "state_tracker/drm_api.h" + +#include "pipe/p_screen.h" +#include "pipe/internal/p_winsys_screen.h" + +/** HACK */ +void* driDriverAPI; +extern const struct dri_extension card_extensions[]; + + +/* + * Exported functions + */ + +/** + * The bootstrap function. Return a new drm_driver object and + * plug in API functions. + */ +_EGLDriver * +_eglMain(_EGLDisplay *dpy, const char *args) +{ + struct drm_device *drm; + + drm = (struct drm_device *) calloc(1, sizeof(struct drm_device)); + if (!drm) { + return NULL; + } + + /* First fill in the dispatch table with defaults */ + _eglInitDriverFallbacks(&drm->base); + /* then plug in our Drm-specific functions */ + drm->base.API.Initialize = drm_initialize; + drm->base.API.Terminate = drm_terminate; + drm->base.API.CreateContext = drm_create_context; + drm->base.API.MakeCurrent = drm_make_current; + drm->base.API.CreateWindowSurface = drm_create_window_surface; + drm->base.API.CreatePixmapSurface = drm_create_pixmap_surface; + drm->base.API.CreatePbufferSurface = drm_create_pbuffer_surface; + drm->base.API.DestroySurface = drm_destroy_surface; + drm->base.API.DestroyContext = drm_destroy_context; + drm->base.API.CreateScreenSurfaceMESA = drm_create_screen_surface_mesa; + drm->base.API.ShowScreenSurfaceMESA = drm_show_screen_surface_mesa; + drm->base.API.SwapBuffers = drm_swap_buffers; + + drm->base.ClientAPIsMask = EGL_OPENGL_BIT /*| EGL_OPENGL_ES_BIT*/; + drm->base.Name = "DRM/Gallium/Win"; + + /* enable supported extensions */ + 
drm->base.Extensions.MESA_screen_surface = EGL_TRUE;
+	drm->base.Extensions.MESA_copy_context = EGL_TRUE;
+
+	return &drm->base;
+}
+
+/**
+ * Read the device ID of DRM card 0 from sysfs into device->deviceID.
+ * On any failure a warning is logged and deviceID is left untouched.
+ */
+static void
+drm_get_device_id(struct drm_device *device)
+{
+	char path[512];
+	FILE *file;
+	/* "%x" stores through an unsigned int pointer, so scan into one. */
+	unsigned int id = 0;
+
+	/* TODO get the real minor */
+	int minor = 0;
+
+	snprintf(path, sizeof(path), "/sys/class/drm/card%d/device/device", minor);
+	file = fopen(path, "r");
+	if (!file) {
+		_eglLog(_EGL_WARNING, "Could not retrive device ID\n");
+		return;
+	}
+
+	/* The path buffer is reused to hold the line read from the file. */
+	if (fgets(path, sizeof(path), file) && sscanf(path, "%x", &id) == 1)
+		device->deviceID = (int)id;
+	else
+		_eglLog(_EGL_WARNING, "Could not retrive device ID\n");
+
+	fclose(file);
+}
+
+/** Drop the cached mode-setting resources and query them again. */
+static void
+drm_update_res(struct drm_device *dev)
+{
+	drmModeFreeResources(dev->res);
+	dev->res = drmModeGetResources(dev->drmFD);
+}
+
+/** Register every mode advertised by the connector with the EGL screen. */
+static void
+drm_add_modes_from_connector(_EGLScreen *screen, drmModeConnectorPtr connector)
+{
+	drmModeModeInfoPtr m = NULL;
+	int i;
+
+	for (i = 0; i < connector->count_modes; i++) {
+		m = &connector->modes[i];
+		_eglAddNewMode(screen, m->hdisplay, m->vdisplay, m->vrefresh, m->name);
+	}
+}
+
+/**
+ * Open the DRM device, create the pipe screen, register one EGL screen
+ * per connected connector and add the single supported config.
+ */
+EGLBoolean
+drm_initialize(_EGLDriver *drv, EGLDisplay dpy, EGLint *major, EGLint *minor)
+{
+	_EGLDisplay *disp = _eglLookupDisplay(dpy);
+	struct drm_device *dev = (struct drm_device *)drv;
+	struct drm_screen *screen = NULL;
+	drmModeConnectorPtr connector = NULL;
+	drmModeResPtr res = NULL;
+	unsigned count_connectors = 0;
+	int num_screens = 0;
+	EGLint i;
+	int fd;
+
+	fd = drmOpen("i915", NULL);
+	if (fd < 0)
+		goto err_fd;
+
+	dev->drmFD = fd;
+	drm_get_device_id(dev);
+
+	dev->screen = drm_api_hocks.create_screen(dev->drmFD, dev->deviceID);
+	if (!dev->screen)
+		goto err_screen;
+	dev->winsys = dev->screen->winsys;
+
+	/* TODO HACK */
+	driInitExtensions(NULL, card_extensions, GL_FALSE);
+
+	drm_update_res(dev);
+	res = dev->res;
+	if (res)
+		count_connectors = res->count_connectors;
+	else
+		_eglLog(_EGL_WARNING, "Could not retrive kms information\n");
+
+	/* Bound by screens actually created, so disconnected connectors do not use up slots. */
+	for (i = 0; (unsigned)i < count_connectors && num_screens < MAX_SCREENS; i++) {
+		connector = drmModeGetConnector(fd,
res->connectors[i]); + + if (!connector) + continue; + + if (connector->connection != DRM_MODE_CONNECTED) { + drmModeFreeConnector(connector); + continue; + } + + screen = malloc(sizeof(struct drm_screen)); + memset(screen, 0, sizeof(*screen)); + screen->connector = connector; + screen->connectorID = connector->connector_id; + _eglInitScreen(&screen->base); + _eglAddScreen(disp, &screen->base); + drm_add_modes_from_connector(&screen->base, connector); + dev->screens[num_screens++] = screen; + } + dev->count_screens = num_screens; + + /* for now we only have one config */ + _EGLConfig *config = calloc(1, sizeof(*config)); + memset(config, 1, sizeof(*config)); + _eglInitConfig(config, 1); + _eglSetConfigAttrib(config, EGL_RED_SIZE, 8); + _eglSetConfigAttrib(config, EGL_GREEN_SIZE, 8); + _eglSetConfigAttrib(config, EGL_BLUE_SIZE, 8); + _eglSetConfigAttrib(config, EGL_ALPHA_SIZE, 8); + _eglSetConfigAttrib(config, EGL_BUFFER_SIZE, 32); + _eglSetConfigAttrib(config, EGL_DEPTH_SIZE, 24); + _eglSetConfigAttrib(config, EGL_STENCIL_SIZE, 8); + _eglSetConfigAttrib(config, EGL_SURFACE_TYPE, EGL_PBUFFER_BIT); + _eglAddConfig(disp, config); + + drv->Initialized = EGL_TRUE; + + *major = 1; + *minor = 4; + + return EGL_TRUE; + +err_screen: + drmClose(fd); +err_fd: + return EGL_FALSE; +} + +EGLBoolean +drm_terminate(_EGLDriver *drv, EGLDisplay dpy) +{ + struct drm_device *dev = (struct drm_device *)drv; + struct drm_screen *screen; + int i = 0; + + drmFreeVersion(dev->version); + + for (i = 0; i < dev->count_screens; i++) { + screen = dev->screens[i]; + + if (screen->shown) + drm_takedown_shown_screen(drv, screen); + + drmModeFreeConnector(screen->connector); + _eglDestroyScreen(&screen->base); + dev->screens[i] = NULL; + } + + dev->screen->destroy(dev->screen); + dev->winsys = NULL; + + drmClose(dev->drmFD); + + _eglCleanupDisplay(_eglLookupDisplay(dpy)); + free(dev); + + return EGL_TRUE; +} diff --git a/src/gallium/state_trackers/egl/egl_tracker.h 
b/src/gallium/state_trackers/egl/egl_tracker.h new file mode 100644 index 0000000000..908bab5f9b --- /dev/null +++ b/src/gallium/state_trackers/egl/egl_tracker.h @@ -0,0 +1,191 @@ + +#ifndef _EGL_TRACKER_H_ +#define _EGL_TRACKER_H_ + +#include <stdint.h> + +#include "eglconfig.h" +#include "eglcontext.h" +#include "egldisplay.h" +#include "egldriver.h" +#include "eglglobals.h" +#include "eglmode.h" +#include "eglscreen.h" +#include "eglsurface.h" + +#include "xf86drm.h" +#include "xf86drmMode.h" + +#include "pipe/p_compiler.h" + +#include "state_tracker/st_public.h" + +#define MAX_SCREENS 16 + +struct pipe_winsys; +struct pipe_screen; +struct pipe_context; +struct state_tracker; + +struct drm_screen; +struct drm_context; + +struct drm_device +{ + _EGLDriver base; /* base class/object */ + + /* + * pipe + */ + + struct pipe_winsys *winsys; + struct pipe_screen *screen; + + /* + * drm + */ + + int drmFD; + drmVersionPtr version; + int deviceID; + + drmModeResPtr res; + + struct drm_screen *screens[MAX_SCREENS]; + size_t count_screens; +}; + +struct drm_surface +{ + _EGLSurface base; /* base class/object */ + + /* + * pipe + */ + + + struct st_framebuffer *stfb; + + /* + * drm + */ + + struct drm_context *user; + struct drm_screen *screen; + + int w; + int h; +}; + +struct drm_context +{ + _EGLContext base; /* base class/object */ + + /* pipe */ + + struct pipe_context *pipe; + struct st_context *st; +}; + +struct drm_screen +{ + _EGLScreen base; + + /* + * pipe + */ + + struct pipe_buffer *buffer; + struct pipe_texture *tex; + struct pipe_surface *surface; + + /* + * drm + */ + + struct { + unsigned height; + unsigned width; + unsigned pitch; + unsigned handle; + } front; + + /* currently only support one connector */ + drmModeConnectorPtr connector; + uint32_t connectorID; + + /* Has this screen been shown */ + int shown; + + /* Surface that is currently attached to this screen */ + struct drm_surface *surf; + + /* framebuffer */ + drmModeFBPtr fb; + uint32_t fbID; 
+ + /* crtc and mode used */ + /*drmModeCrtcPtr crtc;*/ + uint32_t crtcID; + + drmModeModeInfoPtr mode; +}; + + +static INLINE struct drm_context * +lookup_drm_context(EGLContext context) +{ + _EGLContext *c = _eglLookupContext(context); + return (struct drm_context *) c; +} + + +static INLINE struct drm_surface * +lookup_drm_surface(EGLSurface surface) +{ + _EGLSurface *s = _eglLookupSurface(surface); + return (struct drm_surface *) s; +} + +static INLINE struct drm_screen * +lookup_drm_screen(EGLDisplay dpy, EGLScreenMESA screen) +{ + _EGLScreen *s = _eglLookupScreen(dpy, screen); + return (struct drm_screen *) s; +} + +/** + * egl_visual.h + */ +/*@{*/ +void drm_visual_modes_destroy(__GLcontextModes *modes); +__GLcontextModes* drm_visual_modes_create(unsigned count, size_t minimum_size); +__GLcontextModes* drm_visual_from_config(_EGLConfig *conf); +/*@}*/ + +/** + * egl_surface.h + */ +/*@{*/ +void drm_takedown_shown_screen(_EGLDriver *drv, struct drm_screen *screen); +/*@}*/ + +/** + * All function exported to the egl side. 
+ */ +/*@{*/ +EGLBoolean drm_initialize(_EGLDriver *drv, EGLDisplay dpy, EGLint *major, EGLint *minor); +EGLBoolean drm_terminate(_EGLDriver *drv, EGLDisplay dpy); +EGLContext drm_create_context(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, EGLContext share_list, const EGLint *attrib_list); +EGLBoolean drm_destroy_context(_EGLDriver *drv, EGLDisplay dpy, EGLContext context); +EGLSurface drm_create_window_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, NativeWindowType window, const EGLint *attrib_list); +EGLSurface drm_create_pixmap_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, NativePixmapType pixmap, const EGLint *attrib_list); +EGLSurface drm_create_pbuffer_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list); +EGLSurface drm_create_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy, EGLConfig cfg, const EGLint *attrib_list); +EGLBoolean drm_show_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy, EGLScreenMESA screen, EGLSurface surface, EGLModeMESA m); +EGLBoolean drm_destroy_surface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface); +EGLBoolean drm_make_current(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext context); +EGLBoolean drm_swap_buffers(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw); +/*@}*/ + +#endif diff --git a/src/gallium/state_trackers/egl/egl_visual.c b/src/gallium/state_trackers/egl/egl_visual.c new file mode 100644 index 0000000000..e59f893851 --- /dev/null +++ b/src/gallium/state_trackers/egl/egl_visual.c @@ -0,0 +1,85 @@ + +#include "egl_tracker.h" + +#include "egllog.h" + +void +drm_visual_modes_destroy(__GLcontextModes *modes) +{ + _eglLog(_EGL_DEBUG, "%s", __FUNCTION__); + + while (modes) { + __GLcontextModes * const next = modes->next; + free(modes); + modes = next; + } +} + +__GLcontextModes * +drm_visual_modes_create(unsigned count, size_t minimum_size) +{ + /* This code copied from libGLX, and modified */ + const size_t size = 
(minimum_size > sizeof(__GLcontextModes)) + ? minimum_size : sizeof(__GLcontextModes); + __GLcontextModes * head = NULL; + __GLcontextModes ** next; + unsigned i; + + _eglLog(_EGL_DEBUG, "%s %d %d", __FUNCTION__, count, minimum_size); + + next = & head; + for (i = 0 ; i < count ; i++) { + *next = (__GLcontextModes *) calloc(1, size); + if (*next == NULL) { + drm_visual_modes_destroy(head); + head = NULL; + break; + } + + (*next)->doubleBufferMode = 1; + (*next)->visualID = GLX_DONT_CARE; + (*next)->visualType = GLX_DONT_CARE; + (*next)->visualRating = GLX_NONE; + (*next)->transparentPixel = GLX_NONE; + (*next)->transparentRed = GLX_DONT_CARE; + (*next)->transparentGreen = GLX_DONT_CARE; + (*next)->transparentBlue = GLX_DONT_CARE; + (*next)->transparentAlpha = GLX_DONT_CARE; + (*next)->transparentIndex = GLX_DONT_CARE; + (*next)->xRenderable = GLX_DONT_CARE; + (*next)->fbconfigID = GLX_DONT_CARE; + (*next)->swapMethod = GLX_SWAP_UNDEFINED_OML; + (*next)->bindToTextureRgb = GLX_DONT_CARE; + (*next)->bindToTextureRgba = GLX_DONT_CARE; + (*next)->bindToMipmapTexture = GLX_DONT_CARE; + (*next)->bindToTextureTargets = 0; + (*next)->yInverted = GLX_DONT_CARE; + + next = & ((*next)->next); + } + + return head; +} + +__GLcontextModes * +drm_visual_from_config(_EGLConfig *conf) +{ + __GLcontextModes *visual; + (void)conf; + + visual = drm_visual_modes_create(1, sizeof(*visual)); + visual->redBits = 8; + visual->greenBits = 8; + visual->blueBits = 8; + visual->alphaBits = 8; + + visual->rgbBits = 32; + visual->doubleBufferMode = 1; + + visual->depthBits = 24; + visual->haveDepthBuffer = visual->depthBits > 0; + visual->stencilBits = 8; + visual->haveStencilBuffer = visual->stencilBits > 0; + + return visual; +} diff --git a/src/gallium/state_trackers/g3dvl/Makefile b/src/gallium/state_trackers/g3dvl/Makefile new file mode 100644 index 0000000000..f9f4d6be3c --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/Makefile @@ -0,0 +1,21 @@ +TARGET = libg3dvl.a +OBJECTS = 
vl_display.o vl_screen.o vl_context.o vl_surface.o vl_shader_build.o vl_util.o vl_basic_csc.o \
+          vl_r16snorm_mc_buf.o
+GALLIUMDIR = ../..
+
+CFLAGS += -g -Wall -Werror-implicit-function-declaration -fPIC \
+          -I${GALLIUMDIR}/include \
+          -I${GALLIUMDIR}/auxiliary \
+          -I${GALLIUMDIR}/winsys/g3dvl
+
+#############################################
+
+# Special target, not a variable: "all" and "clean" name no files.
+.PHONY: all clean
+
+all: ${TARGET}
+
+# Static archive built from every object file.
+${TARGET}: ${OBJECTS}
+	${AR} rcs $@ $^
+
+clean:
+	${RM} ${OBJECTS} ${TARGET}
diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c
new file mode 100644
index 0000000000..187a13a560
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c
@@ -0,0 +1,715 @@
+#define VL_INTERNAL
+#include "vl_basic_csc.h"
+#include <assert.h>
+#include <pipe/p_context.h>
+#include <pipe/p_state.h>
+#include <pipe/p_inlines.h>
+#include <tgsi/tgsi_parse.h>
+#include <tgsi/tgsi_build.h>
+#include <util/u_memory.h>
+#include "vl_csc.h"
+#include "vl_surface.h"
+#include "vl_shader_build.h"
+#include "vl_types.h"
+
+/* Vertex shader constants: scale/translate for the destination and source rects. */
+struct vlVertexShaderConsts
+{
+	struct vlVertex4f dst_scale;
+	struct vlVertex4f dst_trans;
+	struct vlVertex4f src_scale;
+	struct vlVertex4f src_trans;
+};
+
+/* Fragment shader constants: bias vector followed by a 4x4 conversion matrix. */
+struct vlFragmentShaderConsts
+{
+	struct vlVertex4f bias;
+	float matrix[16];
+};
+
+/* Colour space converter state; "base" comes first so a vlCSC pointer can be cast to it. */
+struct vlBasicCSC
+{
+	struct vlCSC base;
+
+	struct pipe_context *pipe;
+	struct pipe_viewport_state viewport;
+	struct pipe_framebuffer_state framebuffer;
+	struct pipe_texture *framebuffer_tex;
+	void *sampler;
+	void *vertex_shader, *fragment_shader;
+	struct pipe_vertex_buffer vertex_bufs[2];
+	struct pipe_vertex_element vertex_elems[2];
+	struct pipe_constant_buffer vs_const_buf, fs_const_buf;
+};
+
+/* Recreate the converter's framebuffer texture when the requested size changes. */
+static int vlResizeFrameBuffer
+(
+	struct vlCSC *csc,
+	unsigned int width,
+	unsigned int height
+)
+{
+	struct vlBasicCSC *basic_csc;
+	struct pipe_context *pipe;
+	struct pipe_texture template;
+
+	assert(csc);
+
+	basic_csc = (struct vlBasicCSC*)csc;
+	pipe = basic_csc->pipe;
+
+ if (basic_csc->framebuffer.width == width && basic_csc->framebuffer.height == height) + return 0; + + basic_csc->viewport.scale[0] = width; + basic_csc->viewport.scale[1] = height; + basic_csc->viewport.scale[2] = 1; + basic_csc->viewport.scale[3] = 1; + basic_csc->viewport.translate[0] = 0; + basic_csc->viewport.translate[1] = 0; + basic_csc->viewport.translate[2] = 0; + basic_csc->viewport.translate[3] = 0; + + if (basic_csc->framebuffer_tex) + { + pipe_surface_reference(&basic_csc->framebuffer.cbufs[0], NULL); + pipe_texture_reference(&basic_csc->framebuffer_tex, NULL); + } + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_A8R8G8B8_UNORM; + template.last_level = 0; + template.width[0] = width; + template.height[0] = height; + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + template.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET; + + basic_csc->framebuffer_tex = pipe->screen->texture_create(pipe->screen, &template); + + basic_csc->framebuffer.width = width; + basic_csc->framebuffer.height = height; + basic_csc->framebuffer.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + basic_csc->framebuffer_tex, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + + /* Clear to black, in case video doesn't fill the entire window */ + pipe->clear(pipe, basic_csc->framebuffer.cbufs[0], 0); + + return 0; +} + +static int vlBegin +( + struct vlCSC *csc +) +{ + struct vlBasicCSC *basic_csc; + struct pipe_context *pipe; + + assert(csc); + + basic_csc = (struct vlBasicCSC*)csc; + pipe = basic_csc->pipe; + + pipe->set_framebuffer_state(pipe, &basic_csc->framebuffer); + pipe->set_viewport_state(pipe, &basic_csc->viewport); + pipe->bind_sampler_states(pipe, 1, (void**)&basic_csc->sampler); + /* Source texture set in vlPutPictureCSC() */ + pipe->bind_vs_state(pipe, basic_csc->vertex_shader); + pipe->bind_fs_state(pipe, 
basic_csc->fragment_shader); + pipe->set_vertex_buffers(pipe, 2, basic_csc->vertex_bufs); + pipe->set_vertex_elements(pipe, 2, basic_csc->vertex_elems); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &basic_csc->vs_const_buf); + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &basic_csc->fs_const_buf); + + return 0; +} + +static int vlPutPictureCSC +( + struct vlCSC *csc, + struct vlSurface *surface, + int srcx, + int srcy, + int srcw, + int srch, + int destx, + int desty, + int destw, + int desth, + enum vlPictureType picture_type +) +{ + struct vlBasicCSC *basic_csc; + struct pipe_context *pipe; + struct vlVertexShaderConsts *vs_consts; + + assert(csc); + assert(surface); + + basic_csc = (struct vlBasicCSC*)csc; + pipe = basic_csc->pipe; + + vs_consts = pipe_buffer_map + ( + pipe->screen, + basic_csc->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD + ); + + vs_consts->dst_scale.x = destw / (float)basic_csc->framebuffer.cbufs[0]->width; + vs_consts->dst_scale.y = desth / (float)basic_csc->framebuffer.cbufs[0]->height; + vs_consts->dst_scale.z = 1; + vs_consts->dst_scale.w = 1; + vs_consts->dst_trans.x = destx / (float)basic_csc->framebuffer.cbufs[0]->width; + vs_consts->dst_trans.y = desty / (float)basic_csc->framebuffer.cbufs[0]->height; + vs_consts->dst_trans.z = 0; + vs_consts->dst_trans.w = 0; + + vs_consts->src_scale.x = srcw / (float)surface->texture->width[0]; + vs_consts->src_scale.y = srch / (float)surface->texture->height[0]; + vs_consts->src_scale.z = 1; + vs_consts->src_scale.w = 1; + vs_consts->src_trans.x = srcx / (float)surface->texture->width[0]; + vs_consts->src_trans.y = srcy / (float)surface->texture->height[0]; + vs_consts->src_trans.z = 0; + vs_consts->src_trans.w = 0; + + pipe_buffer_unmap(pipe->screen, basic_csc->vs_const_buf.buffer); + + pipe->set_sampler_textures(pipe, 1, &surface->texture); + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_STRIP, 0, 4); + + return 0; +} + +static int vlEnd +( + 
struct vlCSC *csc +) +{ + assert(csc); + + return 0; +} + +static struct pipe_surface* vlGetFrameBuffer +( + struct vlCSC *csc +) +{ + struct vlBasicCSC *basic_csc; + + assert(csc); + + basic_csc = (struct vlBasicCSC*)csc; + + return basic_csc->framebuffer.cbufs[0]; +} + +static int vlDestroy +( + struct vlCSC *csc +) +{ + struct vlBasicCSC *basic_csc; + struct pipe_context *pipe; + unsigned int i; + + assert(csc); + + basic_csc = (struct vlBasicCSC*)csc; + pipe = basic_csc->pipe; + + if (basic_csc->framebuffer_tex) + { + pipe_surface_reference(&basic_csc->framebuffer.cbufs[0], NULL); + pipe_texture_reference(&basic_csc->framebuffer_tex, NULL); + } + + pipe->delete_sampler_state(pipe, basic_csc->sampler); + pipe->delete_vs_state(pipe, basic_csc->vertex_shader); + pipe->delete_fs_state(pipe, basic_csc->fragment_shader); + + for (i = 0; i < 2; ++i) + pipe_buffer_reference(pipe->screen, &basic_csc->vertex_bufs[i].buffer, NULL); + + pipe_buffer_reference(pipe->screen, &basic_csc->vs_const_buf.buffer, NULL); + pipe_buffer_reference(pipe->screen, &basic_csc->fs_const_buf.buffer, NULL); + + FREE(basic_csc); + + return 0; +} + +/* + * Represents 2 triangles in a strip in normalized coords. + * Used to render the surface onto the frame buffer. + */ +static const struct vlVertex2f surface_verts[4] = +{ + {0.0f, 0.0f}, + {0.0f, 1.0f}, + {1.0f, 0.0f}, + {1.0f, 1.0f} +}; + +/* + * Represents texcoords for the above. We can use the position values directly. + * TODO: Duplicate these in the shader, no need to create a buffer. 
+ */ +static const struct vlVertex2f *surface_texcoords = surface_verts; + +/* + * Identity color conversion constants, for debugging + */ +static const struct vlFragmentShaderConsts identity = +{ + { + 0.0f, 0.0f, 0.0f, 0.0f + }, + { + 1.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 1.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 1.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const struct vlFragmentShaderConsts bt_601 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 1.371f, 0.0f, + 1.0f, -0.336f, -0.698f, 0.0f, + 1.0f, 1.732f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.601 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +static const struct vlFragmentShaderConsts bt_601_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.596f, 0.0f, + 1.164f, -0.391f, -0.813f, 0.0f, + 1.164f, 2.018f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [16,235] + */ +static const struct vlFragmentShaderConsts bt_709 = +{ + { + 0.0f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.0f, 0.0f, 1.540f, 0.0f, + 1.0f, -0.183f, -0.459f, 0.0f, + 1.0f, 1.816f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +/* + * Converts ITU-R BT.709 YCbCr pixels to RGB pixels where: + * Y is in [16,235], Cb and Cr are in [16,240] + * R, G, and B are in [0,255] + */ +const struct vlFragmentShaderConsts bt_709_full = +{ + { + 0.062745098f, 0.501960784f, 0.501960784f, 0.0f + }, + { + 1.164f, 0.0f, 1.793f, 0.0f, + 1.164f, -0.213f, -0.534f, 0.0f, + 1.164f, 2.115f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 1.0f + } +}; + +static int vlCreateVertexShader +( + struct vlBasicCSC *csc +) +{ + const unsigned int 
max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = csc->pipe; + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling vector to scale vertex pos rect to destination size + * decl c1 ; Translation vector to move vertex pos rect into position + * decl c2 ; Scaling vector to scale texcoord rect to source size + * decl c3 ; Translation vector to move texcoord rect into position + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 3); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Vertex texcoords + */ + for (i = 0; i < 2; i++) + { + decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * madd o0, i0, c0, c1 ; Scale and translate unit output rect to destination size and pos + * madd o1, i1, c2, c3 ; Scale and translate unit texcoord rect to source size and pos + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst4(TGSI_OPCODE_MADD, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i, TGSI_FILE_CONSTANT, i * 2, TGSI_FILE_CONSTANT, i * 2 + 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + csc->vertex_shader = pipe->create_vs_state(pipe, &vs); + FREE(tokens); + + return 0; +} + +static int vlCreateFragmentShader +( + struct vlBasicCSC *csc +) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(context); + + pipe = csc->pipe; + tokens = (struct tgsi_token*)MALLOC(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* decl i0 ; Texcoords for s0 */ + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, 1, 0, 0, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl c0 ; Bias vector for CSC + * decl c1-c4 ; CSC matrix c1-c4 
+ */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0 */ + decl = vl_decl_temps(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl s0 ; Sampler for tex containing picture to display */ + decl = vl_decl_samplers(0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* tex2d t0, i0, s0 ; Read src pixel */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_INPUT, 0, TGSI_FILE_SAMPLER, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* sub t0, t0, c0 ; Subtract bias vector from pixel */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * dp4 o0.x, t0, c1 ; Multiply pixel by the color conversion matrix + * dp4 o0.y, t0, c2 + * dp4 o0.z, t0, c3 + */ + for (i = 0; i < 3; ++i) + { + inst = vl_inst3(TGSI_OPCODE_DP4, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, i + 1); + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + csc->fragment_shader = pipe->create_fs_state(pipe, &fs); + FREE(tokens); + + return 0; +} + +static int vlCreateDataBufs +( + struct vlBasicCSC *csc +) +{ + struct pipe_context *pipe; + + assert(csc); + + pipe = csc->pipe; + + /* + * Create our vertex buffer and vertex buffer element + * VB contains 4 vertices that render a quad 
covering the entire window + * to display a rendered surface + * Quad is rendered as a tri strip + */ + csc->vertex_bufs[0].stride = sizeof(struct vlVertex2f); + csc->vertex_bufs[0].max_index = 3; + csc->vertex_bufs[0].buffer_offset = 0; + csc->vertex_bufs[0].buffer = pipe_buffer_create + ( + pipe->screen, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 4 + ); + + memcpy + ( + pipe_buffer_map(pipe->screen, csc->vertex_bufs[0].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + surface_verts, + sizeof(struct vlVertex2f) * 4 + ); + + pipe_buffer_unmap(pipe->screen, csc->vertex_bufs[0].buffer); + + csc->vertex_elems[0].src_offset = 0; + csc->vertex_elems[0].vertex_buffer_index = 0; + csc->vertex_elems[0].nr_components = 2; + csc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + * Create our texcoord buffer and texcoord buffer element + * Texcoord buffer contains the TCs for mapping the rendered surface to the 4 vertices + */ + csc->vertex_bufs[1].stride = sizeof(struct vlVertex2f); + csc->vertex_bufs[1].max_index = 3; + csc->vertex_bufs[1].buffer_offset = 0; + csc->vertex_bufs[1].buffer = pipe_buffer_create + ( + pipe->screen, + 1, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(struct vlVertex2f) * 4 + ); + + memcpy + ( + pipe_buffer_map(pipe->screen, csc->vertex_bufs[1].buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + surface_texcoords, + sizeof(struct vlVertex2f) * 4 + ); + + pipe_buffer_unmap(pipe->screen, csc->vertex_bufs[1].buffer); + + csc->vertex_elems[1].src_offset = 0; + csc->vertex_elems[1].vertex_buffer_index = 1; + csc->vertex_elems[1].nr_components = 2; + csc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* + * Create our vertex shader's constant buffer + * Const buffer contains scaling and translation vectors + */ + csc->vs_const_buf.buffer = pipe_buffer_create + ( + pipe->screen, + 1, + PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, + sizeof(struct vlVertexShaderConsts) + ); + + /* + * Create our fragment shader's constant buffer + 
* Const buffer contains the color conversion matrix and bias vectors + */ + csc->fs_const_buf.buffer = pipe_buffer_create + ( + pipe->screen, + 1, + PIPE_BUFFER_USAGE_CONSTANT, + sizeof(struct vlFragmentShaderConsts) + ); + + /* + * TODO: Refactor this into a separate function, + * allow changing the CSC matrix at runtime to switch between regular & full versions + */ + memcpy + ( + pipe_buffer_map(pipe->screen, csc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + &bt_601_full, + sizeof(struct vlFragmentShaderConsts) + ); + + pipe_buffer_unmap(pipe->screen, csc->fs_const_buf.buffer); + + return 0; +} + +static int vlInit +( + struct vlBasicCSC *csc +) +{ /* Resets the deferred framebuffer fields, creates the sampler state, then builds both shaders and the data buffers; always returns 0. */ + struct pipe_context *pipe; + struct pipe_sampler_state sampler = {0}; /* zeroed: the members commented out below are never assigned, yet the whole struct is read by create_sampler_state() */ + + assert(csc); + + pipe = csc->pipe; + + /* Delay creating the FB until vlPutPictureCSC() so we know window size */ + csc->framebuffer_tex = NULL; + csc->framebuffer.width = 0; + csc->framebuffer.height = 0; + csc->framebuffer.nr_cbufs = 1; + csc->framebuffer.cbufs[0] = NULL; + csc->framebuffer.zsbuf = NULL; + + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + /*sampler.min_lod = ;*/ + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + csc->sampler = pipe->create_sampler_state(pipe, &sampler); + + vlCreateVertexShader(csc); + vlCreateFragmentShader(csc); + vlCreateDataBufs(csc); + + return 0; +} + +int vlCreateBasicCSC +( + struct pipe_context *pipe, + struct vlCSC **csc +) +{ + struct vlBasicCSC *basic_csc; + + assert(pipe); + assert(csc); + + 
basic_csc = CALLOC_STRUCT(vlBasicCSC); + + if (!basic_csc) + return 1; + + basic_csc->base.vlResizeFrameBuffer = &vlResizeFrameBuffer; + basic_csc->base.vlBegin = &vlBegin; + basic_csc->base.vlPutPicture = &vlPutPictureCSC; + basic_csc->base.vlEnd = &vlEnd; + basic_csc->base.vlGetFrameBuffer = &vlGetFrameBuffer; + basic_csc->base.vlDestroy = &vlDestroy; + basic_csc->pipe = pipe; + + vlInit(basic_csc); + + *csc = &basic_csc->base; + + return 0; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.h b/src/gallium/state_trackers/g3dvl/vl_basic_csc.h new file mode 100644 index 0000000000..2e17f1d814 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.h @@ -0,0 +1,13 @@ +#ifndef vl_basic_csc_h +#define vl_basic_csc_h + +struct pipe_context; +struct vlCSC; + +int vlCreateBasicCSC +( + struct pipe_context *pipe, + struct vlCSC **csc +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_context.c b/src/gallium/state_trackers/g3dvl/vl_context.c new file mode 100644 index 0000000000..65ddb9f01e --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_context.c @@ -0,0 +1,208 @@ +#define VL_INTERNAL +#include "vl_context.h" +#include <assert.h> +#include <pipe/p_context.h> +#include <pipe/p_state.h> +#include <util/u_memory.h> +#include "vl_render.h" +#include "vl_r16snorm_mc_buf.h" +#include "vl_csc.h" +#include "vl_basic_csc.h" + +static int vlInitCommon(struct vlContext *context) +{ /* Creates and binds the rasterizer, blend and depth/stencil/alpha state objects used by the context; always returns 0. */ + struct pipe_context *pipe; + struct pipe_rasterizer_state rast = {0}; /* zeroed: sprite_coord_mode is never assigned below, yet the whole struct is read by create_rasterizer_state() */ + struct pipe_blend_state blend = {0}; /* zeroed so any member not assigned below has a defined value */ + struct pipe_depth_stencil_alpha_state dsa = {0}; /* zeroed so any member not assigned below has a defined value */ + unsigned int i; + + assert(context); + + pipe = context->pipe; + + rast.flatshade = 1; + rast.flatshade_first = 0; + rast.light_twoside = 0; + rast.front_winding = PIPE_WINDING_CCW; + rast.cull_mode = PIPE_WINDING_CW; + rast.fill_cw = PIPE_POLYGON_MODE_FILL; + rast.fill_ccw = PIPE_POLYGON_MODE_FILL; + rast.offset_cw = 0; + rast.offset_ccw = 0; + rast.scissor = 0; + rast.poly_smooth = 0; + rast.poly_stipple_enable = 0; + 
rast.point_sprite = 0; + rast.point_size_per_vertex = 0; + rast.multisample = 0; + rast.line_smooth = 0; + rast.line_stipple_enable = 0; + rast.line_stipple_factor = 0; + rast.line_stipple_pattern = 0; + rast.line_last_pixel = 0; + /* Don't need clipping, but viewport mapping done here */ + rast.bypass_clipping = 0; + rast.bypass_vs = 0; + rast.origin_lower_left = 0; + rast.line_width = 1; + rast.point_smooth = 0; + rast.point_size = 1; + rast.offset_units = 1; + rast.offset_scale = 1; + /*rast.sprite_coord_mode[i] = ;*/ + context->raster = pipe->create_rasterizer_state(pipe, &rast); + pipe->bind_rasterizer_state(pipe, context->raster); + + blend.blend_enable = 0; + blend.rgb_func = PIPE_BLEND_ADD; + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_func = PIPE_BLEND_ADD; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend.logicop_enable = 0; + blend.logicop_func = PIPE_LOGICOP_CLEAR; + /* Needed to allow color writes to FB, even if blending disabled */ + blend.colormask = PIPE_MASK_RGBA; + blend.dither = 0; + context->blend = pipe->create_blend_state(pipe, &blend); + pipe->bind_blend_state(pipe, context->blend); + + dsa.depth.enabled = 0; + dsa.depth.writemask = 0; + dsa.depth.func = PIPE_FUNC_ALWAYS; + dsa.depth.occlusion_count = 0; + for (i = 0; i < 2; ++i) + { + dsa.stencil[i].enabled = 0; + dsa.stencil[i].func = PIPE_FUNC_ALWAYS; + dsa.stencil[i].fail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zpass_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].zfail_op = PIPE_STENCIL_OP_KEEP; + dsa.stencil[i].ref_value = 0; + dsa.stencil[i].valuemask = 0; + dsa.stencil[i].writemask = 0; + } + dsa.alpha.enabled = 0; + dsa.alpha.func = PIPE_FUNC_ALWAYS; + dsa.alpha.ref_value = 0; + context->dsa = pipe->create_depth_stencil_alpha_state(pipe, &dsa); + pipe->bind_depth_stencil_alpha_state(pipe, context->dsa); + + return 0; +} + +int vlCreateContext +( + struct vlScreen 
*screen, + struct pipe_context *pipe, + unsigned int picture_width, + unsigned int picture_height, + enum vlFormat picture_format, + enum vlProfile profile, + enum vlEntryPoint entry_point, + struct vlContext **context +) +{ + struct vlContext *ctx; + + assert(screen); + assert(context); + assert(pipe); + + ctx = CALLOC_STRUCT(vlContext); + + if (!ctx) + return 1; + + ctx->screen = screen; + ctx->pipe = pipe; + ctx->picture_width = picture_width; + ctx->picture_height = picture_height; + ctx->picture_format = picture_format; + ctx->profile = profile; + ctx->entry_point = entry_point; + + vlInitCommon(ctx); + + vlCreateR16SNormBufferedMC(pipe, picture_width, picture_height, picture_format, &ctx->render); + vlCreateBasicCSC(pipe, &ctx->csc); + + *context = ctx; + + return 0; +} + +int vlDestroyContext +( + struct vlContext *context +) +{ + assert(context); + + /* XXX: Must unbind shaders before we can delete them for some reason */ + context->pipe->bind_vs_state(context->pipe, NULL); + context->pipe->bind_fs_state(context->pipe, NULL); + + context->render->vlDestroy(context->render); + context->csc->vlDestroy(context->csc); + + context->pipe->delete_blend_state(context->pipe, context->blend); + context->pipe->delete_rasterizer_state(context->pipe, context->raster); + context->pipe->delete_depth_stencil_alpha_state(context->pipe, context->dsa); + + FREE(context); + + return 0; +} + +struct vlScreen* vlContextGetScreen +( + struct vlContext *context +) +{ + assert(context); + + return context->screen; +} + +struct pipe_context* vlGetPipeContext +( + struct vlContext *context +) +{ + assert(context); + + return context->pipe; +} + +unsigned int vlGetPictureWidth +( + struct vlContext *context +) +{ + assert(context); + + return context->picture_width; +} + +unsigned int vlGetPictureHeight +( + struct vlContext *context +) +{ + assert(context); + + return context->picture_height; +} + +enum vlFormat vlGetPictureFormat +( + struct vlContext *context +) +{ + 
assert(context); + + return context->picture_format; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_context.h b/src/gallium/state_trackers/g3dvl/vl_context.h new file mode 100644 index 0000000000..3d14634c44 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_context.h @@ -0,0 +1,73 @@ +#ifndef vl_context_h +#define vl_context_h + +#include "vl_types.h" + +struct pipe_context; + +#ifdef VL_INTERNAL +struct vlRender; +struct vlCSC; + +struct vlContext +{ + struct vlScreen *screen; + struct pipe_context *pipe; + unsigned int picture_width; + unsigned int picture_height; + enum vlFormat picture_format; + enum vlProfile profile; + enum vlEntryPoint entry_point; + + void *raster; + void *dsa; + void *blend; + + struct vlRender *render; + struct vlCSC *csc; +}; +#endif + +int vlCreateContext +( + struct vlScreen *screen, + struct pipe_context *pipe, + unsigned int picture_width, + unsigned int picture_height, + enum vlFormat picture_format, + enum vlProfile profile, + enum vlEntryPoint entry_point, + struct vlContext **context +); + +int vlDestroyContext +( + struct vlContext *context +); + +struct vlScreen* vlContextGetScreen +( + struct vlContext *context +); + +struct pipe_context* vlGetPipeContext +( + struct vlContext *context +); + +unsigned int vlGetPictureWidth +( + struct vlContext *context +); + +unsigned int vlGetPictureHeight +( + struct vlContext *context +); + +enum vlFormat vlGetPictureFormat +( + struct vlContext *context +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_csc.h b/src/gallium/state_trackers/g3dvl/vl_csc.h new file mode 100644 index 0000000000..36417a2792 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_csc.h @@ -0,0 +1,53 @@ +#ifndef vl_csc_h +#define vl_csc_h + +#include "vl_types.h" + +struct pipe_surface; + +struct vlCSC +{ + int (*vlResizeFrameBuffer) + ( + struct vlCSC *csc, + unsigned int width, + unsigned int height + ); + + int (*vlBegin) + ( + struct vlCSC *csc + ); + + int (*vlPutPicture) + ( + struct 
vlCSC *csc, + struct vlSurface *surface, + int srcx, + int srcy, + int srcw, + int srch, + int destx, + int desty, + int destw, + int desth, + enum vlPictureType picture_type + ); + + int (*vlEnd) + ( + struct vlCSC *csc + ); + + struct pipe_surface* (*vlGetFrameBuffer) + ( + struct vlCSC *csc + ); + + int (*vlDestroy) + ( + struct vlCSC *csc + ); +}; + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_defs.h b/src/gallium/state_trackers/g3dvl/vl_defs.h new file mode 100644 index 0000000000..d612d02502 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_defs.h @@ -0,0 +1,11 @@ +#ifndef vl_defs_h +#define vl_defs_h + +#define VL_BLOCK_WIDTH 8 +#define VL_BLOCK_HEIGHT 8 +#define VL_BLOCK_SIZE (VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT) +#define VL_MACROBLOCK_WIDTH 16 +#define VL_MACROBLOCK_HEIGHT 16 +#define VL_MACROBLOCK_SIZE (VL_MACROBLOCK_WIDTH * VL_MACROBLOCK_HEIGHT) + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_display.c b/src/gallium/state_trackers/g3dvl/vl_display.c new file mode 100644 index 0000000000..dce06de758 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_display.c @@ -0,0 +1,48 @@ +#define VL_INTERNAL +#include "vl_display.h" +#include <assert.h> +#include <util/u_memory.h> + +int vlCreateDisplay +( + vlNativeDisplay native_display, + struct vlDisplay **display +) +{ + struct vlDisplay *dpy; + + assert(native_display); + assert(display); + + dpy = CALLOC_STRUCT(vlDisplay); + + if (!dpy) + return 1; + + dpy->native = native_display; + *display = dpy; + + return 0; +} + +int vlDestroyDisplay +( + struct vlDisplay *display +) +{ + assert(display); + + FREE(display); + + return 0; +} + +vlNativeDisplay vlGetNativeDisplay +( + struct vlDisplay *display +) +{ + assert(display); + + return display->native; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_display.h b/src/gallium/state_trackers/g3dvl/vl_display.h new file mode 100644 index 0000000000..e11fd40799 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_display.h @@ -0,0 
+1,29 @@ +#ifndef vl_display_h +#define vl_display_h + +#include "vl_types.h" + +#ifdef VL_INTERNAL +struct vlDisplay +{ + vlNativeDisplay native; +}; +#endif + +int vlCreateDisplay +( + vlNativeDisplay native_display, + struct vlDisplay **display +); + +int vlDestroyDisplay +( + struct vlDisplay *display +); + +vlNativeDisplay vlGetNativeDisplay +( + struct vlDisplay *display +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c new file mode 100644 index 0000000000..7cd753f736 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -0,0 +1,1157 @@ +#define VL_INTERNAL +#include "vl_r16snorm_mc_buf.h" +#include <assert.h> +#include <pipe/p_context.h> +#include <pipe/p_screen.h> +#include <pipe/p_state.h> +#include <pipe/p_inlines.h> +#include <tgsi/tgsi_parse.h> +#include <tgsi/tgsi_build.h> +#include <util/u_math.h> +#include <util/u_memory.h> +#include "vl_render.h" +#include "vl_shader_build.h" +#include "vl_surface.h" +#include "vl_util.h" +#include "vl_types.h" +#include "vl_defs.h" + +const unsigned int DEFAULT_BUF_ALIGNMENT = 1; + +enum vlMacroBlockTypeEx +{ + vlMacroBlockExTypeIntra, + vlMacroBlockExTypeFwdPredictedFrame, + vlMacroBlockExTypeFwdPredictedField, + vlMacroBlockExTypeBkwdPredictedFrame, + vlMacroBlockExTypeBkwdPredictedField, + vlMacroBlockExTypeBiPredictedFrame, + vlMacroBlockExTypeBiPredictedField, + + vlNumMacroBlockExTypes +}; + +struct vlVertexShaderConsts +{ + struct vlVertex4f denorm; +}; + +struct vlFragmentShaderConsts +{ + struct vlVertex4f multiplier; + struct vlVertex4f div; +}; + +struct vlMacroBlockVertexStream0 +{ + struct vlVertex2f pos; + struct vlVertex2f luma_tc; + struct vlVertex2f cb_tc; + struct vlVertex2f cr_tc; +}; + +struct vlR16SnormBufferedMC +{ + struct vlRender base; + + unsigned int picture_width; + unsigned int picture_height; + enum vlFormat picture_format; + unsigned int macroblocks_per_picture; + + struct 
vlSurface *buffered_surface; + struct vlSurface *past_surface; + struct vlSurface *future_surface; + struct vlVertex2f surface_tex_inv_size; + struct vlVertex2f zero_block[3]; + unsigned int num_macroblocks; + struct vlMpeg2MacroBlock *macroblocks; + struct pipe_transfer *tex_transfer[3]; + short *texels[3]; + + struct pipe_context *pipe; + struct pipe_viewport_state viewport; + struct pipe_framebuffer_state render_target; + + union + { + void *all[5]; + struct + { + void *y; + void *cb; + void *cr; + void *ref[2]; + }; + } samplers; + + union + { + struct pipe_texture *all[5]; + struct + { + struct pipe_texture *y; + struct pipe_texture *cb; + struct pipe_texture *cr; + struct pipe_texture *ref[2]; + }; + } textures; + + union + { + struct pipe_vertex_buffer all[3]; + struct + { + struct pipe_vertex_buffer ycbcr; + struct pipe_vertex_buffer ref[2]; + }; + } vertex_bufs; + + void *i_vs, *p_vs[2], *b_vs[2]; + void *i_fs, *p_fs[2], *b_fs[2]; + struct pipe_vertex_element vertex_elems[8]; + struct pipe_constant_buffer vs_const_buf; + struct pipe_constant_buffer fs_const_buf; +}; + +static inline int vlBegin +( + struct vlRender *render +) +{ + assert(render); + + return 0; +} + +static inline int vlGrabFrameCodedBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static inline int vlGrabFieldCodedBlock(short *src, short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + memcpy + ( + dst + y * dst_pitch * 2, + src + y * VL_BLOCK_WIDTH, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static inline int vlGrabNoBlock(short *dst, unsigned int dst_pitch) +{ + unsigned int y; + + for (y = 0; y < VL_BLOCK_HEIGHT; ++y) + memset + ( + dst + y * dst_pitch, + 0, + VL_BLOCK_WIDTH * 2 + ); + + return 0; +} + +static inline int vlGrabBlocks +( + struct 
vlR16SnormBufferedMC *mc, + unsigned int mbx, + unsigned int mby, + enum vlDCTType dct_type, + unsigned int coded_block_pattern, + short *blocks +) +{ + short *texels; + unsigned int tex_pitch; + unsigned int x, y, tb = 0, sb = 0; + unsigned int mbpx = mbx * VL_MACROBLOCK_WIDTH, mbpy = mby * VL_MACROBLOCK_HEIGHT; + + assert(mc); + assert(blocks); + + tex_pitch = mc->tex_transfer[0]->stride / mc->tex_transfer[0]->block.size; + texels = mc->texels[0] + mbpy * tex_pitch + mbpx; + + for (y = 0; y < 2; ++y) + { + for (x = 0; x < 2; ++x, ++tb) + { + if ((coded_block_pattern >> (5 - tb)) & 1) + { + short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; + + if (dct_type == vlDCTTypeFrameCoded) + { + vlGrabFrameCodedBlock + ( + cur_block, + texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, + tex_pitch + ); + } + else + { + vlGrabFieldCodedBlock + ( + cur_block, + texels + y * tex_pitch + x * VL_BLOCK_WIDTH, + tex_pitch + ); + } + + ++sb; + } + else if (mc->zero_block[0].x < 0.0f) + { + vlGrabNoBlock(texels + y * tex_pitch * VL_BLOCK_HEIGHT + x * VL_BLOCK_WIDTH, tex_pitch); + + mc->zero_block[0].x = (mbpx + x * 8) * mc->surface_tex_inv_size.x; + mc->zero_block[0].y = (mbpy + y * 8) * mc->surface_tex_inv_size.y; + } + } + } + + /* TODO: Implement 422, 444 */ + mbpx >>= 1; + mbpy >>= 1; + + for (tb = 0; tb < 2; ++tb) + { + tex_pitch = mc->tex_transfer[tb + 1]->stride / mc->tex_transfer[tb + 1]->block.size; + texels = mc->texels[tb + 1] + mbpy * tex_pitch + mbpx; + + if ((coded_block_pattern >> (1 - tb)) & 1) + { + short *cur_block = blocks + sb * VL_BLOCK_WIDTH * VL_BLOCK_HEIGHT; + + vlGrabFrameCodedBlock + ( + cur_block, + texels, + tex_pitch + ); + + ++sb; + } + else if (mc->zero_block[tb + 1].x < 0.0f) + { + vlGrabNoBlock(texels, tex_pitch); + + mc->zero_block[tb + 1].x = (mbpx << 1) * mc->surface_tex_inv_size.x; + mc->zero_block[tb + 1].y = (mbpy << 1) * mc->surface_tex_inv_size.y; + } + } + + return 0; +} + +static inline enum vlMacroBlockTypeEx 
vlGetMacroBlockTypeEx(struct vlMpeg2MacroBlock *mb) +{ + assert(mb); + + switch (mb->mb_type) + { + case vlMacroBlockTypeIntra: + return vlMacroBlockExTypeIntra; + case vlMacroBlockTypeFwdPredicted: + return mb->mo_type == vlMotionTypeFrame ? + vlMacroBlockExTypeFwdPredictedFrame : vlMacroBlockExTypeFwdPredictedField; + case vlMacroBlockTypeBkwdPredicted: + return mb->mo_type == vlMotionTypeFrame ? + vlMacroBlockExTypeBkwdPredictedFrame : vlMacroBlockExTypeBkwdPredictedField; + case vlMacroBlockTypeBiPredicted: + return mb->mo_type == vlMotionTypeFrame ? + vlMacroBlockExTypeBiPredictedFrame : vlMacroBlockExTypeBiPredictedField; + default: + assert(0); + } + + /* Unreachable */ + return -1; +} + +static inline int vlGrabMacroBlock +( + struct vlR16SnormBufferedMC *mc, + struct vlMpeg2MacroBlock *macroblock +) +{ + assert(mc); + assert(macroblock); + assert(mc->num_macroblocks < mc->macroblocks_per_picture); + + mc->macroblocks[mc->num_macroblocks].mbx = macroblock->mbx; + mc->macroblocks[mc->num_macroblocks].mby = macroblock->mby; + mc->macroblocks[mc->num_macroblocks].mb_type = macroblock->mb_type; + mc->macroblocks[mc->num_macroblocks].mo_type = macroblock->mo_type; + mc->macroblocks[mc->num_macroblocks].dct_type = macroblock->dct_type; + mc->macroblocks[mc->num_macroblocks].PMV[0][0][0] = macroblock->PMV[0][0][0]; + mc->macroblocks[mc->num_macroblocks].PMV[0][0][1] = macroblock->PMV[0][0][1]; + mc->macroblocks[mc->num_macroblocks].PMV[0][1][0] = macroblock->PMV[0][1][0]; + mc->macroblocks[mc->num_macroblocks].PMV[0][1][1] = macroblock->PMV[0][1][1]; + mc->macroblocks[mc->num_macroblocks].PMV[1][0][0] = macroblock->PMV[1][0][0]; + mc->macroblocks[mc->num_macroblocks].PMV[1][0][1] = macroblock->PMV[1][0][1]; + mc->macroblocks[mc->num_macroblocks].PMV[1][1][0] = macroblock->PMV[1][1][0]; + mc->macroblocks[mc->num_macroblocks].PMV[1][1][1] = macroblock->PMV[1][1][1]; + mc->macroblocks[mc->num_macroblocks].cbp = macroblock->cbp; + 
mc->macroblocks[mc->num_macroblocks].blocks = macroblock->blocks; + + vlGrabBlocks + ( + mc, + macroblock->mbx, + macroblock->mby, + macroblock->dct_type, + macroblock->cbp, + macroblock->blocks + ); + + mc->num_macroblocks++; + + return 0; +} + +#define SET_BLOCK(vb, cbp, mbx, mby, unitx, unity, ofsx, ofsy, hx, hy, lm, cbm, crm, zb) \ + do { \ + (vb)[0].pos.x = (mbx) * (unitx) + (ofsx); (vb)[0].pos.y = (mby) * (unity) + (ofsy); \ + (vb)[1].pos.x = (mbx) * (unitx) + (ofsx); (vb)[1].pos.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].pos.y = (mby) * (unity) + (ofsy); \ + (vb)[3].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].pos.y = (mby) * (unity) + (ofsy); \ + (vb)[4].pos.x = (mbx) * (unitx) + (ofsx); (vb)[4].pos.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].pos.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].pos.y = (mby) * (unity) + (ofsy) + (hy); \ + \ + if ((cbp) & (lm)) \ + { \ + (vb)[0].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].luma_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[1].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].luma_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[3].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].luma_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[4].luma_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].luma_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].luma_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + } \ + else \ + { \ + (vb)[0].luma_tc.x = (zb)[0].x; (vb)[0].luma_tc.y = (zb)[0].y; \ + (vb)[1].luma_tc.x = (zb)[0].x; (vb)[1].luma_tc.y = (zb)[0].y + (hy); \ + (vb)[2].luma_tc.x = (zb)[0].x + (hx); (vb)[2].luma_tc.y = (zb)[0].y; \ + (vb)[3].luma_tc.x = (zb)[0].x + (hx); (vb)[3].luma_tc.y = (zb)[0].y; \ + (vb)[4].luma_tc.x = (zb)[0].x; (vb)[4].luma_tc.y = (zb)[0].y + (hy); \ + (vb)[5].luma_tc.x = (zb)[0].x + (hx); 
(vb)[5].luma_tc.y = (zb)[0].y + (hy); \ + } \ + \ + if ((cbp) & (cbm)) \ + { \ + (vb)[0].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cb_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[1].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cb_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[3].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cb_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[4].cb_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].cb_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cb_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + } \ + else \ + { \ + (vb)[0].cb_tc.x = (zb)[1].x; (vb)[0].cb_tc.y = (zb)[1].y; \ + (vb)[1].cb_tc.x = (zb)[1].x; (vb)[1].cb_tc.y = (zb)[1].y + (hy); \ + (vb)[2].cb_tc.x = (zb)[1].x + (hx); (vb)[2].cb_tc.y = (zb)[1].y; \ + (vb)[3].cb_tc.x = (zb)[1].x + (hx); (vb)[3].cb_tc.y = (zb)[1].y; \ + (vb)[4].cb_tc.x = (zb)[1].x; (vb)[4].cb_tc.y = (zb)[1].y + (hy); \ + (vb)[5].cb_tc.x = (zb)[1].x + (hx); (vb)[5].cb_tc.y = (zb)[1].y + (hy); \ + } \ + \ + if ((cbp) & (crm)) \ + { \ + (vb)[0].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[0].cr_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[1].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[1].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[2].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[2].cr_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[3].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[3].cr_tc.y = (mby) * (unity) + (ofsy); \ + (vb)[4].cr_tc.x = (mbx) * (unitx) + (ofsx); (vb)[4].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + (vb)[5].cr_tc.x = (mbx) * (unitx) + (ofsx) + (hx); (vb)[5].cr_tc.y = (mby) * (unity) + (ofsy) + (hy); \ + } \ + else \ + { \ + (vb)[0].cr_tc.x = (zb)[2].x; (vb)[0].cr_tc.y = (zb)[2].y; \ + (vb)[1].cr_tc.x = (zb)[2].x; (vb)[1].cr_tc.y = (zb)[2].y + (hy); \ + (vb)[2].cr_tc.x = (zb)[2].x + (hx); (vb)[2].cr_tc.y = (zb)[2].y; \ + (vb)[3].cr_tc.x = (zb)[2].x + (hx); 
(vb)[3].cr_tc.y = (zb)[2].y; \ + (vb)[4].cr_tc.x = (zb)[2].x; (vb)[4].cr_tc.y = (zb)[2].y + (hy); \ + (vb)[5].cr_tc.x = (zb)[2].x + (hx); (vb)[5].cr_tc.y = (zb)[2].y + (hy); \ + } \ + } while (0) + +static inline int vlGenMacroblockVerts +( + struct vlR16SnormBufferedMC *mc, + struct vlMpeg2MacroBlock *macroblock, + unsigned int pos, + struct vlMacroBlockVertexStream0 *ycbcr_vb, + struct vlVertex2f **ref_vb +) +{ + struct vlVertex2f mo_vec[2]; + unsigned int i; + + assert(mc); + assert(macroblock); + assert(ycbcr_vb); + assert(pos < mc->macroblocks_per_picture); + + switch (macroblock->mb_type) + { + case vlMacroBlockTypeBiPredicted: + { + struct vlVertex2f *vb; + + assert(ref_vb && ref_vb[1]); + + vb = ref_vb[1] + pos * 2 * 24; + + mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y; + + if (macroblock->mo_type == vlMotionTypeFrame) + { + for (i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + } + } + else + { + mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * mc->surface_tex_inv_size.y; + + for (i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + vb[i + 1].x = mo_vec[1].x; + vb[i + 1].y = mo_vec[1].y; + } + } + + /* fall-through */ + } + case vlMacroBlockTypeFwdPredicted: + case vlMacroBlockTypeBkwdPredicted: + { + struct vlVertex2f *vb; + + assert(ref_vb && ref_vb[0]); + + vb = ref_vb[0] + pos * 2 * 24; + + if (macroblock->mb_type == vlMacroBlockTypeBkwdPredicted) + { + mo_vec[0].x = macroblock->PMV[0][1][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[0].y = macroblock->PMV[0][1][1] * 0.5f * mc->surface_tex_inv_size.y; + + if (macroblock->mo_type == vlMotionTypeField) + { + mo_vec[1].x = macroblock->PMV[1][1][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[1].y = macroblock->PMV[1][1][1] * 0.5f * 
mc->surface_tex_inv_size.y; + } + } + else + { + mo_vec[0].x = macroblock->PMV[0][0][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[0].y = macroblock->PMV[0][0][1] * 0.5f * mc->surface_tex_inv_size.y; + + if (macroblock->mo_type == vlMotionTypeField) + { + mo_vec[1].x = macroblock->PMV[1][0][0] * 0.5f * mc->surface_tex_inv_size.x; + mo_vec[1].y = macroblock->PMV[1][0][1] * 0.5f * mc->surface_tex_inv_size.y; + } + } + + if (macroblock->mo_type == vlMotionTypeFrame) + { + for (i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + } + } + else + { + for (i = 0; i < 24 * 2; i += 2) + { + vb[i].x = mo_vec[0].x; + vb[i].y = mo_vec[0].y; + vb[i + 1].x = mo_vec[1].x; + vb[i + 1].y = mo_vec[1].y; + } + } + + /* fall-through */ + } + case vlMacroBlockTypeIntra: + { + const struct vlVertex2f unit = + { + mc->surface_tex_inv_size.x * VL_MACROBLOCK_WIDTH, + mc->surface_tex_inv_size.y * VL_MACROBLOCK_HEIGHT + }; + const struct vlVertex2f half = + { + mc->surface_tex_inv_size.x * (VL_MACROBLOCK_WIDTH / 2), + mc->surface_tex_inv_size.y * (VL_MACROBLOCK_HEIGHT / 2) + }; + + struct vlMacroBlockVertexStream0 *vb; + + vb = ycbcr_vb + pos * 24; + + SET_BLOCK + ( + vb, + macroblock->cbp, macroblock->mbx, macroblock->mby, + unit.x, unit.y, 0, 0, half.x, half.y, + 32, 2, 1, mc->zero_block + ); + + SET_BLOCK + ( + vb + 6, + macroblock->cbp, macroblock->mbx, macroblock->mby, + unit.x, unit.y, half.x, 0, half.x, half.y, + 16, 2, 1, mc->zero_block + ); + + SET_BLOCK + ( + vb + 12, + macroblock->cbp, macroblock->mbx, macroblock->mby, + unit.x, unit.y, 0, half.y, half.x, half.y, + 8, 2, 1, mc->zero_block + ); + + SET_BLOCK + ( + vb + 18, + macroblock->cbp, macroblock->mbx, macroblock->mby, + unit.x, unit.y, half.x, half.y, half.x, half.y, + 4, 2, 1, mc->zero_block + ); + + break; + } + default: + assert(0); + } + + return 0; +} + +static int vlFlush +( + struct vlRender *render +) +{ + struct vlR16SnormBufferedMC *mc; + struct pipe_context *pipe; + struct 
vlVertexShaderConsts *vs_consts; + unsigned int num_macroblocks[vlNumMacroBlockExTypes] = {0}; + unsigned int offset[vlNumMacroBlockExTypes]; + unsigned int vb_start = 0; + unsigned int i; + + assert(render); + + mc = (struct vlR16SnormBufferedMC*)render; + + if (!mc->buffered_surface) + return 0; + + if (mc->num_macroblocks < mc->macroblocks_per_picture) + return 0; + + assert(mc->num_macroblocks <= mc->macroblocks_per_picture); + + pipe = mc->pipe; + + for (i = 0; i < mc->num_macroblocks; ++i) + { + enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]); + + num_macroblocks[mb_type_ex]++; + } + + offset[0] = 0; + + for (i = 1; i < vlNumMacroBlockExTypes; ++i) + offset[i] = offset[i - 1] + num_macroblocks[i - 1]; + + { + struct vlMacroBlockVertexStream0 *ycbcr_vb; + struct vlVertex2f *ref_vb[2]; + + ycbcr_vb = (struct vlMacroBlockVertexStream0*)pipe_buffer_map + ( + pipe->screen, + mc->vertex_bufs.ycbcr.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD + ); + + for (i = 0; i < 2; ++i) + ref_vb[i] = (struct vlVertex2f*)pipe_buffer_map + ( + pipe->screen, + mc->vertex_bufs.ref[i].buffer, + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD + ); + + for (i = 0; i < mc->num_macroblocks; ++i) + { + enum vlMacroBlockTypeEx mb_type_ex = vlGetMacroBlockTypeEx(&mc->macroblocks[i]); + + vlGenMacroblockVerts(mc, &mc->macroblocks[i], offset[mb_type_ex], ycbcr_vb, ref_vb); + + offset[mb_type_ex]++; + } + + pipe_buffer_unmap(pipe->screen, mc->vertex_bufs.ycbcr.buffer); + for (i = 0; i < 2; ++i) + pipe_buffer_unmap(pipe->screen, mc->vertex_bufs.ref[i].buffer); + } + + for (i = 0; i < 3; ++i) + { + pipe->screen->transfer_unmap(pipe->screen, mc->tex_transfer[i]); + pipe->screen->tex_transfer_release(pipe->screen, &mc->tex_transfer[i]); + } + + mc->render_target.cbufs[0] = pipe->screen->get_tex_surface + ( + pipe->screen, + mc->buffered_surface->texture, + 0, 0, 0, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE + ); + + 
pipe->set_framebuffer_state(pipe, &mc->render_target); + pipe->set_viewport_state(pipe, &mc->viewport); + vs_consts = pipe_buffer_map + ( + pipe->screen, + mc->vs_const_buf.buffer, + PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD + ); + + vs_consts->denorm.x = mc->buffered_surface->texture->width[0]; + vs_consts->denorm.y = mc->buffered_surface->texture->height[0]; + + pipe_buffer_unmap(pipe->screen, mc->vs_const_buf.buffer); + pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &mc->vs_const_buf); + pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &mc->fs_const_buf); + + if (num_macroblocks[vlMacroBlockExTypeIntra] > 0) + { + pipe->set_vertex_buffers(pipe, 1, mc->vertex_bufs.all); + pipe->set_vertex_elements(pipe, 4, mc->vertex_elems); + pipe->set_sampler_textures(pipe, 3, mc->textures.all); + pipe->bind_sampler_states(pipe, 3, mc->samplers.all); + pipe->bind_vs_state(pipe, mc->i_vs); + pipe->bind_fs_state(pipe, mc->i_fs); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeIntra] * 24); + vb_start += num_macroblocks[vlMacroBlockExTypeIntra] * 24; + } + + if (num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] > 0) + { + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all); + pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); + mc->textures.ref[0] = mc->past_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures.all); + pipe->bind_sampler_states(pipe, 4, mc->samplers.all); + pipe->bind_vs_state(pipe, mc->p_vs[0]); + pipe->bind_fs_state(pipe, mc->p_fs[0]); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24); + vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedFrame] * 24; + } + + if (num_macroblocks[vlMacroBlockExTypeFwdPredictedField] > 0) + { + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all); + pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); + mc->textures.ref[0] = mc->past_surface->texture; + 
pipe->set_sampler_textures(pipe, 4, mc->textures.all); + pipe->bind_sampler_states(pipe, 4, mc->samplers.all); + pipe->bind_vs_state(pipe, mc->p_vs[1]); + pipe->bind_fs_state(pipe, mc->p_fs[1]); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24); + vb_start += num_macroblocks[vlMacroBlockExTypeFwdPredictedField] * 24; + } + + if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] > 0) + { + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all); + pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); + mc->textures.ref[0] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures.all); + pipe->bind_sampler_states(pipe, 4, mc->samplers.all); + pipe->bind_vs_state(pipe, mc->p_vs[0]); + pipe->bind_fs_state(pipe, mc->p_fs[0]); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24); + vb_start += num_macroblocks[vlMacroBlockExTypeBkwdPredictedFrame] * 24; + } + + if (num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] > 0) + { + pipe->set_vertex_buffers(pipe, 2, mc->vertex_bufs.all); + pipe->set_vertex_elements(pipe, 6, mc->vertex_elems); + mc->textures.ref[0] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 4, mc->textures.all); + pipe->bind_sampler_states(pipe, 4, mc->samplers.all); + pipe->bind_vs_state(pipe, mc->p_vs[1]); + pipe->bind_fs_state(pipe, mc->p_fs[1]); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24); + vb_start += num_macroblocks[vlMacroBlockExTypeBkwdPredictedField] * 24; + } + + if (num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] > 0) + { + pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs.all); + pipe->set_vertex_elements(pipe, 8, mc->vertex_elems); + mc->textures.ref[0] = mc->past_surface->texture; + mc->textures.ref[1] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 5, 
mc->textures.all); + pipe->bind_sampler_states(pipe, 5, mc->samplers.all); + pipe->bind_vs_state(pipe, mc->b_vs[0]); + pipe->bind_fs_state(pipe, mc->b_fs[0]); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24); + vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedFrame] * 24; + } + + if (num_macroblocks[vlMacroBlockExTypeBiPredictedField] > 0) + { + pipe->set_vertex_buffers(pipe, 3, mc->vertex_bufs.all); + pipe->set_vertex_elements(pipe, 8, mc->vertex_elems); + mc->textures.ref[0] = mc->past_surface->texture; + mc->textures.ref[1] = mc->future_surface->texture; + pipe->set_sampler_textures(pipe, 5, mc->textures.all); + pipe->bind_sampler_states(pipe, 5, mc->samplers.all); + pipe->bind_vs_state(pipe, mc->b_vs[1]); + pipe->bind_fs_state(pipe, mc->b_fs[1]); + + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLES, vb_start, num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24); + vb_start += num_macroblocks[vlMacroBlockExTypeBiPredictedField] * 24; + } + + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &mc->buffered_surface->render_fence); + pipe_surface_reference(&mc->render_target.cbufs[0], NULL); + + for (i = 0; i < 3; ++i) + mc->zero_block[i].x = -1.0f; + + mc->buffered_surface = NULL; + mc->num_macroblocks = 0; + + return 0; +} + +static int vlRenderMacroBlocksMpeg2R16SnormBuffered +( + struct vlRender *render, + struct vlMpeg2MacroBlockBatch *batch, + struct vlSurface *surface +) +{ + struct vlR16SnormBufferedMC *mc; + bool new_surface = false; + unsigned int i; + + assert(render); + + mc = (struct vlR16SnormBufferedMC*)render; + + if (mc->buffered_surface) + { + if (mc->buffered_surface != surface) + { + vlFlush(&mc->base); + new_surface = true; + } + } + else + new_surface = true; + + if (new_surface) + { + mc->buffered_surface = surface; + mc->past_surface = batch->past_surface; + mc->future_surface = batch->future_surface; + mc->surface_tex_inv_size.x = 1.0f / surface->texture->width[0]; + 
mc->surface_tex_inv_size.y = 1.0f / surface->texture->height[0]; + + for (i = 0; i < 3; ++i) + { + mc->tex_transfer[i] = mc->pipe->screen->get_tex_transfer + ( + mc->pipe->screen, + mc->textures.all[i], + 0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0, + surface->texture->width[0], + surface->texture->height[0] + ); + + mc->texels[i] = mc->pipe->screen->transfer_map(mc->pipe->screen, mc->tex_transfer[i]); + } + } + + for (i = 0; i < batch->num_macroblocks; ++i) + vlGrabMacroBlock(mc, &batch->macroblocks[i]); + + return 0; +} + +static inline int vlEnd +( + struct vlRender *render +) +{ + assert(render); + + return 0; +} + +static int vlDestroy +( + struct vlRender *render +) +{ + struct vlR16SnormBufferedMC *mc; + struct pipe_context *pipe; + unsigned int i; + + assert(render); + + mc = (struct vlR16SnormBufferedMC*)render; + pipe = mc->pipe; + + for (i = 0; i < 5; ++i) + pipe->delete_sampler_state(pipe, mc->samplers.all[i]); + + for (i = 0; i < 3; ++i) + pipe_buffer_reference(pipe->screen, &mc->vertex_bufs.all[i].buffer, NULL); + + /* Textures 3 & 4 are not created directly, no need to release them here */ + for (i = 0; i < 3; ++i) + pipe_texture_reference(&mc->textures.all[i], NULL); + + pipe->delete_vs_state(pipe, mc->i_vs); + pipe->delete_fs_state(pipe, mc->i_fs); + + for (i = 0; i < 2; ++i) + { + pipe->delete_vs_state(pipe, mc->p_vs[i]); + pipe->delete_fs_state(pipe, mc->p_fs[i]); + pipe->delete_vs_state(pipe, mc->b_vs[i]); + pipe->delete_fs_state(pipe, mc->b_fs[i]); + } + + pipe_buffer_reference(pipe->screen, &mc->vs_const_buf.buffer, NULL); + pipe_buffer_reference(pipe->screen, &mc->fs_const_buf.buffer, NULL); + + FREE(mc->macroblocks); + FREE(mc); + + return 0; +} + +/* + * Muliplier renormalizes block samples from 16 bits to 12 bits. + * Divider is used when calculating Y % 2 for choosing top or bottom + * field for P or B macroblocks. + * TODO: Use immediates. 
+ */ +static const struct vlFragmentShaderConsts fs_consts = +{ + {32767.0f / 255.0f, 32767.0f / 255.0f, 32767.0f / 255.0f, 0.0f}, + {0.5f, 2.0f, 0.0f, 0.0f} +}; + +#include "vl_r16snorm_mc_buf_shaders.inc" + +static int vlCreateDataBufs +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int mbw = align(mc->picture_width, VL_MACROBLOCK_WIDTH) / VL_MACROBLOCK_WIDTH; + const unsigned int mbh = align(mc->picture_height, VL_MACROBLOCK_HEIGHT) / VL_MACROBLOCK_HEIGHT; + + struct pipe_context *pipe; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + mc->macroblocks_per_picture = mbw * mbh; + + /* Create our vertex buffers */ + mc->vertex_bufs.ycbcr.stride = sizeof(struct vlVertex2f) * 4; + mc->vertex_bufs.ycbcr.max_index = 24 * mc->macroblocks_per_picture - 1; + mc->vertex_bufs.ycbcr.buffer_offset = 0; + mc->vertex_bufs.ycbcr.buffer = pipe_buffer_create + ( + pipe->screen, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD, + sizeof(struct vlVertex2f) * 4 * 24 * mc->macroblocks_per_picture + ); + + for (i = 1; i < 3; ++i) + { + mc->vertex_bufs.all[i].stride = sizeof(struct vlVertex2f) * 2; + mc->vertex_bufs.all[i].max_index = 24 * mc->macroblocks_per_picture - 1; + mc->vertex_bufs.all[i].buffer_offset = 0; + mc->vertex_bufs.all[i].buffer = pipe_buffer_create + ( + pipe->screen, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_DISCARD, + sizeof(struct vlVertex2f) * 2 * 24 * mc->macroblocks_per_picture + ); + } + + /* Position element */ + mc->vertex_elems[0].src_offset = 0; + mc->vertex_elems[0].vertex_buffer_index = 0; + mc->vertex_elems[0].nr_components = 2; + mc->vertex_elems[0].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Luma, texcoord element */ + mc->vertex_elems[1].src_offset = sizeof(struct vlVertex2f); + mc->vertex_elems[1].vertex_buffer_index = 0; + mc->vertex_elems[1].nr_components = 2; + mc->vertex_elems[1].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Chroma Cr texcoord element */ + 
mc->vertex_elems[2].src_offset = sizeof(struct vlVertex2f) * 2; + mc->vertex_elems[2].vertex_buffer_index = 0; + mc->vertex_elems[2].nr_components = 2; + mc->vertex_elems[2].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Chroma Cb texcoord element */ + mc->vertex_elems[3].src_offset = sizeof(struct vlVertex2f) * 3; + mc->vertex_elems[3].vertex_buffer_index = 0; + mc->vertex_elems[3].nr_components = 2; + mc->vertex_elems[3].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* First ref surface top field texcoord element */ + mc->vertex_elems[4].src_offset = 0; + mc->vertex_elems[4].vertex_buffer_index = 1; + mc->vertex_elems[4].nr_components = 2; + mc->vertex_elems[4].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* First ref surface bottom field texcoord element */ + mc->vertex_elems[5].src_offset = sizeof(struct vlVertex2f); + mc->vertex_elems[5].vertex_buffer_index = 1; + mc->vertex_elems[5].nr_components = 2; + mc->vertex_elems[5].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Second ref surface top field texcoord element */ + mc->vertex_elems[6].src_offset = 0; + mc->vertex_elems[6].vertex_buffer_index = 2; + mc->vertex_elems[6].nr_components = 2; + mc->vertex_elems[6].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Second ref surface bottom field texcoord element */ + mc->vertex_elems[7].src_offset = sizeof(struct vlVertex2f); + mc->vertex_elems[7].vertex_buffer_index = 2; + mc->vertex_elems[7].nr_components = 2; + mc->vertex_elems[7].src_format = PIPE_FORMAT_R32G32_FLOAT; + + /* Create our constant buffer */ + mc->vs_const_buf.buffer = pipe_buffer_create + ( + pipe->screen, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_CONSTANT | PIPE_BUFFER_USAGE_DISCARD, + sizeof(struct vlVertexShaderConsts) + ); + + mc->fs_const_buf.buffer = pipe_buffer_create + ( + pipe->screen, + DEFAULT_BUF_ALIGNMENT, + PIPE_BUFFER_USAGE_CONSTANT, + sizeof(struct vlFragmentShaderConsts) + ); + + memcpy + ( + pipe_buffer_map(pipe->screen, mc->fs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE), + 
&fs_consts, + sizeof(struct vlFragmentShaderConsts) + ); + + pipe_buffer_unmap(pipe->screen, mc->fs_const_buf.buffer); + + mc->macroblocks = MALLOC(sizeof(struct vlMpeg2MacroBlock) * mc->macroblocks_per_picture); + + return 0; +} + +static int vlInit +( + struct vlR16SnormBufferedMC *mc +) +{ + struct pipe_context *pipe; + struct pipe_sampler_state sampler; + struct pipe_texture template; + unsigned int filters[5]; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + + mc->buffered_surface = NULL; + mc->past_surface = NULL; + mc->future_surface = NULL; + for (i = 0; i < 3; ++i) + mc->zero_block[i].x = -1.0f; + mc->num_macroblocks = 0; + + /* For MC we render to textures, which are rounded up to nearest POT */ + mc->viewport.scale[0] = vlRoundUpPOT(mc->picture_width); + mc->viewport.scale[1] = vlRoundUpPOT(mc->picture_height); + mc->viewport.scale[2] = 1; + mc->viewport.scale[3] = 1; + mc->viewport.translate[0] = 0; + mc->viewport.translate[1] = 0; + mc->viewport.translate[2] = 0; + mc->viewport.translate[3] = 0; + + mc->render_target.width = vlRoundUpPOT(mc->picture_width); + mc->render_target.height = vlRoundUpPOT(mc->picture_height); + mc->render_target.nr_cbufs = 1; + /* FB for MC stage is a vlSurface created by the user, set at render time */ + mc->render_target.zsbuf = NULL; + + filters[0] = PIPE_TEX_FILTER_NEAREST; + /* FIXME: Linear causes discoloration around block edges */ + filters[1] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/; + filters[2] = /*mc->picture_format == vlFormatYCbCr444 ?*/ PIPE_TEX_FILTER_NEAREST /*: PIPE_TEX_FILTER_LINEAR*/; + filters[3] = PIPE_TEX_FILTER_LINEAR; + filters[4] = PIPE_TEX_FILTER_LINEAR; + + for (i = 0; i < 5; ++i) + { + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_img_filter = filters[i]; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + 
sampler.mag_img_filter = filters[i]; + sampler.compare_mode = PIPE_TEX_COMPARE_NONE; + sampler.compare_func = PIPE_FUNC_ALWAYS; + sampler.normalized_coords = 1; + /*sampler.prefilter = ;*/ + /*sampler.shadow_ambient = ;*/ + /*sampler.lod_bias = ;*/ + sampler.min_lod = 0; + /*sampler.max_lod = ;*/ + /*sampler.border_color[i] = ;*/ + /*sampler.max_anisotropy = ;*/ + mc->samplers.all[i] = pipe->create_sampler_state(pipe, &sampler); + } + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_R16_SNORM; + template.last_level = 0; + template.width[0] = vlRoundUpPOT(mc->picture_width); + template.height[0] = vlRoundUpPOT(mc->picture_height); + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC; + + mc->textures.y = pipe->screen->texture_create(pipe->screen, &template); + + if (mc->picture_format == vlFormatYCbCr420) + { + template.width[0] = vlRoundUpPOT(mc->picture_width / 2); + template.height[0] = vlRoundUpPOT(mc->picture_height / 2); + } + else if (mc->picture_format == vlFormatYCbCr422) + template.height[0] = vlRoundUpPOT(mc->picture_height / 2); + + mc->textures.cb = pipe->screen->texture_create(pipe->screen, &template); + mc->textures.cr = pipe->screen->texture_create(pipe->screen, &template); + + /* textures.all[3] & textures.all[4] are assigned from vlSurfaces for P and B macroblocks at render time */ + + vlCreateVertexShaderIMB(mc); + vlCreateFragmentShaderIMB(mc); + vlCreateVertexShaderFramePMB(mc); + vlCreateVertexShaderFieldPMB(mc); + vlCreateFragmentShaderFramePMB(mc); + vlCreateFragmentShaderFieldPMB(mc); + vlCreateVertexShaderFrameBMB(mc); + vlCreateVertexShaderFieldBMB(mc); + vlCreateFragmentShaderFrameBMB(mc); + vlCreateFragmentShaderFieldBMB(mc); + vlCreateDataBufs(mc); + + return 0; +} + +int vlCreateR16SNormBufferedMC +( + struct pipe_context *pipe, + 
unsigned int picture_width, + unsigned int picture_height, + enum vlFormat picture_format, + struct vlRender **render +) +{ + struct vlR16SnormBufferedMC *mc; + + assert(pipe); + assert(render); + + mc = CALLOC_STRUCT(vlR16SnormBufferedMC); + + mc->base.vlBegin = &vlBegin; + mc->base.vlRenderMacroBlocksMpeg2 = &vlRenderMacroBlocksMpeg2R16SnormBuffered; + mc->base.vlEnd = &vlEnd; + mc->base.vlFlush = &vlFlush; + mc->base.vlDestroy = &vlDestroy; + mc->pipe = pipe; + mc->picture_width = picture_width; + mc->picture_height = picture_height; + + vlInit(mc); + + *render = &mc->base; + + return 0; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h new file mode 100644 index 0000000000..27177d64ca --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.h @@ -0,0 +1,18 @@ +#ifndef vl_r16snorm_mc_buf_h +#define vl_r16snorm_mc_buf_h + +#include "vl_types.h" + +struct pipe_context; +struct vlRender; + +int vlCreateR16SNormBufferedMC +( + struct pipe_context *pipe, + unsigned int picture_width, + unsigned int picture_height, + enum vlFormat picture_format, + struct vlRender **render +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc new file mode 100644 index 0000000000..ef4a4b2add --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf_shaders.inc @@ -0,0 +1,1185 @@ +static int vlCreateVertexShaderIMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 50; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct 
tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + */ + for (i = 0; i < 4; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + */ + for (i = 0; i < 4; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->i_vs = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderIMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = 
(struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + */ + for (i = 0; i < 3; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header,max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += 
tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->i_fs = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFramePMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + * decl i4 ; Ref surface top field texcoords + * decl i5 ; Ref surface bottom field texcoords (unused, packed in the 
same stream) + */ + for (i = 0; i < 6; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; Ref macroblock texcoords + */ + for (i = 0; i < 5; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* add o4, i0, i4 ; Translate vertex pos by motion vec to form ref macroblock texcoords */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->p_vs[0] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateVertexShaderFieldPMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state vs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); 
+ + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header); + + ti = 3; + + /* + * decl i0 ; Vertex pos + * decl i1 ; Luma texcoords + * decl i2 ; Chroma Cb texcoords + * decl i3 ; Chroma Cr texcoords + * decl i4 ; Ref macroblock top field texcoords + * decl i5 ; Ref macroblock bottom field texcoords + */ + for (i = 0; i < 6; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Render target dimensions */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; Ref macroblock top field texcoords + * decl o5 ; Ref macroblock bottom field texcoords + * decl o6 ; Denormalized vertex pos + */ + for (i = 0; i < 7; i++) + { + decl = vl_decl_output(i == 0 ? 
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o4, i0, i4 ; Translate vertex pos by motion vec to form top field macroblock texcoords + * add o5, i0, i5 ; Translate vertex pos by motion vec to form bottom field macroblock texcoords + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul o6, i0, c0 ; Denorm vertex pos */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 6, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->p_vs[1] = pipe->create_vs_state(pipe, &vs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFramePMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 100; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); 
+ /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; Ref macroblock texcoords + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = 
vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* tex2d t1, i3, s3 ; Read texel from ref macroblock */ + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, 3, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* add o0, t0, t1 ; Add ref and differential to form final output */ + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + fs.tokens = tokens; + mc->p_fs[0] = pipe->create_fs_state(pipe, &fs); + free(tokens); + + return 0; +} + +static int vlCreateFragmentShaderFieldPMB +( + struct vlR16SnormBufferedMC *mc +) +{ + const unsigned int max_tokens = 200; + + struct pipe_context *pipe; + struct pipe_shader_state fs; + struct tgsi_token *tokens; + struct tgsi_header *header; + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + + unsigned int ti; + unsigned int i; + + assert(mc); + + pipe = mc->pipe; + tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token)); + + /* Version */ + *(struct tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct 
tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; Ref macroblock top field texcoords + * decl i4 ; Ref macroblock bottom field texcoords + * decl i5 ; Denormalized vertex pos + */ + for (i = 0; i < 6; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t4 */ + decl = vl_decl_temps(0, 4); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for ref surface texture + */ + for (i = 0; i < 4; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, 
TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i3, s3 ; Read texel from ref macroblock top field + * tex2d t2, i4, s3 ; Read texel from ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* XXX: Pos values off by 0.5? 
*/ + /* sub t4, i5.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 5, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - 
ti);
+
+	/* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */
+	inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* TODO: Move to conditional tex fetch on t3 instead of lerp */
+	/* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */
+	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* add o0, t0, t1 ; Add ref and differential to form final output */
+	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	fs.tokens = tokens;
+	mc->p_fs[1] = pipe->create_fs_state(pipe, &fs);
+	free(tokens);
+
+	return 0;
+}
+
+/*
+ * Assembles a TGSI vertex shader token by token into a temporary buffer of
+ * max_tokens entries, hands it to pipe->create_vs_state() and stores the
+ * resulting state object in mc->b_vs[0].
+ *
+ * The shader copies position and luma/chroma texcoords through and adds the
+ * vertex position to two motion-vector inputs to form the texcoords of two
+ * reference macroblocks.
+ *
+ * Returns 0 on success, 1 if the token buffer cannot be allocated.
+ */
+static int vlCreateVertexShaderFrameBMB
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	const unsigned int max_tokens = 100;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state vs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+	/* Every statement below writes through tokens; fail cleanly instead of dereferencing NULL */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+	/* Slots 0-2 hold version, header and processor; declarations start at slot 3 */
+	ti = 3;
+
+	/*
+	 * decl i0 ; Vertex pos
+	 * decl i1 ; Luma texcoords
+	 * decl i2 ; Chroma Cb texcoords
+	 * decl i3 ; Chroma Cr
texcoords + * decl i4 ; First ref macroblock top field texcoords + * decl i5 ; First ref macroblock bottom field texcoords (unused, packed in the same stream) + * decl i6 ; Second ref macroblock top field texcoords + * decl i7 ; Second ref macroblock bottom field texcoords (unused, packed in the same stream) + */ + for (i = 0; i < 8; i++) + { + decl = vl_decl_input(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; First ref macroblock texcoords + * decl o5 ; Second ref macroblock texcoords + */ + for (i = 0; i < 6; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o4, i0, i4 ; Translate vertex pos by motion vec to form first ref macroblock texcoords + * add o5, i0, i6 ; Translate vertex pos by motion vec to form second ref macroblock texcoords + */ + for (i = 0; i < 2; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, (i + 2) * 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* end */ + inst = vl_end(); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + vs.tokens = tokens; + mc->b_vs[0] = pipe->create_vs_state(pipe, &vs); + 
free(tokens);
+
+	return 0;
+}
+
+/*
+ * Assembles a TGSI vertex shader token by token into a temporary buffer of
+ * max_tokens entries, hands it to pipe->create_vs_state() and stores the
+ * resulting state object in mc->b_vs[1].
+ *
+ * Compared with the frame variant this shader has separate top/bottom field
+ * texcoords for both reference macroblocks and an extra output carrying the
+ * vertex position multiplied by constant c0 (the render target dimensions).
+ *
+ * Returns 0 on success, 1 if the token buffer cannot be allocated.
+ */
+static int vlCreateVertexShaderFieldBMB
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	const unsigned int max_tokens = 100;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state vs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+	/* Every statement below writes through tokens; fail cleanly instead of dereferencing NULL */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_VERTEX, header);
+
+	/* Slots 0-2 hold version, header and processor; declarations start at slot 3 */
+	ti = 3;
+
+	/*
+	 * decl i0 ; Vertex pos
+	 * decl i1 ; Luma texcoords
+	 * decl i2 ; Chroma Cb texcoords
+	 * decl i3 ; Chroma Cr texcoords
+	 * decl i4 ; First ref macroblock top field texcoords
+	 * decl i5 ; First ref macroblock bottom field texcoords
+	 * decl i6 ; Second ref macroblock top field texcoords
+	 * decl i7 ; Second ref macroblock bottom field texcoords
+	 */
+	for (i = 0; i < 8; i++)
+	{
+		decl = vl_decl_input(i == 0 ?
TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl c0 ; Render target dimensions */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl o0 ; Vertex pos + * decl o1 ; Luma texcoords + * decl o2 ; Chroma Cb texcoords + * decl o3 ; Chroma Cr texcoords + * decl o4 ; First ref macroblock top field texcoords + * decl o5 ; First ref macroblock Bottom field texcoords + * decl o6 ; Second ref macroblock top field texcoords + * decl o7 ; Second ref macroblock Bottom field texcoords + * decl o8 ; Denormalized vertex pos + */ + for (i = 0; i < 9; i++) + { + decl = vl_decl_output(i == 0 ? TGSI_SEMANTIC_POSITION : TGSI_SEMANTIC_GENERIC, i, i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* decl t0, t1 */ + decl = vl_decl_temps(0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * mov o0, i0 ; Move input vertex pos to output + * mov o1, i1 ; Move input luma texcoords to output + * mov o2, i2 ; Move input chroma Cb texcoords to output + * mov o3, i3 ; Move input chroma Cr texcoords to output + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_OUTPUT, i, TGSI_FILE_INPUT, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* + * add o4, i0, i4 ; Translate vertex pos by motion vec to form first top field macroblock texcoords + * add o5, i0, i5 ; Translate vertex pos by motion vec to form first bottom field macroblock texcoords + * add o6, i0, i6 ; Translate vertex pos by motion vec to form second top field macroblock texcoords + * add o7, i0, i7 ; Translate vertex pos by motion vec to form second bottom field macroblock texcoords + */ + for (i = 0; i < 4; ++i) + { + inst = vl_inst3(TGSI_OPCODE_ADD, 
TGSI_FILE_OUTPUT, i + 4, TGSI_FILE_INPUT, 0, TGSI_FILE_INPUT, i + 4);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* mul o8, i0, c0 ; Denorm vertex pos */
+	inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_OUTPUT, 8, TGSI_FILE_INPUT, 0, TGSI_FILE_CONSTANT, 0);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	vs.tokens = tokens;
+	mc->b_vs[1] = pipe->create_vs_state(pipe, &vs);
+	free(tokens);
+
+	return 0;
+}
+
+/*
+ * Assembles a TGSI fragment shader token by token into a temporary buffer of
+ * max_tokens entries, hands it to pipe->create_fs_state() and stores the
+ * resulting state object in mc->b_fs[0].
+ *
+ * The shader samples luma/Cb/Cr, rescales the result by constant c0, blends
+ * the texels of two reference surfaces with the weight in c1.x and adds the
+ * two together to form the output colour.
+ *
+ * Returns 0 on success, 1 if the token buffer cannot be allocated.
+ */
+static int vlCreateFragmentShaderFrameBMB
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	const unsigned int max_tokens = 100;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state fs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+	/* Every statement below writes through tokens; fail cleanly instead of dereferencing NULL */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct tgsi_version*)&tokens[0] = tgsi_build_version();
+	/* Header */
+	header = (struct tgsi_header*)&tokens[1];
+	*header = tgsi_build_header();
+	/* Processor */
+	*(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header);
+
+	/* Slots 0-2 hold version, header and processor; declarations start at slot 3 */
+	ti = 3;
+
+	/*
+	 * decl i0 ; Luma texcoords
+	 * decl i1 ; Chroma Cb texcoords
+	 * decl i2 ; Chroma Cr texcoords
+	 * decl i3 ; First ref macroblock texcoords
+	 * decl i4 ; Second ref macroblock texcoords
+	 */
+	for (i = 0; i < 5; ++i)
+	{
+		decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR);
+		ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/*
+	 * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm
+	 * decl c1 ; Constant 1/2 in .x channel to use as weight to blend past and future texels
+	 */
+	decl =
vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t2 */ + decl = vl_decl_temps(0, 2); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for first ref surface texture + * decl s4 ; Sampler for second ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti 
+= tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/*
+	 * tex2d t1, i3, s3 ; Read texel from first ref macroblock
+	 * tex2d t2, i4, s4 ; Read texel from second ref macroblock
+	 */
+	for (i = 0; i < 2; ++i)
+	{
+		inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, i + 3);
+		ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+	}
+
+	/* lerp t1, c1.x, t1, t2 ; Blend past and future texels */
+	inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2);
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* add o0, t0, t1 ; Add past/future ref and differential to form final output */
+	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	fs.tokens = tokens;
+	mc->b_fs[0] = pipe->create_fs_state(pipe, &fs);
+	free(tokens);
+
+	return 0;
+}
+
+/*
+ * Assembles a TGSI fragment shader token by token into a temporary buffer of
+ * max_tokens entries, hands it to pipe->create_fs_state() and stores the
+ * resulting state object in mc->b_fs[1].
+ *
+ * This is the largest of the shaders built here (200-token buffer): for each
+ * of two reference surfaces it fetches a top and a bottom field texel and
+ * selects between them on Y % 2 before blending the two references.
+ *
+ * Returns 0 on success, 1 if the token buffer cannot be allocated.
+ */
+static int vlCreateFragmentShaderFieldBMB
+(
+	struct vlR16SnormBufferedMC *mc
+)
+{
+	const unsigned int max_tokens = 200;
+
+	struct pipe_context *pipe;
+	struct pipe_shader_state fs;
+	struct tgsi_token *tokens;
+	struct tgsi_header *header;
+
+	struct tgsi_full_declaration decl;
+	struct tgsi_full_instruction inst;
+
+	unsigned int ti;
+	unsigned int i;
+
+	assert(mc);
+
+	pipe = mc->pipe;
+	tokens = (struct tgsi_token*)malloc(max_tokens * sizeof(struct tgsi_token));
+
+	/* Every statement below writes through tokens; fail cleanly instead of dereferencing NULL */
+	if (!tokens)
+		return 1;
+
+	/* Version */
+	*(struct
tgsi_version*)&tokens[0] = tgsi_build_version(); + /* Header */ + header = (struct tgsi_header*)&tokens[1]; + *header = tgsi_build_header(); + /* Processor */ + *(struct tgsi_processor*)&tokens[2] = tgsi_build_processor(TGSI_PROCESSOR_FRAGMENT, header); + + ti = 3; + + /* + * decl i0 ; Luma texcoords + * decl i1 ; Chroma Cb texcoords + * decl i2 ; Chroma Cr texcoords + * decl i3 ; First ref macroblock top field texcoords + * decl i4 ; First ref macroblock bottom field texcoords + * decl i5 ; Second ref macroblock top field texcoords + * decl i6 ; Second ref macroblock bottom field texcoords + * decl i7 ; Denormalized vertex pos + */ + for (i = 0; i < 8; ++i) + { + decl = vl_decl_interpolated_input(TGSI_SEMANTIC_GENERIC, i + 1, i, i, TGSI_INTERPOLATE_LINEAR); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * decl c0 ; Scaling factor, rescales 16-bit snorm to 9-bit snorm + * decl c1 ; Constants 1/2 & 2 in .x, .y channels to use as weight to blend past and future texels + * ; and for Y-mod-2 top/bottom field selection + */ + decl = vl_decl_constants(TGSI_SEMANTIC_GENERIC, 0, 0, 1); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl o0 ; Fragment color */ + decl = vl_decl_output(TGSI_SEMANTIC_COLOR, 0, 0, 0); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* decl t0-t5 */ + decl = vl_decl_temps(0, 5); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + + /* + * decl s0 ; Sampler for luma texture + * decl s1 ; Sampler for chroma Cb texture + * decl s2 ; Sampler for chroma Cr texture + * decl s3 ; Sampler for first ref surface texture + * decl s4 ; Sampler for second ref surface texture + */ + for (i = 0; i < 5; ++i) + { + decl = vl_decl_samplers(i, i); + ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, max_tokens - ti); + } + + /* + * tex2d t1, i0, s0 ; Read texel from luma texture + * mov t0.x, 
t1.x ; Move luma sample into .x component + * tex2d t1, i1, s1 ; Read texel from chroma Cb texture + * mov t0.y, t1.x ; Move Cb sample into .y component + * tex2d t1, i2, s2 ; Read texel from chroma Cr texture + * mov t0.z, t1.x ; Move Cr sample into .z component + */ + for (i = 0; i < 3; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_INPUT, i, TGSI_FILE_SAMPLER, i); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + inst = vl_inst2(TGSI_OPCODE_MOV, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X << i; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* mul t0, t0, c0 ; Rescale texel to correct range */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_CONSTANT, 0); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* XXX: Pos values off by 0.5? 
*/ + /* sub t4, i7.y, c1.x ; Sub 0.5 from denormalized pos */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_INPUT, 7, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t4, c1.x ; Multiply pos Y-coord by 1/2 */ + inst = vl_inst3(TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* floor t3, t3 ; Get rid of fractional part */ + inst = vl_inst2(TGSI_OPCODE_FLOOR, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* mul t3, t3, c1.y ; Multiply by 2 */ + inst = vl_inst3( TGSI_OPCODE_MUL, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_CONSTANT, 1); + inst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + inst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - 
ti); + + /* sub t3, t4, t3 ; Subtract from original Y to get Y % 2 */ + inst = vl_inst3(TGSI_OPCODE_SUB, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t1, i3, s3 ; Read texel from past ref macroblock top field + * tex2d t2, i4, s3 ; Read texel from past ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 1, TGSI_FILE_INPUT, i + 3, TGSI_FILE_SAMPLER, 3); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t1, t3, t1, t2 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* + * tex2d t4, i5, s4 ; Read texel from future ref macroblock top field + * tex2d t5, i6, s4 ; Read texel from future ref macroblock bottom field + */ + for (i = 0; i < 2; ++i) + { + inst = vl_tex(TGSI_TEXTURE_2D, TGSI_FILE_TEMPORARY, i + 4, TGSI_FILE_INPUT, i + 5, TGSI_FILE_SAMPLER, 4); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + } + + /* TODO: Move to conditional tex fetch on t3 instead of lerp */ + /* lerp t2, t3, t4, t5 ; Choose between top and bottom fields based on Y % 2 */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 2, TGSI_FILE_TEMPORARY, 3, TGSI_FILE_TEMPORARY, 4, TGSI_FILE_TEMPORARY, 5); + ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti); + + /* lerp t1, c1.x, t1, t2 ; Blend past and future texels */ + inst = vl_inst4(TGSI_OPCODE_LERP, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_CONSTANT, 1, TGSI_FILE_TEMPORARY, 1, TGSI_FILE_TEMPORARY, 2); + inst.FullSrcRegisters[0].SrcRegister.SwizzleX = 
TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_X;
+	inst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X;
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* add o0, t0, t1 ; Add past/future ref and differential to form final output */
+	inst = vl_inst3(TGSI_OPCODE_ADD, TGSI_FILE_OUTPUT, 0, TGSI_FILE_TEMPORARY, 0, TGSI_FILE_TEMPORARY, 1);
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	/* end */
+	inst = vl_end();
+	ti += tgsi_build_full_instruction(&inst, &tokens[ti], header, max_tokens - ti);
+
+	fs.tokens = tokens;
+	mc->b_fs[1] = pipe->create_fs_state(pipe, &fs);
+	free(tokens);
+
+	return 0;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_render.h b/src/gallium/state_trackers/g3dvl/vl_render.h
new file mode 100644
index 0000000000..166030b498
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_render.h
@@ -0,0 +1,38 @@
+#ifndef vl_render_h
+#define vl_render_h
+
+#include "vl_types.h"
+
+struct pipe_surface;
+
+/*
+ * Table of function pointers implemented by a renderer back end.
+ * Every entry receives the vlRender object itself as its first argument
+ * and returns an int (presumably 0 on success, like the other vl* calls
+ * in this tracker - confirm against the implementations).
+ */
+struct vlRender
+{
+	/* Called with only the renderer; by its name, opens a rendering pass */
+	int (*vlBegin)(struct vlRender *render);
+
+	/* Takes a batch of MPEG-2 macroblocks and the target surface */
+	int (*vlRenderMacroBlocksMpeg2)(struct vlRender *render, struct vlMpeg2MacroBlockBatch *batch, struct vlSurface *surface);
+
+	/* Called with only the renderer; by its name, closes a rendering pass */
+	int (*vlEnd)(struct vlRender *render);
+
+	/* Called with only the renderer; by its name, flushes pending work */
+	int (*vlFlush)(struct vlRender *render);
+
+	/* Called with only the renderer; by its name, releases the renderer */
+	int (*vlDestroy)(struct vlRender *render);
+};
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_screen.c b/src/gallium/state_trackers/g3dvl/vl_screen.c
new file mode 100644
index 0000000000..ade8643a66
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_screen.c
@@ -0,0 +1,115 @@
+#define VL_INTERNAL
+#include "vl_screen.h"
+#include <assert.h>
+#include <util/u_memory.h>
+
+int vlCreateScreen
+(
+	struct vlDisplay *display,
+	int screen,
+	struct pipe_screen *pscreen,
+	struct vlScreen **vl_screen
+)
+{
+	struct vlScreen *scrn;
+
+	assert(display);
+	assert(pscreen);
+	assert(vl_screen);
+
+	/* Zero-initialised allocation; 1 is the failure code reported to the caller */
+	scrn = CALLOC_STRUCT(vlScreen);
+
+	if (!scrn)
+		return 1;
+
+	scrn->display = display;
+	scrn->ordinal = screen;
+	scrn->pscreen = pscreen;
+	*vl_screen = scrn;
+
+	return 0;
+}
+
+/* Releases a screen object with FREE(); always returns 0. */
+int vlDestroyScreen(struct vlScreen *screen)
+{
+	assert(screen);
+
+	FREE(screen);
+
+	return 0;
+}
+
+/* Returns the display stored in the screen. */
+struct vlDisplay* vlGetDisplay(struct vlScreen *screen)
+{
+	assert(screen);
+
+	return screen->display;
+}
+
+/* Returns the pipe_screen stored in the screen. */
+struct pipe_screen* vlGetPipeScreen(struct vlScreen *screen)
+{
+	assert(screen);
+
+	return screen->pscreen;
+}
+
+/* Returns vlProfileCount; the screen argument is only checked, not read. */
+unsigned int vlGetMaxProfiles(struct vlScreen *screen)
+{
+	assert(screen);
+
+	return vlProfileCount;
+}
+
+/*
+ * Writes the two MPEG-2 profiles into profiles[0] and profiles[1], so the
+ * caller must supply room for at least two entries. Always returns 0.
+ */
+int vlQueryProfiles(struct vlScreen *screen, enum vlProfile *profiles)
+{
+	assert(screen);
+	assert(profiles);
+
+	profiles[0] = vlProfileMpeg2Simple;
+	profiles[1] = vlProfileMpeg2Main;
+
+	return 0;
+}
+
+/* Returns vlEntryPointCount; the screen argument is only checked, not read. */
+unsigned int vlGetMaxEntryPoints(struct vlScreen *screen)
+{
+	assert(screen);
+
+	return vlEntryPointCount;
+}
+
+/*
+ * Writes the IDCT, MC and CSC entry points into entry_points[0..2], so the
+ * caller must supply room for at least three entries. The profile argument
+ * is not consulted. Always returns 0.
+ */
+int vlQueryEntryPoints(struct vlScreen *screen, enum vlProfile profile, enum vlEntryPoint *entry_points)
+{
+	assert(screen);
+	assert(entry_points);
+
+	entry_points[0] = vlEntryPointIDCT;
+	entry_points[1] = vlEntryPointMC;
+	entry_points[2] = vlEntryPointCSC;
+
+	return 0;
+}
diff --git a/src/gallium/state_trackers/g3dvl/vl_screen.h b/src/gallium/state_trackers/g3dvl/vl_screen.h
new file mode 100644
index 0000000000..98f3d429b6
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_screen.h
@@ -0,0 +1,63 @@
+#ifndef vl_screen_h
+#define vl_screen_h
+
+#include "vl_types.h"
+
+struct pipe_screen;
+
+/* The struct layout is only exposed to files that define VL_INTERNAL */
+#ifdef VL_INTERNAL
+struct vlScreen
+{
+	struct vlDisplay *display;
+	unsigned int ordinal;
+	struct pipe_screen *pscreen;
+};
+#endif
+
+int vlCreateScreen
+(
+	struct vlDisplay *display,
+	int screen,
+	struct pipe_screen *pscreen,
+	struct vlScreen **vl_screen
+);
+
+int vlDestroyScreen
+(
+	struct vlScreen *screen
+);
+
+struct vlDisplay* vlGetDisplay
+(
+	struct vlScreen *screen
+);
+
+struct pipe_screen* vlGetPipeScreen
+(
+	struct vlScreen *screen
+);
+
+unsigned int vlGetMaxProfiles
+(
+	struct vlScreen *screen
+);
+
+int vlQueryProfiles
+(
+	struct vlScreen *screen,
+	enum vlProfile *profiles
+);
+
+unsigned int vlGetMaxEntryPoints
+(
+	struct vlScreen *screen
+);
+
+int vlQueryEntryPoints
+(
+	struct vlScreen *screen,
+	enum vlProfile profile,
+	enum vlEntryPoint *entry_points
+);
+
+#endif
diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.c b/src/gallium/state_trackers/g3dvl/vl_shader_build.c
new file mode 100644
index 0000000000..51f1721a33
--- /dev/null
+++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.c
@@ -0,0 +1,215 @@
+#include "vl_shader_build.h"
+#include <assert.h>
+#include <tgsi/tgsi_parse.h>
+#include <tgsi/tgsi_build.h>
+
+/*
+ * Returns a default full declaration retargeted at the INPUT register file,
+ * flagged as carrying a semantic (Declaration.Semantic = 1) with the given
+ * semantic name/index, and covering registers first..last.
+ */
+struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last)
+{
+	struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+	decl.Declaration.File = TGSI_FILE_INPUT;
+	decl.Declaration.Semantic = 1;
+	decl.Semantic.SemanticName = name;
+	decl.Semantic.SemanticIndex = index;
+	decl.DeclarationRange.First = first;
+	decl.DeclarationRange.Last = last;
+
+	return decl;
+}
+
+/*
+ * Same as vl_decl_input(), plus an interpolation mode stored in
+ * Declaration.Interpolate. The mode must be one of the CONSTANT, LINEAR or
+ * PERSPECTIVE TGSI_INTERPOLATE_* values (checked by assert only).
+ */
+struct tgsi_full_declaration vl_decl_interpolated_input
+(
+	unsigned int name,
+	unsigned int index,
+	unsigned int first,
+	unsigned int last,
+	int interpolation
+)
+{
+	struct tgsi_full_declaration decl;
+
+	assert
+	(
+		interpolation == TGSI_INTERPOLATE_CONSTANT ||
+		interpolation == TGSI_INTERPOLATE_LINEAR ||
+		interpolation == TGSI_INTERPOLATE_PERSPECTIVE
+	);
+
+	/* Share the file/semantic/range setup with the plain input declaration */
+	decl = vl_decl_input(name, index, first, last);
+	decl.Declaration.Interpolate = interpolation;
+
+	return
decl;
+}
+
+/*
+ * Returns a default full declaration retargeted at the CONSTANT register
+ * file, flagged as carrying a semantic with the given name/index, and
+ * covering registers first..last.
+ */
+struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last)
+{
+	struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+	decl.Declaration.File = TGSI_FILE_CONSTANT;
+	decl.Declaration.Semantic = 1;
+	decl.Semantic.SemanticName = name;
+	decl.Semantic.SemanticIndex = index;
+	decl.DeclarationRange.First = first;
+	decl.DeclarationRange.Last = last;
+
+	return decl;
+}
+
+/*
+ * Returns a default full declaration retargeted at the OUTPUT register
+ * file, flagged as carrying a semantic with the given name/index, and
+ * covering registers first..last.
+ */
+struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last)
+{
+	struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+	decl.Declaration.File = TGSI_FILE_OUTPUT;
+	decl.Declaration.Semantic = 1;
+	decl.Semantic.SemanticName = name;
+	decl.Semantic.SemanticIndex = index;
+	decl.DeclarationRange.First = first;
+	decl.DeclarationRange.Last = last;
+
+	return decl;
+}
+
+/*
+ * Returns a default full declaration for TEMPORARY registers first..last.
+ * Callers in this tracker pass an inclusive range, e.g. (0, 4) for t0-t4.
+ * No semantic is attached.
+ */
+struct tgsi_full_declaration vl_decl_temps(unsigned int first, unsigned int last)
+{
+	/* The initialiser already yields the defaults; no second assignment is needed */
+	struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+	decl.Declaration.File = TGSI_FILE_TEMPORARY;
+	decl.DeclarationRange.First = first;
+	decl.DeclarationRange.Last = last;
+
+	return decl;
+}
+
+/*
+ * Returns a default full declaration for SAMPLER registers first..last.
+ * Callers in this tracker declare one sampler at a time with first == last.
+ * No semantic is attached.
+ */
+struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last)
+{
+	/* The initialiser already yields the defaults; no second assignment is needed */
+	struct tgsi_full_declaration decl = tgsi_default_full_declaration();
+
+	decl.Declaration.File = TGSI_FILE_SAMPLER;
+	decl.DeclarationRange.First = first;
+	decl.DeclarationRange.Last = last;
+
+	return decl;
+}
+
+struct tgsi_full_instruction vl_inst2
+(
+	int opcode,
+	enum tgsi_file_type dst_file,
+	unsigned int dst_index,
+	enum tgsi_file_type src_file,
+	unsigned int src_index
+)
+{
+	struct tgsi_full_instruction inst = tgsi_default_full_instruction();
+
+	inst.Instruction.Opcode = opcode;
+	inst.Instruction.NumDstRegs = 1;
+	inst.FullDstRegisters[0].DstRegister.File = dst_file;
+
inst.FullDstRegisters[0].DstRegister.Index = dst_index;
+	inst.Instruction.NumSrcRegs = 1;
+	inst.FullSrcRegisters[0].SrcRegister.File = src_file;
+	inst.FullSrcRegisters[0].SrcRegister.Index = src_index;
+
+	return inst;
+}
+
+/*
+ * Builds a default full instruction with one destination and two source
+ * registers (the "3" counts all three operands).
+ */
+struct tgsi_full_instruction vl_inst3
+(
+	int opcode,
+	enum tgsi_file_type dst_file,
+	unsigned int dst_index,
+	enum tgsi_file_type src1_file,
+	unsigned int src1_index,
+	enum tgsi_file_type src2_file,
+	unsigned int src2_index
+)
+{
+	struct tgsi_full_instruction inst = tgsi_default_full_instruction();
+
+	/* Opcode and operand counts */
+	inst.Instruction.Opcode = opcode;
+	inst.Instruction.NumDstRegs = 1;
+	inst.Instruction.NumSrcRegs = 2;
+
+	/* Destination operand */
+	inst.FullDstRegisters[0].DstRegister.File = dst_file;
+	inst.FullDstRegisters[0].DstRegister.Index = dst_index;
+
+	/* Source operands */
+	inst.FullSrcRegisters[0].SrcRegister.File = src1_file;
+	inst.FullSrcRegisters[0].SrcRegister.Index = src1_index;
+	inst.FullSrcRegisters[1].SrcRegister.File = src2_file;
+	inst.FullSrcRegisters[1].SrcRegister.Index = src2_index;
+
+	return inst;
+}
+
+/*
+ * Builds a TGSI_OPCODE_TEX instruction: one destination, two sources
+ * (callers pass the texcoord register as src1 and the sampler as src2),
+ * with the texture target stored in InstructionExtTexture.Texture.
+ */
+struct tgsi_full_instruction vl_tex
+(
+	int tex,
+	enum tgsi_file_type dst_file,
+	unsigned int dst_index,
+	enum tgsi_file_type src1_file,
+	unsigned int src1_index,
+	enum tgsi_file_type src2_file,
+	unsigned int src2_index
+)
+{
+	/* Operand layout is exactly that of a two-source instruction */
+	struct tgsi_full_instruction inst = vl_inst3(TGSI_OPCODE_TEX, dst_file, dst_index, src1_file, src1_index, src2_file, src2_index);
+
+	inst.InstructionExtTexture.Texture = tex;
+
+	return inst;
+}
+
+struct tgsi_full_instruction vl_inst4
+(
+	int opcode,
+	enum tgsi_file_type dst_file,
+	unsigned int dst_index,
+	enum tgsi_file_type src1_file,
+
unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = opcode; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = dst_file; + inst.FullDstRegisters[0].DstRegister.Index = dst_index; + inst.Instruction.NumSrcRegs = 3; + inst.FullSrcRegisters[0].SrcRegister.File = src1_file; + inst.FullSrcRegisters[0].SrcRegister.Index = src1_index; + inst.FullSrcRegisters[1].SrcRegister.File = src2_file; + inst.FullSrcRegisters[1].SrcRegister.Index = src2_index; + inst.FullSrcRegisters[2].SrcRegister.File = src3_file; + inst.FullSrcRegisters[2].SrcRegister.Index = src3_index; + + return inst; +} + +struct tgsi_full_instruction vl_end(void) +{ + struct tgsi_full_instruction inst = tgsi_default_full_instruction(); + + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + + return inst; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_shader_build.h b/src/gallium/state_trackers/g3dvl/vl_shader_build.h new file mode 100644 index 0000000000..dc615cb156 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_shader_build.h @@ -0,0 +1,61 @@ +#ifndef vl_shader_build_h +#define vl_shader_build_h + +#include <pipe/p_shader_tokens.h> + +struct tgsi_full_declaration vl_decl_input(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_interpolated_input +( + unsigned int name, + unsigned int index, + unsigned int first, + unsigned int last, + int interpolation +); +struct tgsi_full_declaration vl_decl_constants(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_output(unsigned int name, unsigned int index, unsigned int first, unsigned int last); +struct tgsi_full_declaration 
vl_decl_temps(unsigned int first, unsigned int last); +struct tgsi_full_declaration vl_decl_samplers(unsigned int first, unsigned int last); +struct tgsi_full_instruction vl_inst2 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src_file, + unsigned int src_index +); +struct tgsi_full_instruction vl_inst3 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_tex +( + int tex, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index +); +struct tgsi_full_instruction vl_inst4 +( + int opcode, + enum tgsi_file_type dst_file, + unsigned int dst_index, + enum tgsi_file_type src1_file, + unsigned int src1_index, + enum tgsi_file_type src2_file, + unsigned int src2_index, + enum tgsi_file_type src3_file, + unsigned int src3_index +); +struct tgsi_full_instruction vl_end(void); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.c b/src/gallium/state_trackers/g3dvl/vl_surface.c new file mode 100644 index 0000000000..92388f7978 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_surface.c @@ -0,0 +1,243 @@ +#define VL_INTERNAL +#include "vl_surface.h" +#include <assert.h> +#include <string.h> +#include <pipe/p_screen.h> +#include <pipe/p_state.h> +#include <pipe/p_inlines.h> +#include <util/u_memory.h> +#include <vl_winsys.h> +#include "vl_screen.h" +#include "vl_context.h" +#include "vl_render.h" +#include "vl_csc.h" +#include "vl_util.h" + +int vlCreateSurface +( + struct vlScreen *screen, + unsigned int width, + unsigned int height, + enum vlFormat format, + struct vlSurface **surface +) +{ + struct vlSurface *sfc; + struct pipe_texture template; + + assert(screen); + assert(surface); + + sfc = 
CALLOC_STRUCT(vlSurface); + + if (!sfc) + return 1; + + sfc->screen = screen; + sfc->width = width; + sfc->height = height; + sfc->format = format; + + memset(&template, 0, sizeof(struct pipe_texture)); + template.target = PIPE_TEXTURE_2D; + template.format = PIPE_FORMAT_A8R8G8B8_UNORM; + template.last_level = 0; + template.width[0] = vlRoundUpPOT(sfc->width); + template.height[0] = vlRoundUpPOT(sfc->height); + template.depth[0] = 1; + template.compressed = 0; + pf_get_block(template.format, &template.block); + template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET; + + sfc->texture = vlGetPipeScreen(screen)->texture_create(vlGetPipeScreen(screen), &template); + + if (!sfc->texture) + { + FREE(sfc); + return 1; + } + + *surface = sfc; + + return 0; +} +/* Drop the surface's texture reference and free the surface. Always returns 0. NOTE(review): render_fence/disp_fence are not released here - confirm whether they need to be. */ +int vlDestroySurface +( + struct vlSurface *surface +) +{ + assert(surface); + + pipe_texture_reference(&surface->texture, NULL); + FREE(surface); + + return 0; +} +/* Render a batch of MPEG-2 macroblocks into 'surface' through the renderer of the context the surface is bound to (vlBegin / vlRenderMacroBlocksMpeg2 / vlEnd). The surface must be bound to a context. Always returns 0. */ +int vlRenderMacroBlocksMpeg2 +( + struct vlMpeg2MacroBlockBatch *batch, + struct vlSurface *surface +) +{ + assert(batch); + assert(surface); + assert(surface->context); + + surface->context->render->vlBegin(surface->context->render); + + surface->context->render->vlRenderMacroBlocksMpeg2 + ( + surface->context->render, + batch, + surface + ); + + surface->context->render->vlEnd(surface->context->render); + + return 0; +} + +int vlPutPicture +( + struct vlSurface *surface, + vlNativeDrawable drawable, + int srcx, + int srcy, + int srcw, + int srch, + int destx, + int desty, + int destw, + int desth, + int drawable_w, + int drawable_h, + enum vlPictureType picture_type +) +{ + struct vlCSC *csc; + struct pipe_context *pipe; + + assert(surface); + assert(surface->context); + + surface->context->render->vlFlush(surface->context->render); + + csc = surface->context->csc; + pipe = surface->context->pipe; + + csc->vlResizeFrameBuffer(csc, drawable_w, drawable_h); + + csc->vlBegin(csc); + + csc->vlPutPicture + ( + csc, + surface,
+ srcx, + srcy, + srcw, + srch, + destx, + desty, + destw, + desth, + picture_type + ); + + csc->vlEnd(csc); + + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, &surface->disp_fence); + + bind_pipe_drawable(pipe, drawable); + + pipe->screen->flush_frontbuffer + ( + pipe->screen, + csc->vlGetFrameBuffer(csc), + pipe->priv + ); + + return 0; +} +/* Report through *status whether the surface is rendering, displaying or free, by polling render_fence and then disp_fence (a NULL fence is skipped). Always returns 0. NOTE(review): a fence counts as busy when fence_signalled() returns 0 - confirm that return convention against the pipe_screen interface in use. */ +int vlSurfaceGetStatus +( + struct vlSurface *surface, + enum vlResourceStatus *status +) +{ + assert(surface); + assert(surface->context); + assert(status); + + if (surface->render_fence && !surface->context->pipe->screen->fence_signalled(surface->context->pipe->screen, surface->render_fence, 0)) + { + *status = vlResourceStatusRendering; + return 0; + } + + if (surface->disp_fence && !surface->context->pipe->screen->fence_signalled(surface->context->pipe->screen, surface->disp_fence, 0)) + { + *status = vlResourceStatusDisplaying; + return 0; + } + + *status = vlResourceStatusFree; + + return 0; +} +/* Flush the renderer of the context the surface is bound to. Always returns 0. */ +int vlSurfaceFlush +( + struct vlSurface *surface +) +{ + assert(surface); + assert(surface->context); + + surface->context->render->vlFlush(surface->context->render); + + return 0; +} +/* Wait on the surface's render fence via fence_finish(). Unlike vlSurfaceGetStatus, a NULL render_fence is not tolerated here (asserted). Always returns 0. */ +int vlSurfaceSync +( + struct vlSurface *surface +) +{ + assert(surface); + assert(surface->context); + assert(surface->render_fence); + + surface->context->pipe->screen->fence_finish(surface->context->pipe->screen, surface->render_fence, 0); + + return 0; +} +/* Return the screen the surface was created on. */ +struct vlScreen* vlSurfaceGetScreen +( + struct vlSurface *surface +) +{ + assert(surface); + + return surface->screen; +} +/* Bind the surface to 'context' (which may be NULL - it is not checked) and return the previously bound context. */ +struct vlContext* vlBindToContext +( + struct vlSurface *surface, + struct vlContext *context +) +{ + struct vlContext *old; + + assert(surface); + + old = surface->context; + surface->context = context; + + return old; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_surface.h b/src/gallium/state_trackers/g3dvl/vl_surface.h new file mode 100644 index 0000000000..133e1515ef --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_surface.h @@ -0,0 +1,86 @@
+#ifndef vl_surface_h +#define vl_surface_h + +#include "vl_types.h" + +#ifdef VL_INTERNAL +struct pipe_texture; + +struct vlSurface +{ + struct vlScreen *screen; + struct vlContext *context; + unsigned int width; + unsigned int height; + enum vlFormat format; + struct pipe_texture *texture; + struct pipe_fence_handle *render_fence; + struct pipe_fence_handle *disp_fence; +}; +#endif + +int vlCreateSurface +( + struct vlScreen *screen, + unsigned int width, + unsigned int height, + enum vlFormat format, + struct vlSurface **surface +); + +int vlDestroySurface +( + struct vlSurface *surface +); + +int vlRenderMacroBlocksMpeg2 +( + struct vlMpeg2MacroBlockBatch *batch, + struct vlSurface *surface +); + +int vlPutPicture +( + struct vlSurface *surface, + vlNativeDrawable drawable, + int srcx, + int srcy, + int srcw, + int srch, + int destx, + int desty, + int destw, + int desth, + int drawable_w, + int drawable_h, + enum vlPictureType picture_type +); + +int vlSurfaceGetStatus +( + struct vlSurface *surface, + enum vlResourceStatus *status +); + +int vlSurfaceFlush +( + struct vlSurface *surface +); + +int vlSurfaceSync +( + struct vlSurface *surface +); + +struct vlScreen* vlSurfaceGetScreen +( + struct vlSurface *surface +); + +struct vlContext* vlBindToContext +( + struct vlSurface *surface, + struct vlContext *context +); + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_types.h b/src/gallium/state_trackers/g3dvl/vl_types.h new file mode 100644 index 0000000000..274e1f7437 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_types.h @@ -0,0 +1,115 @@ +#ifndef vl_types_h +#define vl_types_h + +#if 1 /*#ifdef X11*/ +#include <X11/Xlib.h> + +typedef Display* vlNativeDisplay; +typedef Drawable vlNativeDrawable; +#endif + +struct vlDisplay; +struct vlScreen; +struct vlContext; +struct vlSurface; + +enum vlResourceStatus +{ + vlResourceStatusFree, + vlResourceStatusRendering, + vlResourceStatusDisplaying +}; + +enum vlProfile +{ + vlProfileMpeg2Simple, + 
vlProfileMpeg2Main, + + vlProfileCount +}; + +enum vlEntryPoint +{ + vlEntryPointIDCT, + vlEntryPointMC, + vlEntryPointCSC, + + vlEntryPointCount +}; + +enum vlFormat +{ + vlFormatYCbCr420, + vlFormatYCbCr422, + vlFormatYCbCr444 +}; + +enum vlPictureType +{ + vlPictureTypeTopField, + vlPictureTypeBottomField, + vlPictureTypeFrame +}; + +enum vlMotionType +{ + vlMotionTypeField, + vlMotionTypeFrame, + vlMotionTypeDualPrime, + vlMotionType16x8 +}; + +enum vlFieldOrder +{ + vlFieldOrderFirst, + vlFieldOrderSecond +}; + +enum vlDCTType +{ + vlDCTTypeFrameCoded, + vlDCTTypeFieldCoded +}; + +struct vlVertex2f +{ + float x, y; +}; + +struct vlVertex4f +{ + float x, y, z, w; +}; + +enum vlMacroBlockType +{ + vlMacroBlockTypeIntra, + vlMacroBlockTypeFwdPredicted, + vlMacroBlockTypeBkwdPredicted, + vlMacroBlockTypeBiPredicted, + + vlNumMacroBlockTypes +}; + +struct vlMpeg2MacroBlock +{ + unsigned int mbx, mby; + enum vlMacroBlockType mb_type; + enum vlMotionType mo_type; + enum vlDCTType dct_type; + int PMV[2][2][2]; + unsigned int cbp; + short *blocks; +}; + +struct vlMpeg2MacroBlockBatch +{ + struct vlSurface *past_surface; + struct vlSurface *future_surface; + enum vlPictureType picture_type; + enum vlFieldOrder field_order; + unsigned int num_macroblocks; + struct vlMpeg2MacroBlock *macroblocks; +}; + +#endif diff --git a/src/gallium/state_trackers/g3dvl/vl_util.c b/src/gallium/state_trackers/g3dvl/vl_util.c new file mode 100644 index 0000000000..50aa9af66f --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_util.c @@ -0,0 +1,16 @@ +#include "vl_util.h" +#include <assert.h> +/* Round x up to the nearest power of two (a power of two is returned unchanged). x must be non-zero (asserted). If x exceeds the largest power of two an unsigned int can hold, the final +1 wraps and 0 is returned. */ +unsigned int vlRoundUpPOT(unsigned int x) +{ + unsigned int i; + + assert(x > 0); + /* Step down first so that an exact power of two maps to itself. */ + --x; + /* Smear the highest set bit into every lower bit position (shifts of 1, 2, 4, ...). */ + for (i = 1; i < sizeof(unsigned int) * 8; i <<= 1) + x |= x >> i; + + return x + 1; +} diff --git a/src/gallium/state_trackers/g3dvl/vl_util.h b/src/gallium/state_trackers/g3dvl/vl_util.h new file mode 100644 index 0000000000..bc98e79df4 --- /dev/null +++ b/src/gallium/state_trackers/g3dvl/vl_util.h
@@ -0,0 +1,6 @@ +#ifndef vl_util_h +#define vl_util_h + +unsigned int vlRoundUpPOT(unsigned int x); + +#endif diff --git a/src/gallium/state_trackers/glx/Makefile b/src/gallium/state_trackers/glx/Makefile new file mode 100644 index 0000000000..f779035763 --- /dev/null +++ b/src/gallium/state_trackers/glx/Makefile @@ -0,0 +1,25 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +# Recurse into each GLX state tracker flavour listed in SUBDIRS. +SUBDIRS = xlib + +# None of these targets names a file; keep them runnable even if such a file appears. +.PHONY: default subdirs clean install +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` + rm -f `find . -name depend` + + +# Dummy install target +install: diff --git a/src/gallium/state_trackers/glx/dri/dri_context.c b/src/gallium/state_trackers/glx/dri/dri_context.c new file mode 100644 index 0000000000..9424e18bee --- /dev/null +++ b/src/gallium/state_trackers/glx/dri/dri_context.c @@ -0,0 +1,168 @@ +/************************************************************************** + * + * Copyright 2009, VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "dri_screen.h" +#include "dri_context.h" +#include "dri_winsys.h" + +#include "state_tracker/st_public.h" +#include "state_tracker/st_context.h" +#include "pipe/p_context.h" + +#include "util/u_memory.h" + +/* Create the driver context behind cPriv: allocate a dri_context, create its pipe context and its state tracker context (sharing with sharedContextPrivate's st context when one is given). Returns GL_TRUE on success; on failure whatever was created is released, cPriv->driverPrivate is cleared and GL_FALSE is returned. */ +GLboolean +dri_create_context(const __GLcontextModes *visual, + __DRIcontextPrivate *cPriv, + void *sharedContextPrivate) +{ + __DRIscreenPrivate *sPriv = cPriv->driScreenPriv; + struct dri_screen *screen = dri_screen(sPriv); + struct dri_context *ctx = NULL; + struct st_context *st_share = NULL; + + if (sharedContextPrivate) { + st_share = ((struct dri_context *) sharedContextPrivate)->st; + } + + ctx = CALLOC_STRUCT(dri_context); + if (ctx == NULL) + goto fail; + + cPriv->driverPrivate = ctx; + ctx->cPriv = cPriv; + ctx->sPriv = sPriv; + + driParseConfigFiles(&ctx->optionCache, + &screen->optionCache, + sPriv->myNum, + "dri"); + + ctx->pipe = screen->pipe_screen->create_context(screen->pipe_screen, + screen->pipe_winsys, + hw_winsys ); + if (ctx->pipe == NULL) + goto fail; + + ctx->pipe->priv = ctx; /* I guess */ + + ctx->st = st_create_context(ctx->pipe, visual, st_share); + if (ctx->st == NULL) + goto fail; + + dri_init_extensions( ctx ); + + return GL_TRUE; + +fail: + if (ctx && ctx->st) + st_destroy_context( ctx->st ); + + if (ctx && ctx->pipe) + ctx->pipe->destroy( ctx->pipe ); + + /* Do not leave cPriv pointing at the context that is about to be freed. */ + cPriv->driverPrivate = NULL; + FREE(ctx); + return GL_FALSE; +} + + +void +dri_destroy_context(__DRIcontextPrivate *cPriv) +{ + struct dri_context *ctx = dri_context(cPriv); + struct dri_screen *screen = dri_screen(cPriv->driScreenPriv); + struct pipe_winsys *winsys = screen->winsys; + + /* No particular reason to wait for command
completion before + * destroying a context, but it is probably worthwhile flushing it + * to avoid having to add code elsewhere to cope with flushing a + * partially destroyed context. + */ + st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); + + if (screen->dummyContext == ctx) + screen->dummyContext = NULL; + + /* Also frees ctx->pipe? + */ + st_destroy_context(ctx->st); + + FREE(ctx); +} + +/* Flush the context's pending rendering when it is unbound. Always returns GL_TRUE. */ +GLboolean +dri_unbind_context(__DRIcontextPrivate *cPriv) +{ + struct dri_context *ctx = dri_context(cPriv); + st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); + /* XXX make_current(NULL)? */ + return GL_TRUE; +} + +/* Make cPriv current with the given draw/read drawables, or unbind everything when cPriv is NULL. The context is also recorded as the screen's dummyContext, and either drawable is resized if its stamp is stale. Always returns GL_TRUE. */ +GLboolean +dri_make_current(__DRIcontextPrivate *cPriv, + __DRIdrawablePrivate *driDrawPriv, + __DRIdrawablePrivate *driReadPriv) +{ + if (cPriv) { + struct dri_context *ctx = dri_context(cPriv); + struct dri_screen *screen = dri_screen(cPriv->driScreenPriv); + struct dri_drawable *draw = dri_drawable(driDrawPriv); + struct dri_drawable *read = dri_drawable(driReadPriv); + + /* This is for situations in which we need a rendering context but + * there may not be any currently bound. + */ + screen->dummyContext = ctx; + + st_make_current( ctx->st, + draw->stfb, + read->stfb ); + + ctx->dPriv = driDrawPriv; + + /* Update window sizes if necessary (dri_update_window_size takes the + * __DRIdrawablePrivate, not the dri_drawable): + */ + if (draw->stamp != driDrawPriv->lastStamp) { + dri_update_window_size( driDrawPriv ); + } + + if (read->stamp != driReadPriv->lastStamp) { + dri_update_window_size( driReadPriv ); + } + + } + else { + st_make_current(NULL, NULL, NULL); + } + + return GL_TRUE; +} diff --git a/src/gallium/state_trackers/glx/dri/dri_context.h b/src/gallium/state_trackers/glx/dri/dri_context.h new file mode 100644 index 0000000000..4e6a305abb --- /dev/null +++ b/src/gallium/state_trackers/glx/dri/dri_context.h @@ -0,0 +1,95 @@ +/************************************************************************** + * + * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef DRI_CONTEXT_H +#define DRI_CONTEXT_H + +#include "pipe/p_compiler.h" +#include "drm.h" +#include "dri_util.h" + + +struct pipe_context; +struct pipe_fence; +struct st_context; + + +struct dri_context +{ + __DRIcontextPrivate *cPriv; + __DRIdrawablePrivate *dPriv; + + struct st_context *st; + struct pipe_context *pipe; + + boolean locked; + + /** + * Configuration cache + */ + driOptionCache optionCache; +}; + + +static INLINE struct dri_context * +dri_context(__DRIcontextPrivate *driContextPriv) +{ + return (struct dri_context *) driContextPriv->driverPrivate; +} + +/*********************************************************************** + * dri_context.c + */ +void +dri_destroy_context(__DRIcontextPrivate * driContextPriv); + +boolean +dri_unbind_context(__DRIcontextPrivate * driContextPriv); + +boolean +dri_make_current(__DRIcontextPrivate * driContextPriv, + __DRIdrawablePrivate * driDrawPriv, + __DRIdrawablePrivate * driReadPriv); + +boolean +dri_create_context(const __GLcontextModes * visual, + __DRIcontextPrivate * driContextPriv, + void *sharedContextPrivate); + + + +/*********************************************************************** + * dri_lock.c + */ +void dri_lock_hardware( struct dri_context *context, + struct dri_drawable *drawable ); + +void dri_unlock_hardware( struct dri_context *dri ); +boolean dri_is_locked( struct dri_context *dri ); + + + +#endif diff --git a/src/gallium/state_trackers/glx/dri/dri_drawable.c b/src/gallium/state_trackers/glx/dri/dri_drawable.c new file mode 100644 index 0000000000..b712acda88 --- /dev/null +++ b/src/gallium/state_trackers/glx/dri/dri_drawable.c @@ -0,0 +1,363 @@ +/************************************************************************** + * + * Copyright 2009, VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "dri_screen.h" +#include "dri_context.h" +#include "dri_swapbuffers.h" + +#include "pipe/p_context.h" +#include "state_tracker/st_public.h" +#include "state_tracker/st_context.h" +#include "state_tracker/st_cb_fbo.h" + +/* Copy 'src' to the drawable's front surface, one blit per clip rectangle of dPriv. When 'rect' is given the copy is limited to it; rect->x2/y2 are used as width/height and rect->y1 is flipped against the drawable height. */ +static void +blit_swapbuffers(__DRIdrawablePrivate *dPriv, + __DRIcontextPrivate *cPriv, + struct pipe_surface *src, + const drm_clip_rect_t *rect) +{ + struct dri_screen *screen = dri_screen(dPriv->driScreenPriv); + struct dri_drawable *fb = dri_drawable(dPriv); + struct dri_context *context = dri_context(cPriv); + + const int nbox = dPriv->numClipRects; + const drm_clip_rect_t *pbox = dPriv->pClipRects; + + struct pipe_surface *dest = fb->front_surface; + const int backWidth = fb->stfb->Base.Width; + const int backHeight = fb->stfb->Base.Height; + int i; + + for (i = 0; i < nbox; i++, pbox++) { + drm_clip_rect_t box; + drm_clip_rect_t sbox; + + if (pbox->x1 > pbox->x2 || + pbox->y1 > pbox->y2 || + (pbox->x2 - pbox->x1) > dest->width || + (pbox->y2 - pbox->y1) > dest->height) + continue; + + box = *pbox; + + if (rect) { + drm_clip_rect_t rrect; + + rrect.x1 = dPriv->x + rect->x1; + rrect.y1 = (dPriv->h - rect->y1 - rect->y2) + dPriv->y; + rrect.x2 = rect->x2 + rrect.x1; + rrect.y2 = rect->y2 + rrect.y1; + if (rrect.x1 > box.x1) + box.x1 = rrect.x1; + if (rrect.y1 > box.y1) + box.y1 = rrect.y1; + if (rrect.x2 < box.x2) + box.x2 = rrect.x2; + if (rrect.y2 < box.y2) + box.y2 = rrect.y2; + + if (box.x1 > box.x2 || box.y1 > box.y2) + continue; + } + + /* restrict blit to size of actually rendered area */ + if (box.x2 - box.x1 > backWidth) + box.x2 = backWidth + box.x1; + if (box.y2 - box.y1 > backHeight) + box.y2 = backHeight + box.y1; + + debug_printf("%s: box %d,%d-%d,%d\n", __FUNCTION__, + box.x1, box.y1, box.x2, box.y2); + + sbox.x1 = box.x1 - dPriv->x; + sbox.y1 = box.y1 - dPriv->y; + + context->st->pipe->surface_copy( context->st->pipe, + FALSE, + dest, +
box.x1, box.y1, + src, + sbox.x1, sbox.y1, + box.x2 - box.x1, + box.y2 - box.y1 ); + } +} + +/** + * Display a colorbuffer surface in an X window. + * Used for SwapBuffers and flushing front buffer rendering. + * + * Does nothing when the screen has no dummy context to render with. + * + * \param dPriv the window/drawable to display into + * \param source the surface to display + * \param rect optional subrect of surface to display (may be NULL). + */ +static void +dri_display_surface(__DRIdrawablePrivate *dPriv, + struct pipe_surface *source, + const drm_clip_rect_t *rect) +{ + struct dri_drawable *drawable = dri_drawable(dPriv); + struct dri_screen *screen = dri_screen(dPriv->driScreenPriv); + struct dri_context *context = screen->dummyContext; + struct pipe_winsys *winsys = screen->winsys; + + if (!context) + return; + + if (drawable->last_swap_fence) { + winsys->fence_finish( winsys, + drawable->last_swap_fence, + 0 ); + + winsys->fence_reference( winsys, + &drawable->last_swap_fence, + NULL ); + } + + drawable->last_swap_fence = drawable->first_swap_fence; + drawable->first_swap_fence = NULL; + + /* Call lock_hardware to update dPriv cliprects. + */ + dri_lock_hardware(context, drawable); + { + if (dPriv->numClipRects) { + blit_swapbuffers( dPriv, context->cPriv, source, rect ); + } + } + dri_unlock_hardware(context); + + if (drawable->stamp != dPriv->lastStamp) { + dri_update_window_size( dPriv ); + } +} + + + +/** + * This will be called when a drawable is known to have moved/resized. +
+ */ +void +dri_update_window_size(__DRIdrawablePrivate *dPriv) +{ + struct dri_drawable *drawable = dri_drawable(dPriv); + st_resize_framebuffer(drawable->stfb, dPriv->w, dPriv->h); + drawable->stamp = dPriv->lastStamp; +} + + +/* Present the back-left colorbuffer of dPriv, if it has one, bracketed by the state tracker's swapbuffers notifications. */ +void +dri_swap_buffers(__DRIdrawablePrivate * dPriv) +{ + struct dri_drawable *drawable = dri_drawable(dPriv); + struct pipe_surface *back_surf; + + assert(drawable); + assert(drawable->stfb); + + back_surf = st_get_framebuffer_surface(drawable->stfb, + ST_SURFACE_BACK_LEFT); + if (back_surf) { + st_notify_swapbuffers(drawable->stfb); + dri_display_surface(dPriv, back_surf, NULL); + st_notify_swapbuffers_complete(drawable->stfb); + } +} + + +/** + * Called via glXCopySubBufferMESA() to copy a subrect of the back + * buffer to the front buffer/screen. + */ +void +dri_copy_sub_buffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h) +{ + struct dri_drawable *drawable = dri_drawable(dPriv); + struct pipe_surface *back_surf; + + assert(drawable); + assert(drawable->stfb); + + back_surf = st_get_framebuffer_surface(drawable->stfb, + ST_SURFACE_BACK_LEFT); + if (back_surf) { + drm_clip_rect_t rect; + rect.x1 = x; + rect.y1 = y; + rect.x2 = w; /* width, not a right edge: blit_swapbuffers adds it to x1 */ + rect.y2 = h; /* height, likewise */ + + st_notify_swapbuffers(drawable->stfb); + dri_display_surface(dPriv, back_surf, &rect); + } +} + + + +/* + * The state tracker keeps track of whether the fake frontbuffer has + * been touched by any rendering since the last time we copied its + * contents to the real frontbuffer. Our task is easy: + * display 'surf' on the drawable the context (context_private) was + * last made current with. + */ +static void +dri_flush_frontbuffer( struct pipe_winsys *winsys, + struct pipe_surface *surf, + void *context_private) +{ + struct dri_context *dri = (struct dri_context *) context_private; + __DRIdrawablePrivate *dPriv = dri->dPriv; + + dri_display_surface(dPriv, surf, NULL); +} + + + +/* Need to create a surface which wraps the front surface to support + * client-side swapbuffers.
+ */ +static void +dri_create_front_surface(struct dri_screen *screen, + struct pipe_winsys *winsys, + unsigned handle) +{ + struct pipe_screen *pipe_screen = screen->pipe_screen; + struct pipe_texture *texture; + struct pipe_texture templat; + struct pipe_surface *surface; + struct pipe_buffer *buffer = NULL; /* stays NULL while the lookup below is disabled */ + unsigned pitch; + + assert(screen->front.cpp == 4); + +// buffer = dri_buffer_from_handle(screen->winsys, +// "front", handle); + + if (!buffer) + return; + + screen->front.buffer = dri_bo(buffer); + + memset(&templat, 0, sizeof(templat)); + templat.tex_usage |= PIPE_TEXTURE_USAGE_DISPLAY_TARGET; + templat.target = PIPE_TEXTURE_2D; + templat.last_level = 0; + templat.depth[0] = 1; + templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; + templat.width[0] = screen->front.width; + templat.height[0] = screen->front.height; + pf_get_block(templat.format, &templat.block); + pitch = screen->front.pitch; + + texture = pipe_screen->texture_blanket(pipe_screen, + &templat, + &pitch, + buffer); + + /* Unref the buffer we don't need it anyways */ + pipe_buffer_reference(screen, &buffer, NULL); + + surface = pipe_screen->get_tex_surface(pipe_screen, + texture, + 0, + 0, + 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + + screen->front.texture = texture; + screen->front.surface = surface; +} + +/** + * This is called when we need to set up GL rendering to a new X window. + */ +static boolean +dri_create_buffer(__DRIscreenPrivate *sPriv, + __DRIdrawablePrivate *dPriv, + const __GLcontextModes *visual, + boolean isPixmap) +{ + enum pipe_format colorFormat, depthFormat, stencilFormat; + struct dri_drawable *drawable = NULL; /* the fail path frees this, so it must start out NULL */ + + if (isPixmap) + goto fail; /* not implemented */ + + drawable = CALLOC_STRUCT(dri_drawable); + if (drawable == NULL) + goto fail; + + /* XXX: todo: use the pipe_screen queries to figure out which + * render targets are supportable.
+ */ + if (visual->redBits == 5) + colorFormat = PIPE_FORMAT_R5G6B5_UNORM; + else + colorFormat = PIPE_FORMAT_A8R8G8B8_UNORM; + + if (visual->depthBits == 16) + depthFormat = PIPE_FORMAT_Z16_UNORM; + else if (visual->depthBits == 24) { + if (visual->stencilBits == 8) + depthFormat = PIPE_FORMAT_S8Z24_UNORM; + else + depthFormat = PIPE_FORMAT_X8Z24_UNORM; + } + else + depthFormat = PIPE_FORMAT_NONE; /* no (or unsupported) depth buffer: never pass an unset format on */ + + drawable->stfb = st_create_framebuffer(visual, + colorFormat, + depthFormat, + dPriv->w, + dPriv->h, + (void*) drawable); + if (drawable->stfb == NULL) + goto fail; + + /* dri_display_surface and friends reach the DRI drawable through this back pointer. */ + drawable->dPriv = dPriv; + dPriv->driverPrivate = (void *) drawable; + return GL_TRUE; + +fail: + FREE(drawable); + return GL_FALSE; +} +/* Release the swap fences, the state tracker framebuffer and the dri_drawable attached to dPriv. */ +static void +dri_destroy_buffer(__DRIdrawablePrivate *dPriv) +{ + struct dri_drawable *drawable = dri_drawable(dPriv); + struct pipe_winsys *winsys = dri_screen(dPriv->driScreenPriv)->winsys; + /* No particular need to wait on fences before dereferencing them: + */ + winsys->fence_reference( winsys, &drawable->last_swap_fence, NULL ); + winsys->fence_reference( winsys, &drawable->first_swap_fence, NULL ); + + st_unreference_framebuffer(drawable->stfb); + + FREE(drawable); +} + diff --git a/src/gallium/winsys/drm/intel/egl/intel_reg.h b/src/gallium/state_trackers/glx/dri/dri_drawable.h index 4f33bee438..1001bb8c57 100644 --- a/src/gallium/winsys/drm/intel/egl/intel_reg.h +++ b/src/gallium/state_trackers/glx/dri/dri_drawable.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2009, VMware, Inc. * All Rights Reserved.
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,29 +25,49 @@ * **************************************************************************/ +#ifndef DRI_DRAWABLE_H +#define DRI_DRAWABLE_H -#ifndef _INTEL_REG_H_ -#define _INTEL_REG_H_ +#include "pipe/p_compiler.h" +struct pipe_surface; +struct pipe_fence; +struct st_framebuffer; -#define BR00_BITBLT_CLIENT 0x40000000 -#define BR00_OP_COLOR_BLT 0x10000000 -#define BR00_OP_SRC_COPY_BLT 0x10C00000 -#define BR13_SOLID_PATTERN 0x80000000 -#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4) -#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) -#define XY_COLOR_BLT_WRITE_RGB (1<<20) +struct dri_drawable +{ + __DRIdrawablePrivate *dPriv; + unsigned stamp; -#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) -#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) -#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) + struct pipe_fence *last_swap_fence; + struct pipe_fence *first_swap_fence; -#define MI_WAIT_FOR_EVENT ((0x3<<23)) -#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) -#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) + struct st_framebuffer *stfb; +}; -#define MI_BATCH_BUFFER_END (0xA<<23) + +static INLINE struct dri_drawable * +dri_drawable(__DRIdrawablePrivate * driDrawPriv) +{ + return (struct dri_drawable *) driDrawPriv->driverPrivate; +} + + +/*********************************************************************** + * dri_drawable.c + */ + +void +dri_swap_buffers(__DRIdrawablePrivate * dPriv); + +void +dri_copy_sub_buffer(__DRIdrawablePrivate * dPriv, + int x, int y, + int w, int h); + +void +dri_update_window_size(__DRIdrawablePrivate *dPriv); #endif diff --git a/src/gallium/state_trackers/glx/dri/dri_extensions.c b/src/gallium/state_trackers/glx/dri/dri_extensions.c new file mode 100644 index 0000000000..126faf7601 --- /dev/null +++ b/src/gallium/state_trackers/glx/dri/dri_extensions.c @@ -0,0 +1,108 @@ +/************************************************************************** + * + * Copyright 2009, VMware, Inc. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + + +#define need_GL_ARB_multisample +#define need_GL_ARB_point_parameters +#define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_buffer_object +#define need_GL_ARB_vertex_program +#define need_GL_ARB_window_pos +#define need_GL_EXT_blend_color +#define need_GL_EXT_blend_equation_separate +#define need_GL_EXT_blend_func_separate +#define need_GL_EXT_blend_minmax +#define need_GL_EXT_cull_vertex +#define need_GL_EXT_fog_coord +#define need_GL_EXT_framebuffer_object +#define need_GL_EXT_multi_draw_arrays +#define need_GL_EXT_secondary_color +#define need_GL_NV_vertex_program +#include "extension_helper.h" + + +/** + * Extension strings exported by the driver. 
+ */ +const struct dri_extension card_extensions[] = { + {"GL_ARB_multisample", GL_ARB_multisample_functions}, + {"GL_ARB_multitexture", NULL}, + {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, + {"GL_ARB_texture_border_clamp", NULL}, + {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, + {"GL_ARB_texture_cube_map", NULL}, + {"GL_ARB_texture_env_add", NULL}, + {"GL_ARB_texture_env_combine", NULL}, + {"GL_ARB_texture_env_dot3", NULL}, + {"GL_ARB_texture_mirrored_repeat", NULL}, + {"GL_ARB_texture_rectangle", NULL}, + {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions}, + {"GL_ARB_pixel_buffer_object", NULL}, + {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, + {"GL_ARB_window_pos", GL_ARB_window_pos_functions}, + {"GL_EXT_blend_color", GL_EXT_blend_color_functions}, + {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, + {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, + {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, + {"GL_EXT_blend_subtract", NULL}, + {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions}, + {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions}, + {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions}, + {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, + {"GL_EXT_packed_depth_stencil", NULL}, + {"GL_EXT_pixel_buffer_object", NULL}, + {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, + {"GL_EXT_stencil_wrap", NULL}, + {"GL_EXT_texture_edge_clamp", NULL}, + {"GL_EXT_texture_env_combine", NULL}, + {"GL_EXT_texture_env_dot3", NULL}, + {"GL_EXT_texture_filter_anisotropic", NULL}, + {"GL_EXT_texture_lod_bias", NULL}, + {"GL_3DFX_texture_compression_FXT1", NULL}, + {"GL_APPLE_client_storage", NULL}, + {"GL_MESA_pack_invert", NULL}, + {"GL_MESA_ycbcr_texture", NULL}, + {"GL_NV_blend_square", NULL}, + {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, + {"GL_NV_vertex_program1_1", NULL}, + 
{"GL_SGIS_generate_mipmap", NULL }, + {NULL, NULL} +}; + + + +void +dri_init_extensions( void ) +{ + /* The card_extensions list should be pruned according to the + * capabilities of the pipe_screen. This is actually something + * that can/should be done inside st_create_context(). + */ + driInitExtensions( ctx->st->ctx, card_extensions, GL_TRUE ); +} diff --git a/src/gallium/winsys/drm/intel/dri/intel_lock.c b/src/gallium/state_trackers/glx/dri/dri_lock.c index ad1c202429..b272ab55f3 100644 --- a/src/gallium/winsys/drm/intel/dri/intel_lock.c +++ b/src/gallium/state_trackers/glx/dri/dri_lock.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2009 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -26,77 +26,65 @@ **************************************************************************/ -#include "main/glheader.h" #include "pipe/p_thread.h" -#include <GL/internal/glcore.h> -#include "state_tracker/st_public.h" -#include "intel_context.h" -#include "i830_dri.h" - - +#include "dri_context.h" +#include "xf86drm.h" pipe_static_mutex( lockMutex ); - static void -intelContendedLock(struct intel_context *intel, uint flags) +dri_contended_lock(struct dri_context *ctx) { - __DRIdrawablePrivate *dPriv = intel->driDrawable; - __DRIscreenPrivate *sPriv = intel->driScreen; - struct intel_screen *intelScreen = intel_screen(sPriv); - drmI830Sarea *sarea = intel->sarea; + __DRIdrawablePrivate *dPriv = ctx->dPriv; + __DRIcontextPrivate *cPriv = ctx->cPriv; + __DRIscreenPrivate *sPriv = cPriv->driScreenPriv; - drmGetLock(intel->driFd, intel->hHWContext, flags); + drmGetLock(sPriv->fd, cPriv->hHWContext, 0); - DBG(LOCK, "%s - got contended lock\n", __progname); - - /* If the window moved, may need to set a new cliprect now. 
- * - * NOTE: This releases and regains the hw lock, so all state - * checking must be done *after* this call: + /* Perform round trip communication with server (including dropping + * and retaking the above lock) to update window dimensions: */ if (dPriv) DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); - - if (sarea->width != intelScreen->front.width || - sarea->height != intelScreen->front.height) { - - intelUpdateScreenRotation(sPriv, sarea); - } } /* Lock the hardware and validate our state. */ -void LOCK_HARDWARE( struct intel_context *intel ) +void dri_lock_hardware( struct dri_context *ctx ) { - char __ret = 0; - - pipe_mutex_lock(lockMutex); - assert(!intel->locked); + __DRIcontextPrivate *cPriv = ctx->cPriv; + __DRIscreenPrivate *sPriv = cPriv->driScreenPriv; + char __ret = 0; - DRM_CAS(intel->driHwLock, intel->hHWContext, - (DRM_LOCK_HELD|intel->hHWContext), __ret); + pipe_mutex_lock(lockMutex); + assert(!ctx->locked); - if (__ret) - intelContendedLock( intel, 0 ); + DRM_CAS((drmLock *) &sPriv->pSAREA->lock, + cPriv->hHWContext, + (DRM_LOCK_HELD | cPriv->hHWContext), + __ret); - DBG(LOCK, "%s - locked\n", __progname); + if (__ret) + dri_contended_lock( ctx ); - intel->locked = 1; + ctx->locked = TRUE; } /* Unlock the hardware using the global current context */ -void UNLOCK_HARDWARE( struct intel_context *intel ) +void dri_unlock_hardware( struct dri_context *ctx ) { - assert(intel->locked); - intel->locked = 0; + __DRIcontextPrivate *cPriv = ctx->cPriv; + __DRIscreenPrivate *sPriv = cPriv->driScreenPriv; - DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext); + assert(ctx->locked); + ctx->locked = FALSE; - pipe_mutex_unlock(lockMutex); + DRM_UNLOCK(sPriv->fd, + (drmLock *) &sPriv->pSAREA->lock, + cPriv->hHWContext); - DBG(LOCK, "%s - unlocked\n", __progname); + pipe_mutex_unlock(lockMutex); } diff --git a/src/gallium/state_trackers/glx/dri/dri_screen.c b/src/gallium/state_trackers/glx/dri/dri_screen.c new file mode 100644 index 0000000000..f7119b949a 
--- /dev/null +++ b/src/gallium/state_trackers/glx/dri/dri_screen.c @@ -0,0 +1,255 @@ +/************************************************************************** + * + * Copyright 2009, VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "utils.h" +#include "vblank.h" +#include "xmlpool.h" + +#include "dri_context.h" +#include "dri_screen.h" + +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_inlines.h" +#include "state_tracker/st_public.h" +#include "state_tracker/st_cb_fbo.h" + + +PUBLIC const char __driConfigOptions[] = + DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE + DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) + DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) + DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY +// DRI_CONF_FORCE_S3TC_ENABLE(false) + DRI_CONF_ALLOW_LARGE_TEXTURES(1) + DRI_CONF_SECTION_END DRI_CONF_END; + +const uint __driNConfigOptions = 3; + +static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; + +extern const struct dri_extension card_extensions[]; + + + +static const __DRIextension *driScreenExtensions[] = { + &driReadDrawableExtension, + &driCopySubBufferExtension.base, + &driSwapControlExtension.base, + &driFrameTrackingExtension.base, + &driMediaStreamCounterExtension.base, + NULL +}; + + + + +static const char * +dri_get_name( struct pipe_winsys *winsys ) +{ + return "dri"; +} + + + +static void +dri_destroy_screen(__DRIscreenPrivate * sPriv) +{ + struct dri_screen *screen = dri_screen(sPriv); + + screen->pipe_screen->destroy( screen->pipe_screen ); + screen->pipe_winsys->destroy( screen->pipe_winsys ); + FREE(screen); + sPriv->private = NULL; +} + + +/** + * Get information about previous buffer swaps. 
+ */ +static int +dri_get_swap_info(__DRIdrawablePrivate * dPriv, + __DRIswapInfo * sInfo) +{ + if (dPriv == NULL || + dPriv->driverPrivate == NULL || + sInfo == NULL) + return -1; + else + return 0; +} + +static const __DRIconfig ** +dri_fill_in_modes(__DRIscreenPrivate *psp, + unsigned pixel_bits ) +{ + __DRIconfig **configs; + __GLcontextModes *m; + unsigned num_modes; + uint8_t depth_bits_array[3]; + uint8_t stencil_bits_array[3]; + uint8_t msaa_samples_array[1]; + unsigned depth_buffer_factor; + unsigned back_buffer_factor; + GLenum fb_format; + GLenum fb_type; + int i; + + static const GLenum back_buffer_modes[] = { + GLX_NONE, GLX_SWAP_UNDEFINED_OML + }; + + depth_bits_array[0] = 0; + depth_bits_array[1] = depth_bits; + depth_bits_array[2] = depth_bits; + + stencil_bits_array[0] = 0; /* no depth or stencil */ + stencil_bits_array[1] = 0; /* z24x8 */ + stencil_bits_array[2] = 8; /* z24s8 */ + + msaa_samples_array[0] = 0; + + depth_buffer_factor = 3; + back_buffer_factor = 1; + + num_modes = depth_buffer_factor * back_buffer_factor * 4; + + if (pixel_bits == 16) { + fb_format = GL_RGB; + fb_type = GL_UNSIGNED_SHORT_5_6_5; + } + else { + fb_format = GL_BGRA; + fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; + } + + configs = driCreateConfigs(fb_format, fb_type, + depth_bits_array, + stencil_bits_array, depth_buffer_factor, + back_buffer_modes, back_buffer_factor, + msaa_samples_array, 1); + if (configs == NULL) { + debug_printf("%s: driCreateConfigs failed\n", __FUNCTION__); + return NULL; + } + + return configs; +} + + + +/* This is the driver specific part of the createNewScreen entry point. + * + * Returns the __GLcontextModes supported by this driver. 
+ */ +static const __DRIconfig **dri_init_screen(__DRIscreenPrivate *sPriv) +{ + static const __DRIversion ddx_expected = { 1, 6, 0 }; /* hw query */ + static const __DRIversion dri_expected = { 4, 0, 0 }; + static const __DRIversion drm_expected = { 1, 5, 0 }; /* hw query */ + struct dri_screen *screen; + + if (!driCheckDriDdxDrmVersions2("dri", + &sPriv->dri_version, &dri_expected, + &sPriv->ddx_version, &ddx_expected, + &sPriv->drm_version, &drm_expected)) { + return NULL; + } + + /* Set up dispatch table to cope with all known extensions: + */ + driInitExtensions( NULL, card_extensions, GL_FALSE ); + + + screen = CALLOC_STRUCT(dri_screen); + if (!screen) + goto fail; + + screen->sPriv = sPriv; + sPriv->private = (void *) screen; + + + /* Search the registered winsys' for one that likes this sPriv. + * This is required in situations where multiple devices speak to + * the same DDX and are built into the same binary. + * + * Note that cases like Intel i915 vs i965 doesn't fall into this + * category because they are built into separate binaries. + * + * Nonetheless, it's healthy to keep that level of detail out of + * this state_tracker. + */ + for (i = 0; + i < dri1_winsys_count && + screen->st_winsys == NULL; + i++) + { + screen->dri_winsys = + dri_winsys[i]->check_dri_privates( sPriv->pDevPriv, + sPriv->pSAREA + /* versions, etc?? 
*/)); + } + + + driParseOptionInfo(&screen->optionCache, + __driConfigOptions, + __driNConfigOptions); + + + /* Plug our info back into the __DRIscreenPrivate: + */ + sPriv->private = (void *) screen; + sPriv->extensions = driScreenExtensions; + + return dri_fill_in_modes(sPriv, + dri_priv->cpp * 8, + 24, + 8, + 1); +fail: + return NULL; +} + + + +const struct __DriverAPIRec driDriverAPI = { + .InitScreen = dri_init_screen, + .DestroyScreen = dri_destroy_screen, + .CreateContext = dri_create_context, + .DestroyContext = dri_destroy_context, + .CreateBuffer = dri_create_buffer, + .DestroyBuffer = dri_destroy_buffer, + .SwapBuffers = dri_swap_buffers, + .MakeCurrent = dri_make_current, + .UnbindContext = dri_unbind_context, + .GetSwapInfo = dri_get_swap_info, + .GetDrawableMSC = driDrawableGetMSC32, + .WaitForMSC = driWaitForMSC32, + .CopySubBuffer = dri_copy_sub_buffer, + + //.InitScreen2 = dri_init_screen2, +}; diff --git a/src/gallium/winsys/drm/intel/dri/intel_screen.h b/src/gallium/state_trackers/glx/dri/dri_screen.h index 0bb43a915c..12ed86d22a 100644 --- a/src/gallium/winsys/drm/intel/dri/intel_screen.h +++ b/src/gallium/state_trackers/glx/dri/dri_screen.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2009, VMware, Inc. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,33 +25,27 @@ * **************************************************************************/ -#ifndef _INTEL_SCREEN_H_ -#define _INTEL_SCREEN_H_ +#ifndef DRI_SCREEN_H +#define DRI_SCREEN_H #include "dri_util.h" -#include "i830_common.h" #include "xmlconfig.h" -#include "ws_dri_bufpool.h" #include "pipe/p_compiler.h" -#include "intel_be_device.h" - -struct intel_screen +struct dri_screen { - struct intel_be_device base; + __DRIScreenPrivate *sPriv; + struct pipe_winsys *pipe_winsys; + struct pipe_screen *pipe_screen; struct { - drm_handle_t handle; - - /* We create a static dri buffer for the frontbuffer. + /* Need a pipe_surface pointer to do client-side swapbuffers: */ - struct _DriBufferObject *buffer; + unsigned long buffer_handle; struct pipe_surface *surface; struct pipe_texture *texture; - char *map; /* memory map */ - int offset; /* from start of video mem, in bytes */ int pitch; /* row stride, in bytes */ int width; int height; @@ -62,61 +56,28 @@ struct intel_screen int deviceID; int drmMinor; - drmI830Sarea *sarea; /** - * Configuration cache with default values for all contexts - */ + * Configuration cache with default values for all contexts + */ driOptionCache optionCache; - boolean havePools; - /** * Temporary(?) context to use for SwapBuffers or other situations in * which we need a rendering context, but none is currently bound. 
*/ - struct intel_context *dummyContext; - - /* - * New stuff form the i915tex integration - */ - unsigned batch_id; - - - struct pipe_winsys *winsys; + struct dri_context *dummyContext; }; /** cast wrapper */ -static INLINE struct intel_screen * -intel_screen(__DRIscreenPrivate *sPriv) +static INLINE struct dri_screen * +dri_screen(__DRIscreenPrivate *sPriv) { - return (struct intel_screen *) sPriv->private; + return (struct dri_screen *) sPriv->private; } -extern void -intelUpdateScreenRotation(__DRIscreenPrivate * sPriv, drmI830Sarea * sarea); - - -extern void intelDestroyContext(__DRIcontextPrivate * driContextPriv); - -extern boolean intelUnbindContext(__DRIcontextPrivate * driContextPriv); - -extern boolean -intelMakeCurrent(__DRIcontextPrivate * driContextPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv); - - -extern boolean -intelCreatePools(__DRIscreenPrivate *sPriv); - -extern boolean -intelCreateContext(const __GLcontextModes * visual, - __DRIcontextPrivate * driContextPriv, - void *sharedContextPrivate); - #endif diff --git a/src/gallium/state_trackers/glx/xlib/Makefile b/src/gallium/state_trackers/glx/xlib/Makefile new file mode 100644 index 0000000000..6d10b090aa --- /dev/null +++ b/src/gallium/state_trackers/glx/xlib/Makefile @@ -0,0 +1,17 @@ +TOP = ../../../../.. 
+include $(TOP)/configs/current + +LIBNAME = xlib + +LIBRARY_INCLUDES = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main + +C_SOURCES = \ + glxapi.c \ + fakeglx.c \ + fakeglx_fonts.c \ + xm_api.c + +include ../../../Makefile.template diff --git a/src/gallium/state_trackers/glx/xlib/SConscript b/src/gallium/state_trackers/glx/xlib/SConscript new file mode 100644 index 0000000000..01641e90e4 --- /dev/null +++ b/src/gallium/state_trackers/glx/xlib/SConscript @@ -0,0 +1,25 @@ +####################################################################### +# SConscript for xlib state_tracker + +Import('*') + +if env['platform'] == 'linux' \ + and 'mesa' in env['statetrackers'] \ + and ('softpipe' or 'i915simple' or 'trace') in env['drivers']: + + env = env.Clone() + + env.Append(CPPPATH = [ + '#/src/mesa', + '#/src/mesa/main', + ]) + + st_xlib = env.ConvenienceLibrary( + target = 'st_xlib', + source = [ 'glxapi.c', + 'fakeglx.c', + 'fakeglx_fonts.c', + 'xm_api.c', + ] + ) + Export('st_xlib') diff --git a/src/gallium/winsys/xlib/fakeglx.c b/src/gallium/state_trackers/glx/xlib/fakeglx.c index fd2d222c85..65e7048188 100644 --- a/src/gallium/winsys/xlib/fakeglx.c +++ b/src/gallium/state_trackers/glx/xlib/fakeglx.c @@ -40,25 +40,18 @@ -#include "glxheader.h" #include "glxapi.h" -#include "GL/xmesa.h" +#include "xm_api.h" #include "context.h" #include "config.h" #include "macros.h" #include "imports.h" -#include "mtypes.h" #include "version.h" -#include "xfonts.h" -#include "xmesaP.h" +#include "fakeglx.h" #include "state_tracker/st_context.h" #include "state_tracker/st_public.h" -#ifdef __VMS -#define _mesa_sprintf sprintf -#endif - /* This indicates the client-side GLX API and GLX encoder version. 
*/ #define CLIENT_MAJOR_VERSION 1 #define CLIENT_MINOR_VERSION 4 /* but don't have 1.3's pbuffers, etc yet */ @@ -76,7 +69,6 @@ #define VENDOR "Brian Paul" #define EXTENSIONS \ - "GLX_MESA_set_3dfx_mode " \ "GLX_MESA_copy_sub_buffer " \ "GLX_MESA_pixmap_colormap " \ "GLX_MESA_release_buffers " \ @@ -116,22 +108,6 @@ static XMesaVisual *VisualTable = NULL; static int NumVisuals = 0; -/* - * This struct and some code fragments borrowed - * from Mark Kilgard's GLUT library. - */ -typedef struct _OverlayInfo { - /* Avoid 64-bit portability problems by being careful to use - longs due to the way XGetWindowProperty is specified. Note - that these parameters are passed as CARD32s over X - protocol. */ - unsigned long overlay_visual; - long transparent_type; - long value; - long layer; -} OverlayInfo; - - /* Macro to handle c_class vs class field name in XVisualInfo struct */ #if defined(__cplusplus) || defined(c_plusplus) @@ -173,98 +149,6 @@ is_usable_visual( XVisualInfo *vinfo ) } - -/** - * Get an array OverlayInfo records for specified screen. - * \param dpy the display - * \param screen screen number - * \param numOverlays returns numver of OverlayInfo records - * \return pointer to OverlayInfo array, free with XFree() - */ -static OverlayInfo * -GetOverlayInfo(Display *dpy, int screen, int *numOverlays) -{ - Atom overlayVisualsAtom; - Atom actualType; - Status status; - unsigned char *ovInfo; - unsigned long sizeData, bytesLeft; - int actualFormat; - - /* - * The SERVER_OVERLAY_VISUALS property on the root window contains - * a list of overlay visuals. Get that list now. 
- */ - overlayVisualsAtom = XInternAtom(dpy,"SERVER_OVERLAY_VISUALS", True); - if (overlayVisualsAtom == None) { - return 0; - } - - status = XGetWindowProperty(dpy, RootWindow(dpy, screen), - overlayVisualsAtom, 0L, (long) 10000, False, - overlayVisualsAtom, &actualType, &actualFormat, - &sizeData, &bytesLeft, - &ovInfo); - - if (status != Success || actualType != overlayVisualsAtom || - actualFormat != 32 || sizeData < 4) { - /* something went wrong */ - XFree((void *) ovInfo); - *numOverlays = 0; - return NULL; - } - - *numOverlays = sizeData / 4; - return (OverlayInfo *) ovInfo; -} - - - -/** - * Return the level (overlay, normal, underlay) of a given XVisualInfo. - * Input: dpy - the X display - * vinfo - the XVisualInfo to test - * Return: level of the visual: - * 0 = normal planes - * >0 = overlay planes - * <0 = underlay planes - */ -static int -level_of_visual( Display *dpy, XVisualInfo *vinfo ) -{ - OverlayInfo *overlay_info; - int numOverlaysPerScreen, i; - - overlay_info = GetOverlayInfo(dpy, vinfo->screen, &numOverlaysPerScreen); - if (!overlay_info) { - return 0; - } - - /* search the overlay visual list for the visual ID of interest */ - for (i = 0; i < numOverlaysPerScreen; i++) { - const OverlayInfo *ov = overlay_info + i; - if (ov->overlay_visual == vinfo->visualid) { - /* found the visual */ - if (/*ov->transparent_type==1 &&*/ ov->layer!=0) { - int level = ov->layer; - XFree((void *) overlay_info); - return level; - } - else { - XFree((void *) overlay_info); - return 0; - } - } - } - - /* The visual ID was not found in the overlay list. */ - XFree((void *) overlay_info); - return 0; -} - - - - /* * Given an XVisualInfo and RGB, Double, and Depth buffer flags, save the * configuration in our list of GLX visuals. 
@@ -421,60 +305,28 @@ default_accum_bits(void) static XMesaVisual create_glx_visual( Display *dpy, XVisualInfo *visinfo ) { - int vislevel; GLint zBits = default_depth_bits(); GLint accBits = default_accum_bits(); GLboolean alphaFlag = default_alpha_bits() > 0; - vislevel = level_of_visual( dpy, visinfo ); - if (vislevel) { - /* Configure this visual as a CI, single-buffered overlay */ + if (is_usable_visual( visinfo )) { + /* Configure this visual as RGB, double-buffered, depth-buffered. */ + /* This is surely wrong for some people's needs but what else */ + /* can be done? They should use glXChooseVisual(). */ return save_glx_visual( dpy, visinfo, - GL_FALSE, /* rgb */ - GL_FALSE, /* alpha */ - GL_FALSE, /* double */ + GL_TRUE, /* rgb */ + alphaFlag, /* alpha */ + GL_TRUE, /* double */ GL_FALSE, /* stereo */ - 0, /* depth bits */ - 0, /* stencil bits */ - 0,0,0,0, /* accum bits */ - vislevel, /* level */ + zBits, + STENCIL_BITS, + accBits, /* r */ + accBits, /* g */ + accBits, /* b */ + accBits, /* a */ + 0, /* level */ 0 /* numAux */ - ); - } - else if (is_usable_visual( visinfo )) { - if (_mesa_getenv("MESA_GLX_FORCE_CI")) { - /* Configure this visual as a COLOR INDEX visual. */ - return save_glx_visual( dpy, visinfo, - GL_FALSE, /* rgb */ - GL_FALSE, /* alpha */ - GL_TRUE, /* double */ - GL_FALSE, /* stereo */ - zBits, - STENCIL_BITS, - 0, 0, 0, 0, /* accum bits */ - 0, /* level */ - 0 /* numAux */ - ); - } - else { - /* Configure this visual as RGB, double-buffered, depth-buffered. */ - /* This is surely wrong for some people's needs but what else */ - /* can be done? They should use glXChooseVisual(). 
*/ - return save_glx_visual( dpy, visinfo, - GL_TRUE, /* rgb */ - alphaFlag, /* alpha */ - GL_TRUE, /* double */ - GL_FALSE, /* stereo */ - zBits, - STENCIL_BITS, - accBits, /* r */ - accBits, /* g */ - accBits, /* b */ - accBits, /* a */ - 0, /* level */ - 0 /* numAux */ - ); - } + ); } else { _mesa_warning(NULL, "Mesa: error in glXCreateContext: bad visual\n"); @@ -512,45 +364,6 @@ find_glx_visual( Display *dpy, XVisualInfo *vinfo ) -/** - * Return the transparent pixel value for a GLX visual. - * Input: glxvis - the glx_visual - * Return: a pixel value or -1 if no transparent pixel - */ -static int -transparent_pixel( XMesaVisual glxvis ) -{ - Display *dpy = glxvis->display; - XVisualInfo *vinfo = glxvis->visinfo; - OverlayInfo *overlay_info; - int numOverlaysPerScreen, i; - - overlay_info = GetOverlayInfo(dpy, vinfo->screen, &numOverlaysPerScreen); - if (!overlay_info) { - return -1; - } - - for (i = 0; i < numOverlaysPerScreen; i++) { - const OverlayInfo *ov = overlay_info + i; - if (ov->overlay_visual == vinfo->visualid) { - /* found it! */ - if (ov->transparent_type == 0) { - /* type 0 indicates no transparency */ - XFree((void *) overlay_info); - return -1; - } - else { - /* ov->value is the transparent pixel */ - XFree((void *) overlay_info); - return ov->value; - } - } - } - - /* The visual ID was not found in the overlay list. */ - XFree((void *) overlay_info); - return -1; -} @@ -597,7 +410,7 @@ get_visual( Display *dpy, int scr, unsigned int depth, int xclass ) return NULL; } } - + return vis; } @@ -650,89 +463,48 @@ get_env_visual(Display *dpy, int scr, const char *varname) /* - * Select an X visual which satisfies the RGBA/CI flag and minimum depth. - * Input: dpy, screen - X display and screen number - * rgba - GL_TRUE = RGBA mode, GL_FALSE = CI mode + * Select an X visual which satisfies the RGBA flag and minimum depth. 
+ * Input: dpy, + * screen - X display and screen number * min_depth - minimum visual depth * preferred_class - preferred GLX visual class or DONT_CARE * Return: pointer to an XVisualInfo or NULL. */ static XVisualInfo * -choose_x_visual( Display *dpy, int screen, GLboolean rgba, int min_depth, +choose_x_visual( Display *dpy, int screen, int min_depth, int preferred_class ) { XVisualInfo *vis; int xclass, visclass = 0; int depth; - if (rgba) { - Atom hp_cr_maps = XInternAtom(dpy, "_HP_RGB_SMOOTH_MAP_LIST", True); - /* First see if the MESA_RGB_VISUAL env var is defined */ - vis = get_env_visual( dpy, screen, "MESA_RGB_VISUAL" ); - if (vis) { - return vis; - } - /* Otherwise, search for a suitable visual */ - if (preferred_class==DONT_CARE) { - for (xclass=0;xclass<6;xclass++) { - switch (xclass) { - case 0: visclass = TrueColor; break; - case 1: visclass = DirectColor; break; - case 2: visclass = PseudoColor; break; - case 3: visclass = StaticColor; break; - case 4: visclass = GrayScale; break; - case 5: visclass = StaticGray; break; - } - if (min_depth==0) { - /* start with shallowest */ - for (depth=0;depth<=32;depth++) { - if (visclass==TrueColor && depth==8 && !hp_cr_maps) { - /* Special case: try to get 8-bit PseudoColor before */ - /* 8-bit TrueColor */ - vis = get_visual( dpy, screen, 8, PseudoColor ); - if (vis) { - return vis; - } - } - vis = get_visual( dpy, screen, depth, visclass ); - if (vis) { - return vis; - } - } - } - else { - /* start with deepest */ - for (depth=32;depth>=min_depth;depth--) { - if (visclass==TrueColor && depth==8 && !hp_cr_maps) { - /* Special case: try to get 8-bit PseudoColor before */ - /* 8-bit TrueColor */ - vis = get_visual( dpy, screen, 8, PseudoColor ); - if (vis) { - return vis; - } - } - vis = get_visual( dpy, screen, depth, visclass ); - if (vis) { - return vis; - } - } - } - } - } - else { - /* search for a specific visual class */ - switch (preferred_class) { - case GLX_TRUE_COLOR_EXT: visclass = TrueColor; break; - 
case GLX_DIRECT_COLOR_EXT: visclass = DirectColor; break; - case GLX_PSEUDO_COLOR_EXT: visclass = PseudoColor; break; - case GLX_STATIC_COLOR_EXT: visclass = StaticColor; break; - case GLX_GRAY_SCALE_EXT: visclass = GrayScale; break; - case GLX_STATIC_GRAY_EXT: visclass = StaticGray; break; - default: return NULL; + /* First see if the MESA_RGB_VISUAL env var is defined */ + vis = get_env_visual( dpy, screen, "MESA_RGB_VISUAL" ); + if (vis) { + return vis; + } + /* Otherwise, search for a suitable visual */ + if (preferred_class==DONT_CARE) { + for (xclass=0;xclass<6;xclass++) { + switch (xclass) { + case 0: visclass = TrueColor; break; + case 1: visclass = DirectColor; break; + case 2: visclass = PseudoColor; break; + case 3: visclass = StaticColor; break; + case 4: visclass = GrayScale; break; + case 5: visclass = StaticGray; break; } if (min_depth==0) { /* start with shallowest */ for (depth=0;depth<=32;depth++) { + if (visclass==TrueColor && depth==8) { + /* Special case: try to get 8-bit PseudoColor before */ + /* 8-bit TrueColor */ + vis = get_visual( dpy, screen, 8, PseudoColor ); + if (vis) { + return vis; + } + } vis = get_visual( dpy, screen, depth, visclass ); if (vis) { return vis; @@ -742,6 +514,14 @@ choose_x_visual( Display *dpy, int screen, GLboolean rgba, int min_depth, else { /* start with deepest */ for (depth=32;depth>=min_depth;depth--) { + if (visclass==TrueColor && depth==8) { + /* Special case: try to get 8-bit PseudoColor before */ + /* 8-bit TrueColor */ + vis = get_visual( dpy, screen, 8, PseudoColor ); + if (vis) { + return vis; + } + } vis = get_visual( dpy, screen, depth, visclass ); if (vis) { return vis; @@ -751,56 +531,28 @@ choose_x_visual( Display *dpy, int screen, GLboolean rgba, int min_depth, } } else { - /* First see if the MESA_CI_VISUAL env var is defined */ - vis = get_env_visual( dpy, screen, "MESA_CI_VISUAL" ); - if (vis) { - return vis; - } - /* Otherwise, search for a suitable visual, starting with shallowest */ - if 
(preferred_class==DONT_CARE) { - for (xclass=0;xclass<4;xclass++) { - switch (xclass) { - case 0: visclass = PseudoColor; break; - case 1: visclass = StaticColor; break; - case 2: visclass = GrayScale; break; - case 3: visclass = StaticGray; break; - } - /* try 8-bit up through 16-bit */ - for (depth=8;depth<=16;depth++) { - vis = get_visual( dpy, screen, depth, visclass ); - if (vis) { - return vis; - } - } - /* try min_depth up to 8-bit */ - for (depth=min_depth;depth<8;depth++) { - vis = get_visual( dpy, screen, depth, visclass ); - if (vis) { - return vis; - } - } - } + /* search for a specific visual class */ + switch (preferred_class) { + case GLX_TRUE_COLOR_EXT: visclass = TrueColor; break; + case GLX_DIRECT_COLOR_EXT: visclass = DirectColor; break; + case GLX_PSEUDO_COLOR_EXT: visclass = PseudoColor; break; + case GLX_STATIC_COLOR_EXT: visclass = StaticColor; break; + case GLX_GRAY_SCALE_EXT: visclass = GrayScale; break; + case GLX_STATIC_GRAY_EXT: visclass = StaticGray; break; + default: return NULL; } - else { - /* search for a specific visual class */ - switch (preferred_class) { - case GLX_TRUE_COLOR_EXT: visclass = TrueColor; break; - case GLX_DIRECT_COLOR_EXT: visclass = DirectColor; break; - case GLX_PSEUDO_COLOR_EXT: visclass = PseudoColor; break; - case GLX_STATIC_COLOR_EXT: visclass = StaticColor; break; - case GLX_GRAY_SCALE_EXT: visclass = GrayScale; break; - case GLX_STATIC_GRAY_EXT: visclass = StaticGray; break; - default: return NULL; - } - /* try 8-bit up through 16-bit */ - for (depth=8;depth<=16;depth++) { + if (min_depth==0) { + /* start with shallowest */ + for (depth=0;depth<=32;depth++) { vis = get_visual( dpy, screen, depth, visclass ); if (vis) { return vis; } } - /* try min_depth up to 8-bit */ - for (depth=min_depth;depth<8;depth++) { + } + else { + /* start with deepest */ + for (depth=32;depth>=min_depth;depth--) { vis = get_visual( dpy, screen, depth, visclass ); if (vis) { return vis; @@ -815,117 +567,6 @@ choose_x_visual( 
Display *dpy, int screen, GLboolean rgba, int min_depth, -/* - * Find the deepest X over/underlay visual of at least min_depth. - * Input: dpy, screen - X display and screen number - * level - the over/underlay level - * trans_type - transparent pixel type: GLX_NONE_EXT, - * GLX_TRANSPARENT_RGB_EXT, GLX_TRANSPARENT_INDEX_EXT, - * or DONT_CARE - * trans_value - transparent pixel value or DONT_CARE - * min_depth - minimum visual depth - * preferred_class - preferred GLX visual class or DONT_CARE - * Return: pointer to an XVisualInfo or NULL. - */ -static XVisualInfo * -choose_x_overlay_visual( Display *dpy, int scr, GLboolean rgbFlag, - int level, int trans_type, int trans_value, - int min_depth, int preferred_class ) -{ - OverlayInfo *overlay_info; - int numOverlaysPerScreen; - int i; - XVisualInfo *deepvis; - int deepest; - - /*DEBUG int tt, tv; */ - - switch (preferred_class) { - case GLX_TRUE_COLOR_EXT: preferred_class = TrueColor; break; - case GLX_DIRECT_COLOR_EXT: preferred_class = DirectColor; break; - case GLX_PSEUDO_COLOR_EXT: preferred_class = PseudoColor; break; - case GLX_STATIC_COLOR_EXT: preferred_class = StaticColor; break; - case GLX_GRAY_SCALE_EXT: preferred_class = GrayScale; break; - case GLX_STATIC_GRAY_EXT: preferred_class = StaticGray; break; - default: preferred_class = DONT_CARE; - } - - overlay_info = GetOverlayInfo(dpy, scr, &numOverlaysPerScreen); - if (!overlay_info) { - return NULL; - } - - /* Search for the deepest overlay which satisifies all criteria. 
*/ - deepest = min_depth; - deepvis = NULL; - - for (i = 0; i < numOverlaysPerScreen; i++) { - const OverlayInfo *ov = overlay_info + i; - XVisualInfo *vislist, vistemplate; - int count; - - if (ov->layer!=level) { - /* failed overlay level criteria */ - continue; - } - if (!(trans_type==DONT_CARE - || (trans_type==GLX_TRANSPARENT_INDEX_EXT - && ov->transparent_type>0) - || (trans_type==GLX_NONE_EXT && ov->transparent_type==0))) { - /* failed transparent pixel type criteria */ - continue; - } - if (trans_value!=DONT_CARE && trans_value!=ov->value) { - /* failed transparent pixel value criteria */ - continue; - } - - /* get XVisualInfo and check the depth */ - vistemplate.visualid = ov->overlay_visual; - vistemplate.screen = scr; - vislist = XGetVisualInfo( dpy, VisualIDMask | VisualScreenMask, - &vistemplate, &count ); - - if (count!=1) { - /* something went wrong */ - continue; - } - if (preferred_class!=DONT_CARE && preferred_class!=vislist->CLASS) { - /* wrong visual class */ - continue; - } - - /* if RGB was requested, make sure we have True/DirectColor */ - if (rgbFlag && vislist->CLASS != TrueColor - && vislist->CLASS != DirectColor) - continue; - - /* if CI was requested, make sure we have a color indexed visual */ - if (!rgbFlag - && (vislist->CLASS == TrueColor || vislist->CLASS == DirectColor)) - continue; - - if (deepvis==NULL || vislist->depth > deepest) { - /* YES! 
found a satisfactory visual */ - if (deepvis) { - XFree( deepvis ); - } - deepest = vislist->depth; - deepvis = vislist; - /* DEBUG tt = ov->transparent_type;*/ - /* DEBUG tv = ov->value; */ - } - } - -/*DEBUG - if (deepvis) { - printf("chose 0x%x: layer=%d depth=%d trans_type=%d trans_value=%d\n", - deepvis->visualid, level, deepvis->depth, tt, tv ); - } -*/ - return deepvis; -} - /**********************************************************************/ /*** Display-related functions ***/ @@ -1273,6 +914,7 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) (void) caveat; + /* * Since we're only simulating the GLX extension this function will never * find any real GL visuals. Instead, all we can do is try to find an RGB @@ -1290,8 +932,7 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) if (vis) { /* give the visual some useful GLX attributes */ double_flag = GL_TRUE; - if (vis->depth > 8) - rgb_flag = GL_TRUE; + rgb_flag = GL_TRUE; depth_size = default_depth_bits(); stencil_size = STENCIL_BITS; /* XXX accum??? 
*/ @@ -1299,38 +940,17 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) } else if (level==0) { /* normal color planes */ - if (rgb_flag) { - /* Get an RGB visual */ - int min_rgb = min_red + min_green + min_blue; - if (min_rgb>1 && min_rgb<8) { - /* a special case to be sure we can get a monochrome visual */ - min_rgb = 1; - } - vis = choose_x_visual( dpy, screen, rgb_flag, min_rgb, visual_type ); - } - else { - /* Get a color index visual */ - vis = choose_x_visual( dpy, screen, rgb_flag, min_ci, visual_type ); - accumRedSize = accumGreenSize = accumBlueSize = accumAlphaSize = 0; + /* Get an RGB visual */ + int min_rgb = min_red + min_green + min_blue; + if (min_rgb>1 && min_rgb<8) { + /* a special case to be sure we can get a monochrome visual */ + min_rgb = 1; } + vis = choose_x_visual( dpy, screen, min_rgb, visual_type ); } else { - /* over/underlay planes */ - if (rgb_flag) { - /* rgba overlay */ - int min_rgb = min_red + min_green + min_blue; - if (min_rgb>1 && min_rgb<8) { - /* a special case to be sure we can get a monochrome visual */ - min_rgb = 1; - } - vis = choose_x_overlay_visual( dpy, screen, rgb_flag, level, - trans_type, trans_value, min_rgb, visual_type ); - } - else { - /* color index overlay */ - vis = choose_x_overlay_visual( dpy, screen, rgb_flag, level, - trans_type, trans_value, min_ci, visual_type ); - } + _mesa_warning(NULL, "overlay not supported"); + return NULL; } if (vis) { @@ -1356,11 +976,16 @@ choose_visual( Display *dpy, int screen, const int *list, GLboolean fbConfig ) /* we only support one size of stencil and accum buffers. 
*/ if (stencil_size > 0) stencil_size = STENCIL_BITS; - if (accumRedSize > 0 || accumGreenSize > 0 || accumBlueSize > 0 || + + if (accumRedSize > 0 || + accumGreenSize > 0 || + accumBlueSize > 0 || accumAlphaSize > 0) { + accumRedSize = - accumGreenSize = - accumBlueSize = default_accum_bits(); + accumGreenSize = + accumBlueSize = default_accum_bits(); + accumAlphaSize = alpha_flag ? accumRedSize : 0; } @@ -1384,16 +1009,12 @@ Fake_glXChooseVisual( Display *dpy, int screen, int *list ) xmvis = choose_visual(dpy, screen, list, GL_FALSE); if (xmvis) { -#if 0 - return xmvis->vishandle; -#else /* create a new vishandle - the cached one may be stale */ xmvis->vishandle = (XVisualInfo *) _mesa_malloc(sizeof(XVisualInfo)); if (xmvis->vishandle) { _mesa_memcpy(xmvis->vishandle, xmvis->visinfo, sizeof(XVisualInfo)); } return xmvis->vishandle; -#endif } else return NULL; @@ -1489,9 +1110,6 @@ Fake_glXMakeContextCurrent( Display *dpy, GLXDrawable draw, /* Out of memory, or context/drawable depth mismatch */ return False; } -#ifdef FX - FXcreateContext( xmctx->xm_visual, draw, xmctx, drawBuffer ); -#endif } /* Find the XMesaBuffer which corresponds to the GLXDrawable 'read' */ @@ -1509,9 +1127,6 @@ Fake_glXMakeContextCurrent( Display *dpy, GLXDrawable draw, /* Out of memory, or context/drawable depth mismatch */ return False; } -#ifdef FX - FXcreateContext( xmctx->xm_visual, read, xmctx, readBuffer ); -#endif } if (no_rast && @@ -1541,7 +1156,7 @@ Fake_glXMakeContextCurrent( Display *dpy, GLXDrawable draw, } else if (!ctx && !draw && !read) { /* release current context w/out assigning new one. 
*/ - XMesaMakeCurrent( NULL, NULL ); + XMesaMakeCurrent2( NULL, NULL, NULL ); MakeCurrent_PrevContext = 0; MakeCurrent_PrevDrawable = 0; MakeCurrent_PrevReadable = 0; @@ -1824,32 +1439,11 @@ get_config( XMesaVisual xmvis, int attrib, int *value, GLboolean fbconfig ) } return 0; case GLX_TRANSPARENT_TYPE_EXT: - if (xmvis->mesa_visual.level==0) { - /* normal planes */ - *value = GLX_NONE_EXT; - } - else if (xmvis->mesa_visual.level>0) { - /* overlay */ - if (xmvis->mesa_visual.rgbMode) { - *value = GLX_TRANSPARENT_RGB_EXT; - } - else { - *value = GLX_TRANSPARENT_INDEX_EXT; - } - } - else if (xmvis->mesa_visual.level<0) { - /* underlay */ - *value = GLX_NONE_EXT; - } + /* normal planes */ + *value = GLX_NONE_EXT; return 0; case GLX_TRANSPARENT_INDEX_VALUE_EXT: - { - int pixel = transparent_pixel( xmvis ); - if (pixel>=0) { - *value = pixel; - } - /* else undefined */ - } + /* undefined */ return 0; case GLX_TRANSPARENT_RED_VALUE_EXT: /* undefined */ @@ -2017,13 +1611,7 @@ Fake_glXWaitX( void ) static const char * get_extensions( void ) { -#ifdef FX - const char *fx = _mesa_getenv("MESA_GLX_FX"); - if (fx && fx[0] != 'd') { - return EXTENSIONS; - } -#endif - return EXTENSIONS + 23; /* skip "GLX_MESA_set_3dfx_mode" */ + return EXTENSIONS; } @@ -2198,11 +1786,6 @@ Fake_glXCreateWindow( Display *dpy, GLXFBConfig config, Window win, if (!xmbuf) return 0; -#ifdef FX - /* XXX this will segfault if actually called */ - FXcreateContext(xmvis, win, NULL, xmbuf); -#endif - (void) dpy; (void) attribList; /* Ignored in GLX 1.3 */ @@ -2213,7 +1796,7 @@ Fake_glXCreateWindow( Display *dpy, GLXFBConfig config, Window win, static void Fake_glXDestroyWindow( Display *dpy, GLXWindow window ) { - XMesaBuffer b = XMesaFindBuffer(dpy, (XMesaDrawable) window); + XMesaBuffer b = XMesaFindBuffer(dpy, (Drawable) window); if (b) XMesaDestroyBuffer(b); /* don't destroy X window */ @@ -2334,7 +1917,7 @@ Fake_glXCreatePixmap( Display *dpy, GLXFBConfig config, Pixmap pixmap, static void 
Fake_glXDestroyPixmap( Display *dpy, GLXPixmap pixmap ) { - XMesaBuffer b = XMesaFindBuffer(dpy, (XMesaDrawable)pixmap); + XMesaBuffer b = XMesaFindBuffer(dpy, (Drawable)pixmap); if (b) XMesaDestroyBuffer(b); /* don't destroy X pixmap */ @@ -2988,50 +2571,6 @@ Fake_glXReleaseBuffersMESA( Display *dpy, GLXDrawable d ) return False; } - - -/*** GLX_MESA_set_3dfx_mode ***/ - -static Bool -Fake_glXSet3DfxModeMESA( int mode ) -{ - return XMesaSetFXmode( mode ); -} - - - -/*** GLX_NV_vertex_array range ***/ -static void * -Fake_glXAllocateMemoryNV( GLsizei size, - GLfloat readFrequency, - GLfloat writeFrequency, - GLfloat priority ) -{ - (void) size; - (void) readFrequency; - (void) writeFrequency; - (void) priority; - return NULL; -} - - -static void -Fake_glXFreeMemoryNV( GLvoid *pointer ) -{ - (void) pointer; -} - - -/*** GLX_MESA_agp_offset ***/ - -static GLuint -Fake_glXGetAGPOffsetMESA( const GLvoid *pointer ) -{ - (void) pointer; - return ~0; -} - - /*** GLX_EXT_texture_from_pixmap ***/ static void @@ -3052,16 +2591,15 @@ Fake_glXReleaseTexImageEXT(Display *dpy, GLXDrawable drawable, int buffer) } -/* silence warning */ -extern struct _glxapi_table *_mesa_GetGLXDispatchTable(void); - /** * Create a new GLX API dispatch table with its function pointers * initialized to point to Mesa's "fake" GLX API functions. - * Note: there's a similar function (_real_GetGLXDispatchTable) that - * returns a new dispatch table with all pointers initalized to point - * to "real" GLX functions (which understand GLX wire protocol, etc). + * + * Note: there used to be a similar function + * (_real_GetGLXDispatchTable) that returns a new dispatch table with + * all pointers initalized to point to "real" GLX functions (which + * understand GLX wire protocol, etc). 
*/ struct _glxapi_table * _mesa_GetGLXDispatchTable(void) @@ -3197,16 +2735,6 @@ _mesa_GetGLXDispatchTable(void) /*** GLX_MESA_pixmap_colormap ***/ glx.CreateGLXPixmapMESA = Fake_glXCreateGLXPixmapMESA; - /*** GLX_MESA_set_3dfx_mode ***/ - glx.Set3DfxModeMESA = Fake_glXSet3DfxModeMESA; - - /*** GLX_NV_vertex_array_range ***/ - glx.AllocateMemoryNV = Fake_glXAllocateMemoryNV; - glx.FreeMemoryNV = Fake_glXFreeMemoryNV; - - /*** GLX_MESA_agp_offset ***/ - glx.GetAGPOffsetMESA = Fake_glXGetAGPOffsetMESA; - /*** GLX_EXT_texture_from_pixmap ***/ glx.BindTexImageEXT = Fake_glXBindTexImageEXT; glx.ReleaseTexImageEXT = Fake_glXReleaseTexImageEXT; diff --git a/src/gallium/winsys/xlib/xfonts.h b/src/gallium/state_trackers/glx/xlib/fakeglx.h index e36f42f817..e5fd960072 100644 --- a/src/gallium/winsys/xlib/xfonts.h +++ b/src/gallium/state_trackers/glx/xlib/fakeglx.h @@ -24,15 +24,15 @@ */ -#ifndef XFONTS_H -#define XFONTS_H +#ifndef FAKEGLX_H +#define FAKEGLX_H -#ifdef __VMS -#include <GL/vms_x_fix.h> -#endif #include <X11/Xlib.h> +struct _glxapi_table; + +extern struct _glxapi_table *_mesa_GetGLXDispatchTable(void); extern void Fake_glXUseXFont( Font font, int first, int count, int listbase ); diff --git a/src/gallium/winsys/xlib/xfonts.c b/src/gallium/state_trackers/glx/xlib/fakeglx_fonts.c index d72c600bd1..e359046756 100644 --- a/src/gallium/winsys/xlib/xfonts.c +++ b/src/gallium/state_trackers/glx/xlib/fakeglx_fonts.c @@ -28,14 +28,10 @@ * Copyright (C) 1995 Thorsten.Ohl @ Physik.TH-Darmstadt.de */ -#ifdef __VMS -#include <GL/vms_x_fix.h> -#endif - -#include "glxheader.h" #include "context.h" #include "imports.h" -#include "xfonts.h" +#include "fakeglx.h" +#include <GL/glx.h> /* Some debugging info. 
*/ diff --git a/src/gallium/winsys/xlib/glxapi.c b/src/gallium/state_trackers/glx/xlib/glxapi.c index c059fc3edb..c2cb34d7cf 100644 --- a/src/gallium/winsys/xlib/glxapi.c +++ b/src/gallium/state_trackers/glx/xlib/glxapi.c @@ -34,14 +34,19 @@ #include <stdlib.h> #include <stdio.h> #include <string.h> -#include "main/glheader.h" #include "glapi/glapi.h" #include "glxapi.h" +#include "fakeglx.h" #include "pipe/p_thread.h" -extern struct _glxapi_table *_real_GetGLXDispatchTable(void); -extern struct _glxapi_table *_mesa_GetGLXDispatchTable(void); +#if defined(__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__) >= 303 +# define PUBLIC __attribute__((visibility("default"))) +# define USED __attribute__((used)) +#else +# define PUBLIC +# define USED +#endif struct display_dispatch { @@ -999,93 +1004,6 @@ glXCreateGLXPixmapMESA(Display *dpy, XVisualInfo *visinfo, Pixmap pixmap, Colorm return (t->CreateGLXPixmapMESA)(dpy, visinfo, pixmap, cmap); } - - -/*** GLX_MESA_set_3dfx_mode ***/ - -Bool PUBLIC -glXSet3DfxModeMESA(int mode) -{ - struct _glxapi_table *t; - Display *dpy = glXGetCurrentDisplay(); - GET_DISPATCH(dpy, t); - if (!t) - return False; - return (t->Set3DfxModeMESA)(mode); -} - - - -/*** GLX_NV_vertex_array_range ***/ - -void PUBLIC * -glXAllocateMemoryNV( GLsizei size, - GLfloat readFrequency, - GLfloat writeFrequency, - GLfloat priority ) -{ - struct _glxapi_table *t; - Display *dpy = glXGetCurrentDisplay(); - GET_DISPATCH(dpy, t); - if (!t) - return NULL; - return (t->AllocateMemoryNV)(size, readFrequency, writeFrequency, priority); -} - - -void PUBLIC -glXFreeMemoryNV( GLvoid *pointer ) -{ - struct _glxapi_table *t; - Display *dpy = glXGetCurrentDisplay(); - GET_DISPATCH(dpy, t); - if (!t) - return; - (t->FreeMemoryNV)(pointer); -} - - - - -/*** GLX_MESA_agp_offset */ - -GLuint PUBLIC -glXGetAGPOffsetMESA( const GLvoid *pointer ) -{ - struct _glxapi_table *t; - Display *dpy = glXGetCurrentDisplay(); - GET_DISPATCH(dpy, t); - if (!t) - return ~0; - return 
(t->GetAGPOffsetMESA)(pointer); -} - - -/*** GLX_MESA_allocate_memory */ - -void * -glXAllocateMemoryMESA(Display *dpy, int scrn, size_t size, - float readfreq, float writefreq, float priority) -{ - /* dummy */ - return NULL; -} - -void -glXFreeMemoryMESA(Display *dpy, int scrn, void *pointer) -{ - /* dummy */ -} - - -GLuint -glXGetMemoryOffsetMESA(Display *dpy, int scrn, const void *pointer) -{ - /* dummy */ - return 0; -} - - /*** GLX_EXT_texture_from_pixmap */ void @@ -1120,45 +1038,6 @@ _glxapi_get_version(void) } -/* - * Return array of extension strings. - */ -const char ** -_glxapi_get_extensions(void) -{ - static const char *extensions[] = { -#ifdef GLX_EXT_import_context - "GLX_EXT_import_context", -#endif -#ifdef GLX_SGI_video_sync - "GLX_SGI_video_sync", -#endif -#ifdef GLX_MESA_copy_sub_buffer - "GLX_MESA_copy_sub_buffer", -#endif -#ifdef GLX_MESA_release_buffers - "GLX_MESA_release_buffers", -#endif -#ifdef GLX_MESA_pixmap_colormap - "GLX_MESA_pixmap_colormap", -#endif -#ifdef GLX_MESA_set_3dfx_mode - "GLX_MESA_set_3dfx_mode", -#endif -#ifdef GLX_SGIX_fbconfig - "GLX_SGIX_fbconfig", -#endif -#ifdef GLX_SGIX_pbuffer - "GLX_SGIX_pbuffer", -#endif -#ifdef GLX_EXT_texture_from_pixmap - "GLX_EXT_texture_from_pixmap", -#endif - NULL - }; - return extensions; -} - /* * Return size of the GLX dispatch table, in entries, not bytes. 
@@ -1321,24 +1200,9 @@ static struct name_address_pair GLX_functions[] = { /*** GLX_MESA_release_buffers ***/ { "glXReleaseBuffersMESA", (__GLXextFuncPtr) glXReleaseBuffersMESA }, - /*** GLX_MESA_set_3dfx_mode ***/ - { "glXSet3DfxModeMESA", (__GLXextFuncPtr) glXSet3DfxModeMESA }, - /*** GLX_ARB_get_proc_address ***/ { "glXGetProcAddressARB", (__GLXextFuncPtr) glXGetProcAddressARB }, - /*** GLX_NV_vertex_array_range ***/ - { "glXAllocateMemoryNV", (__GLXextFuncPtr) glXAllocateMemoryNV }, - { "glXFreeMemoryNV", (__GLXextFuncPtr) glXFreeMemoryNV }, - - /*** GLX_MESA_agp_offset ***/ - { "glXGetAGPOffsetMESA", (__GLXextFuncPtr) glXGetAGPOffsetMESA }, - - /*** GLX_MESA_allocate_memory ***/ - { "glXAllocateMemoryMESA", (__GLXextFuncPtr) glXAllocateMemoryMESA }, - { "glXFreeMemoryMESA", (__GLXextFuncPtr) glXFreeMemoryMESA }, - { "glXGetMemoryOffsetMESA", (__GLXextFuncPtr) glXGetMemoryOffsetMESA }, - /*** GLX_EXT_texture_from_pixmap ***/ { "glXBindTexImageEXT", (__GLXextFuncPtr) glXBindTexImageEXT }, { "glXReleaseTexImageEXT", (__GLXextFuncPtr) glXReleaseTexImageEXT }, diff --git a/src/gallium/winsys/xlib/glxapi.h b/src/gallium/state_trackers/glx/xlib/glxapi.h index 37de81e55a..b4e12b4162 100644 --- a/src/gallium/winsys/xlib/glxapi.h +++ b/src/gallium/state_trackers/glx/xlib/glxapi.h @@ -184,19 +184,6 @@ struct _glxapi_table { /*** GLX_MESA_pixmap_colormap ***/ GLXPixmap (*CreateGLXPixmapMESA)(Display *dpy, XVisualInfo *visinfo, Pixmap pixmap, Colormap cmap); - /*** GLX_MESA_set_3dfx_mode ***/ - Bool (*Set3DfxModeMESA)(int mode); - - /*** GLX_NV_vertex_array_range ***/ - void * (*AllocateMemoryNV)( GLsizei size, - GLfloat readFrequency, - GLfloat writeFrequency, - GLfloat priority ); - void (*FreeMemoryNV)( GLvoid *pointer ); - - /*** GLX_MESA_agp_offset ***/ - GLuint (*GetAGPOffsetMESA)( const GLvoid *pointer ); - /*** GLX_EXT_texture_from_pixmap ***/ void (*BindTexImageEXT)(Display *dpy, GLXDrawable drawable, int buffer, const int *attrib_list); @@ -209,8 +196,6 @@ extern 
const char * _glxapi_get_version(void); -extern const char ** -_glxapi_get_extensions(void); extern GLuint diff --git a/src/gallium/winsys/xlib/xm_api.c b/src/gallium/state_trackers/glx/xlib/xm_api.c index d28a6423b9..33dc044ad5 100644 --- a/src/gallium/winsys/xlib/xm_api.c +++ b/src/gallium/state_trackers/glx/xlib/xm_api.c @@ -57,9 +57,7 @@ #undef __WIN32__ #endif -#include "glxheader.h" -#include "GL/xmesa.h" -#include "xmesaP.h" +#include "xm_api.h" #include "main/context.h" #include "main/framebuffer.h" @@ -69,7 +67,20 @@ #include "pipe/p_screen.h" #include "pipe/p_context.h" -#include "xm_winsys_aub.h" +#include "xm_winsys.h" +#include <GL/glx.h> + + +/* Driver interface routines, set up by xlib backend on library + * _init(). These are global in the same way that function names are + * global. + */ +static struct xm_driver driver; + +void xmesa_set_driver( const struct xm_driver *templ ) +{ + driver = *templ; +} /** * Global X driver lock @@ -77,8 +88,6 @@ pipe_mutex _xmesa_lock; -int xmesa_mode; - /**********************************************************************/ /***** X Utility Functions *****/ @@ -88,14 +97,12 @@ int xmesa_mode; /** * Return the host's byte order as LSBFirst or MSBFirst ala X. */ -#ifndef XFree86Server static int host_byte_order( void ) { int i = 1; char *cptr = (char *) &i; return (*cptr==1) ? 
LSBFirst : MSBFirst; } -#endif /** @@ -104,9 +111,9 @@ static int host_byte_order( void ) * 1 = shared XImage support available * 2 = shared Pixmap support available also */ -int xmesa_check_for_xshm( XMesaDisplay *display ) +int xmesa_check_for_xshm( Display *display ) { -#if defined(USE_XSHM) && !defined(XFree86Server) +#if defined(USE_XSHM) int major, minor, ignore; Bool pixmaps; @@ -146,19 +153,9 @@ int xmesa_check_for_xshm( XMesaDisplay *display ) static int bits_per_pixel( XMesaVisual xmv ) { -#ifdef XFree86Server - const int depth = xmv->nplanes; - int i; - assert(depth > 0); - for (i = 0; i < screenInfo.numPixmapFormats; i++) { - if (screenInfo.formats[i].depth == depth) - return screenInfo.formats[i].bitsPerPixel; - } - return depth; /* should never get here, but this should be safe */ -#else - XMesaDisplay *dpy = xmv->display; - XMesaVisualInfo visinfo = xmv->visinfo; - XMesaImage *img; + Display *dpy = xmv->display; + XVisualInfo * visinfo = xmv->visinfo; + XImage *img; int bitsPerPixel; /* Create a temporary XImage */ img = XCreateImage( dpy, visinfo->visual, visinfo->depth, @@ -174,9 +171,8 @@ bits_per_pixel( XMesaVisual xmv ) /* free the XImage */ _mesa_free( img->data ); img->data = NULL; - XMesaDestroyImage( img ); + XDestroyImage( img ); return bitsPerPixel; -#endif } @@ -190,10 +186,9 @@ bits_per_pixel( XMesaVisual xmv ) * Return: GL_TRUE - window exists * GL_FALSE - window doesn't exist */ -#ifndef XFree86Server static GLboolean WindowExistsFlag; -static int window_exists_err_handler( XMesaDisplay* dpy, XErrorEvent* xerr ) +static int window_exists_err_handler( Display* dpy, XErrorEvent* xerr ) { (void) dpy; if (xerr->error_code == BadWindow) { @@ -202,10 +197,10 @@ static int window_exists_err_handler( XMesaDisplay* dpy, XErrorEvent* xerr ) return 0; } -static GLboolean window_exists( XMesaDisplay *dpy, Window win ) +static GLboolean window_exists( Display *dpy, Window win ) { XWindowAttributes wa; - int (*old_handler)( XMesaDisplay*, 
XErrorEvent* ); + int (*old_handler)( Display*, XErrorEvent* ); WindowExistsFlag = GL_TRUE; old_handler = XSetErrorHandler(window_exists_err_handler); XGetWindowAttributes( dpy, win, &wa ); /* dummy request */ @@ -214,7 +209,7 @@ static GLboolean window_exists( XMesaDisplay *dpy, Window win ) } static Status -get_drawable_size( XMesaDisplay *dpy, Drawable d, uint *width, uint *height ) +get_drawable_size( Display *dpy, Drawable d, uint *width, uint *height ) { Window root; Status stat; @@ -225,7 +220,6 @@ get_drawable_size( XMesaDisplay *dpy, Drawable d, uint *width, uint *height ) *height = h; return stat; } -#endif /** @@ -235,13 +229,9 @@ get_drawable_size( XMesaDisplay *dpy, Drawable d, uint *width, uint *height ) * \param height returns height in pixels */ static void -xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b, +xmesa_get_window_size(Display *dpy, XMesaBuffer b, GLuint *width, GLuint *height) { -#ifdef XFree86Server - *width = MIN2(b->drawable->width, MAX_WIDTH); - *height = MIN2(b->drawable->height, MAX_HEIGHT); -#else Status stat; pipe_mutex_lock(_xmesa_lock); @@ -254,9 +244,12 @@ xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b, _mesa_warning(NULL, "XGetGeometry failed!\n"); *width = *height = 1; } -#endif } +#define GET_REDMASK(__v) __v->mesa_visual.redMask +#define GET_GREENMASK(__v) __v->mesa_visual.greenMask +#define GET_BLUEMASK(__v) __v->mesa_visual.blueMask + /** * Choose the pixel format for the given visual. 
@@ -266,11 +259,14 @@ xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b, static GLuint choose_pixel_format(XMesaVisual v) { + boolean native_byte_order = (host_byte_order() == + ImageByteOrder(v->display)); + if ( GET_REDMASK(v) == 0x0000ff && GET_GREENMASK(v) == 0x00ff00 && GET_BLUEMASK(v) == 0xff0000 && v->BitsPerPixel == 32) { - if (CHECK_BYTE_ORDER(v)) { + if (native_byte_order) { /* no byteswapping needed */ return 0 /* PIXEL_FORMAT_U_A8_B8_G8_R8 */; } @@ -282,7 +278,7 @@ choose_pixel_format(XMesaVisual v) && GET_GREENMASK(v) == 0x00ff00 && GET_BLUEMASK(v) == 0x0000ff && v->BitsPerPixel == 32) { - if (CHECK_BYTE_ORDER(v)) { + if (native_byte_order) { /* no byteswapping needed */ return PIPE_FORMAT_A8R8G8B8_UNORM; } @@ -293,7 +289,7 @@ choose_pixel_format(XMesaVisual v) else if ( GET_REDMASK(v) == 0xf800 && GET_GREENMASK(v) == 0x07e0 && GET_BLUEMASK(v) == 0x001f - && CHECK_BYTE_ORDER(v) + && native_byte_order && v->BitsPerPixel == 16) { /* 5-6-5 RGB */ return PIPE_FORMAT_R5G6B5_UNORM; @@ -324,8 +320,8 @@ XMesaBuffer XMesaBufferList = NULL; * \return new XMesaBuffer or NULL if any problem */ static XMesaBuffer -create_xmesa_buffer(XMesaDrawable d, BufferType type, - XMesaVisual vis, XMesaColormap cmap) +create_xmesa_buffer(Drawable d, BufferType type, + XMesaVisual vis, Colormap cmap) { XMesaBuffer b; GLframebuffer *fb; @@ -418,7 +414,7 @@ create_xmesa_buffer(XMesaDrawable d, BufferType type, * the notThis buffer. */ XMesaBuffer -xmesa_find_buffer(XMesaDisplay *dpy, XMesaColormap cmap, XMesaBuffer notThis) +xmesa_find_buffer(Display *dpy, Colormap cmap, XMesaBuffer notThis) { XMesaBuffer b; for (b = XMesaBufferList; b; b = b->Next) { @@ -496,13 +492,9 @@ xmesa_free_buffer(XMesaBuffer buffer) */ static GLboolean initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, - GLboolean rgb_flag, XMesaDrawable window, - XMesaColormap cmap) + GLboolean rgb_flag, Drawable window, + Colormap cmap) { -#ifdef XFree86Server - int client = (window) ? 
CLIENT_ID(window->id) : 0; -#endif - ASSERT(!b || b->xm_visual == v); /* Save true bits/pixel */ @@ -542,7 +534,7 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, if (_mesa_getenv("MESA_INFO")) { _mesa_printf("X/Mesa visual = %p\n", (void *) v); _mesa_printf("X/Mesa level = %d\n", v->mesa_visual.level); - _mesa_printf("X/Mesa depth = %d\n", GET_VISUAL_DEPTH(v)); + _mesa_printf("X/Mesa depth = %d\n", v->visinfo->depth); _mesa_printf("X/Mesa bits per pixel = %d\n", v->BitsPerPixel); } @@ -557,12 +549,8 @@ initialize_visual_and_buffer(XMesaVisual v, XMesaBuffer b, } /* X11 graphics context */ -#ifdef XFree86Server - b->gc = CreateScratchGC(v->display, window->depth); -#else b->gc = XCreateGC( v->display, window, 0, NULL ); -#endif - XMesaSetFunction( v->display, b->gc, GXcopy ); + XSetFunction( v->display, b->gc, GXcopy ); } return GL_TRUE; @@ -627,8 +615,8 @@ xmesa_convert_from_x_visual_type( int visualType ) * Return; a new XMesaVisual or 0 if error. */ PUBLIC -XMesaVisual XMesaCreateVisual( XMesaDisplay *display, - XMesaVisualInfo visinfo, +XMesaVisual XMesaCreateVisual( Display *display, + XVisualInfo * visinfo, GLboolean rgb_flag, GLboolean alpha_flag, GLboolean db_flag, @@ -647,7 +635,6 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, XMesaVisual v; GLint red_bits, green_bits, blue_bits, alpha_bits; -#ifndef XFree86Server /* For debugging only */ if (_mesa_getenv("MESA_XSYNC")) { /* This makes debugging X easier. @@ -656,7 +643,6 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, */ XSynchronize( display, 1 ); } -#endif v = (XMesaVisual) CALLOC_STRUCT(xmesa_visual); if (!v) { @@ -669,41 +655,22 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, * the struct but we may need some of the information contained in it * at a later time. 
*/ -#ifndef XFree86Server v->visinfo = (XVisualInfo *) MALLOC(sizeof(*visinfo)); if(!v->visinfo) { _mesa_free(v); return NULL; } MEMCPY(v->visinfo, visinfo, sizeof(*visinfo)); -#endif v->ximage_flag = ximage_flag; -#ifdef XFree86Server - /* We could calculate these values by ourselves. nplanes is either the sum - * of the red, green, and blue bits or the number index bits. - * ColormapEntries is either (1U << index_bits) or - * (1U << max(redBits, greenBits, blueBits)). - */ - assert(visinfo->nplanes > 0); - v->nplanes = visinfo->nplanes; - v->ColormapEntries = visinfo->ColormapEntries; - - v->mesa_visual.redMask = visinfo->redMask; - v->mesa_visual.greenMask = visinfo->greenMask; - v->mesa_visual.blueMask = visinfo->blueMask; - v->mesa_visual.visualID = visinfo->vid; - v->mesa_visual.screen = 0; /* FIXME: What should be done here? */ -#else v->mesa_visual.redMask = visinfo->red_mask; v->mesa_visual.greenMask = visinfo->green_mask; v->mesa_visual.blueMask = visinfo->blue_mask; v->mesa_visual.visualID = visinfo->visualid; v->mesa_visual.screen = visinfo->screen; -#endif -#if defined(XFree86Server) || !(defined(__cplusplus) || defined(c_plusplus)) +#if !(defined(__cplusplus) || defined(c_plusplus)) v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->class); #else v->mesa_visual.visualType = xmesa_convert_from_x_visual_type(visinfo->c_class); @@ -726,14 +693,14 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, else { /* this is an approximation */ int depth; - depth = GET_VISUAL_DEPTH(v); + depth = v->visinfo->depth; red_bits = depth / 3; depth -= red_bits; green_bits = depth / 2; depth -= green_bits; blue_bits = depth; alpha_bits = 0; - assert( red_bits + green_bits + blue_bits == GET_VISUAL_DEPTH(v) ); + assert( red_bits + green_bits + blue_bits == v->visinfo->depth ); } alpha_bits = v->mesa_visual.alphaBits; } @@ -758,9 +725,7 @@ XMesaVisual XMesaCreateVisual( XMesaDisplay *display, PUBLIC void XMesaDestroyVisual( XMesaVisual v ) { -#ifndef 
XFree86Server _mesa_free(v->visinfo); -#endif _mesa_free(v); } @@ -777,6 +742,7 @@ PUBLIC XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) { static GLboolean firstTime = GL_TRUE; + struct pipe_screen *screen; struct pipe_context *pipe; XMesaContext c; GLcontext *mesaCtx; @@ -797,24 +763,24 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) c->xm_visual = v; c->xm_buffer = NULL; /* set later by XMesaMakeCurrent */ + + /* XXX: create once per Xlib Display. + */ + screen = driver.create_pipe_screen(); + if (screen == NULL) + goto fail; - if (!getenv("XM_AUB")) { - xmesa_mode = XMESA_SOFTPIPE; - pipe = xmesa_create_pipe_context( c, pf ); - } - else { - xmesa_mode = XMESA_AUB; - pipe = xmesa_create_i965simple(xmesa_get_pipe_winsys_aub(v)); - } - + pipe = driver.create_pipe_context( screen, + (void *)c ); if (pipe == NULL) goto fail; - c->st = st_create_context(pipe, &v->mesa_visual, + c->st = st_create_context(pipe, + &v->mesa_visual, share_list ? share_list->st : NULL); if (c->st == NULL) goto fail; - + mesaCtx = c->st->ctx; c->st->ctx->DriverCtx = c; @@ -826,13 +792,6 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) _mesa_enable_2_0_extensions(mesaCtx); #endif -#ifdef XFree86Server - /* If we're running in the X server, do bounds checking to prevent - * segfaults and server crashes! 
- */ - mesaCtx->Const.CheckArrayBounds = GL_TRUE; -#endif - return c; fail: @@ -840,6 +799,10 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) st_destroy_context(c->st); else if (pipe) pipe->destroy(pipe); + + if (screen) + screen->destroy( screen ); + FREE(c); return NULL; } @@ -849,12 +812,14 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) PUBLIC void XMesaDestroyContext( XMesaContext c ) { - struct pipe_screen *screen = c->st->pipe->screen; st_destroy_context(c->st); + /* FIXME: We should destroy the screen here, but if we do so, surfaces may * outlive it, causing segfaults + struct pipe_screen *screen = c->st->pipe->screen; screen->destroy(screen); */ + _mesa_free(c); } @@ -868,35 +833,26 @@ void XMesaDestroyContext( XMesaContext c ) * \return new XMesaBuffer or NULL if error */ PUBLIC XMesaBuffer -XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w) +XMesaCreateWindowBuffer(XMesaVisual v, Window w) { -#ifndef XFree86Server XWindowAttributes attr; -#endif XMesaBuffer b; - XMesaColormap cmap; + Colormap cmap; int depth; assert(v); assert(w); /* Check that window depth matches visual depth */ -#ifdef XFree86Server - depth = ((XMesaDrawable)w)->depth; -#else XGetWindowAttributes( v->display, w, &attr ); depth = attr.depth; -#endif - if (GET_VISUAL_DEPTH(v) != depth) { + if (v->visinfo->depth != depth) { _mesa_warning(NULL, "XMesaCreateWindowBuffer: depth mismatch between visual (%d) and window (%d)!\n", - GET_VISUAL_DEPTH(v), depth); + v->visinfo->depth, depth); return NULL; } /* Find colormap */ -#ifdef XFree86Server - cmap = (ColormapPtr)LookupIDByType(wColormap(w), RT_COLORMAP); -#else if (attr.colormap) { cmap = attr.colormap; } @@ -906,14 +862,13 @@ XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w) /* OK, let's just allocate a new one and hope for the best */ cmap = XCreateColormap(v->display, w, attr.visual, AllocNone); } -#endif - b = create_xmesa_buffer((XMesaDrawable) w, WINDOW, v, cmap); + b = 
create_xmesa_buffer((Drawable) w, WINDOW, v, cmap); if (!b) return NULL; if (!initialize_visual_and_buffer( v, b, v->mesa_visual.rgbMode, - (XMesaDrawable) w, cmap )) { + (Drawable) w, cmap )) { xmesa_free_buffer(b); return NULL; } @@ -933,18 +888,18 @@ XMesaCreateWindowBuffer(XMesaVisual v, XMesaWindow w) * \returns new XMesaBuffer or NULL if error */ PUBLIC XMesaBuffer -XMesaCreatePixmapBuffer(XMesaVisual v, XMesaPixmap p, XMesaColormap cmap) +XMesaCreatePixmapBuffer(XMesaVisual v, Pixmap p, Colormap cmap) { XMesaBuffer b; assert(v); - b = create_xmesa_buffer((XMesaDrawable) p, PIXMAP, v, cmap); + b = create_xmesa_buffer((Drawable) p, PIXMAP, v, cmap); if (!b) return NULL; if (!initialize_visual_and_buffer(v, b, v->mesa_visual.rgbMode, - (XMesaDrawable) p, cmap)) { + (Drawable) p, cmap)) { xmesa_free_buffer(b); return NULL; } @@ -957,8 +912,8 @@ XMesaCreatePixmapBuffer(XMesaVisual v, XMesaPixmap p, XMesaColormap cmap) * For GLX_EXT_texture_from_pixmap */ XMesaBuffer -XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p, - XMesaColormap cmap, +XMesaCreatePixmapTextureBuffer(XMesaVisual v, Pixmap p, + Colormap cmap, int format, int target, int mipmap) { GET_CURRENT_CONTEXT(ctx); @@ -967,7 +922,7 @@ XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p, assert(v); - b = create_xmesa_buffer((XMesaDrawable) p, PIXMAP, v, cmap); + b = create_xmesa_buffer((Drawable) p, PIXMAP, v, cmap); if (!b) return NULL; @@ -1005,7 +960,7 @@ XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p, b->TextureMipmap = mipmap; if (!initialize_visual_and_buffer(v, b, v->mesa_visual.rgbMode, - (XMesaDrawable) p, cmap)) { + (Drawable) p, cmap)) { xmesa_free_buffer(b); return NULL; } @@ -1016,12 +971,11 @@ XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p, XMesaBuffer -XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap, +XMesaCreatePBuffer(XMesaVisual v, Colormap cmap, unsigned int width, unsigned int height) { -#ifndef XFree86Server - XMesaWindow root; - 
XMesaDrawable drawable; /* X Pixmap Drawable */ + Window root; + Drawable drawable; /* X Pixmap Drawable */ XMesaBuffer b; /* allocate pixmap for front buffer */ @@ -1042,9 +996,6 @@ XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap, } return b; -#else - return 0; -#endif } @@ -1064,8 +1015,7 @@ XMesaDestroyBuffer(XMesaBuffer b) * and all attached renderbuffers. * Called when: * 1. the first time a buffer is bound to a context. - * 2. from the XMesaResizeBuffers() API function. - * 3. SwapBuffers. XXX probabaly from xm_flush_frontbuffer() too... + * 2. SwapBuffers. XXX probabaly from xm_flush_frontbuffer() too... * Note: it's possible (and legal) for xmctx to be NULL. That can happen * when resizing a buffer when no rendering context is bound. */ @@ -1078,13 +1028,6 @@ xmesa_check_and_update_buffer_size(XMesaContext xmctx, XMesaBuffer drawBuffer) } -/* - * Bind buffer b to context c and make c the current rendering context. - */ -GLboolean XMesaMakeCurrent( XMesaContext c, XMesaBuffer b ) -{ - return XMesaMakeCurrent2( c, b, b ); -} /* @@ -1156,76 +1099,8 @@ XMesaContext XMesaGetCurrentContext( void ) } -XMesaBuffer XMesaGetCurrentBuffer( void ) -{ - GET_CURRENT_CONTEXT(ctx); - if (ctx) { - XMesaBuffer xmbuf = xmesa_buffer(ctx->DrawBuffer); - return xmbuf; - } - else { - return 0; - } -} - - -/* New in Mesa 3.1 */ -XMesaBuffer XMesaGetCurrentReadBuffer( void ) -{ - GET_CURRENT_CONTEXT(ctx); - if (ctx) { - return xmesa_buffer(ctx->ReadBuffer); - } - else { - return 0; - } -} - - -#ifdef XFree86Server -PUBLIC -GLboolean XMesaForceCurrent(XMesaContext c) -{ - if (c) { - _glapi_set_dispatch(c->mesa.CurrentDispatch); - - if (&(c->mesa) != _mesa_get_current_context()) { - _mesa_make_current(&c->mesa, c->mesa.DrawBuffer, c->mesa.ReadBuffer); - } - } - else { - _mesa_make_current(NULL, NULL, NULL); - } - return GL_TRUE; -} - - -PUBLIC -GLboolean XMesaLoseCurrent(XMesaContext c) -{ - (void) c; - _mesa_make_current(NULL, NULL, NULL); - return GL_TRUE; -} -PUBLIC 
-GLboolean XMesaCopyContext( XMesaContext xm_src, XMesaContext xm_dst, GLuint mask ) -{ - _mesa_copy_context(&xm_src->mesa, &xm_dst->mesa, mask); - return GL_TRUE; -} -#endif /* XFree86Server */ - - -#ifndef FX -GLboolean XMesaSetFXmode( GLint mode ) -{ - (void) mode; - return GL_FALSE; -} -#endif - /* @@ -1242,12 +1117,9 @@ void XMesaSwapBuffers( XMesaBuffer b ) */ st_notify_swapbuffers(b->stfb); - surf = st_get_framebuffer_surface(b->stfb, ST_SURFACE_BACK_LEFT); + st_get_framebuffer_surface(b->stfb, ST_SURFACE_BACK_LEFT, &surf); if (surf) { - if (xmesa_mode == XMESA_AUB) - xmesa_display_aub( surf ); - else - xmesa_display_surface(b, surf); + driver.display_surface(b, surf); } xmesa_check_and_update_buffer_size(NULL, b); @@ -1260,12 +1132,13 @@ void XMesaSwapBuffers( XMesaBuffer b ) */ void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height ) { - struct pipe_surface *surf_front - = st_get_framebuffer_surface(b->stfb, ST_SURFACE_FRONT_LEFT); - struct pipe_surface *surf_back - = st_get_framebuffer_surface(b->stfb, ST_SURFACE_BACK_LEFT); + struct pipe_surface *surf_front; + struct pipe_surface *surf_back; struct pipe_context *pipe = NULL; /* XXX fix */ + st_get_framebuffer_surface(b->stfb, ST_SURFACE_FRONT_LEFT, &surf_front); + st_get_framebuffer_surface(b->stfb, ST_SURFACE_BACK_LEFT, &surf_back); + if (!surf_front || !surf_back) return; @@ -1278,56 +1151,19 @@ void XMesaCopySubBuffer( XMesaBuffer b, int x, int y, int width, int height ) -/* - * Return the depth buffer associated with an XMesaBuffer. - * Input: b - the XMesa buffer handle - * Output: width, height - size of buffer in pixels - * bytesPerValue - bytes per depth value (2 or 4) - * buffer - pointer to depth buffer values - * Return: GL_TRUE or GL_FALSE to indicate success or failure. 
- */ -GLboolean XMesaGetDepthBuffer( XMesaBuffer b, GLint *width, GLint *height, - GLint *bytesPerValue, void **buffer ) -{ - *width = 0; - *height = 0; - *bytesPerValue = 0; - *buffer = 0; - return GL_FALSE; -} - - void XMesaFlush( XMesaContext c ) { if (c && c->xm_visual->display) { -#ifdef XFree86Server - /* NOT_NEEDED */ -#else st_finish(c->st); XSync( c->xm_visual->display, False ); -#endif } } -const char *XMesaGetString( XMesaContext c, int name ) -{ - (void) c; - if (name==XMESA_VERSION) { - return "5.0"; - } - else if (name==XMESA_EXTENSIONS) { - return ""; - } - else { - return NULL; - } -} - -XMesaBuffer XMesaFindBuffer( XMesaDisplay *dpy, XMesaDrawable d ) +XMesaBuffer XMesaFindBuffer( Display *dpy, Drawable d ) { XMesaBuffer b; for (b=XMesaBufferList; b; b=b->Next) { @@ -1342,7 +1178,7 @@ XMesaBuffer XMesaFindBuffer( XMesaDisplay *dpy, XMesaDrawable d ) /** * Free/destroy all XMesaBuffers associated with given display. */ -void xmesa_destroy_buffers_on_display(XMesaDisplay *dpy) +void xmesa_destroy_buffers_on_display(Display *dpy) { XMesaBuffer b, next; for (b = XMesaBufferList; b; b = next) { @@ -1367,48 +1203,20 @@ void XMesaGarbageCollect( void ) b->xm_visual->display && b->drawable && b->type == WINDOW) { -#ifdef XFree86Server - /* NOT_NEEDED */ -#else XSync(b->xm_visual->display, False); if (!window_exists( b->xm_visual->display, b->drawable )) { /* found a dead window, free the ancillary info */ XMesaDestroyBuffer( b ); } -#endif } } } -unsigned long XMesaDitherColor( XMesaContext xmesa, GLint x, GLint y, - GLfloat red, GLfloat green, - GLfloat blue, GLfloat alpha ) -{ - /* no longer supported */ - return 0; -} - - -/* - * This is typically called when the window size changes and we need - * to reallocate the buffer's back/depth/stencil/accum buffers. 
- */ -PUBLIC void -XMesaResizeBuffers( XMesaBuffer b ) -{ - GET_CURRENT_CONTEXT(ctx); - XMesaContext xmctx = xmesa_context(ctx); - if (!xmctx) - return; - xmesa_check_and_update_buffer_size(xmctx, b); -} - - PUBLIC void -XMesaBindTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer, +XMesaBindTexImage(Display *dpy, XMesaBuffer drawable, int buffer, const int *attrib_list) { } @@ -1416,7 +1224,7 @@ XMesaBindTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer, PUBLIC void -XMesaReleaseTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer) +XMesaReleaseTexImage(Display *dpy, XMesaBuffer drawable, int buffer) { } diff --git a/src/gallium/state_trackers/glx/xlib/xm_api.h b/src/gallium/state_trackers/glx/xlib/xm_api.h new file mode 100644 index 0000000000..2b8302d174 --- /dev/null +++ b/src/gallium/state_trackers/glx/xlib/xm_api.h @@ -0,0 +1,393 @@ +/* + * Mesa 3-D graphics library + * Version: 7.1 + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + + +/* Sample Usage: + +In addition to the usual X calls to select a visual, create a colormap +and create a window, you must do the following to use the X/Mesa interface: + +1. Call XMesaCreateVisual() to make an XMesaVisual from an XVisualInfo. + +2. Call XMesaCreateContext() to create an X/Mesa rendering context, given + the XMesaVisual. + +3. Call XMesaCreateWindowBuffer() to create an XMesaBuffer from an X window + and XMesaVisual. + +4. Call XMesaMakeCurrent() to bind the XMesaBuffer to an XMesaContext and + to make the context the current one. + +5. Make gl* calls to render your graphics. + +6. Use XMesaSwapBuffers() when double buffering to swap front/back buffers. + +7. Before the X window is destroyed, call XMesaDestroyBuffer(). + +8. Before exiting, call XMesaDestroyVisual and XMesaDestroyContext. + +*/ + + + + +#ifndef XMESA_H +#define XMESA_H + + +#include "mtypes.h" +#include "state_tracker/st_context.h" +#include "state_tracker/st_public.h" +#include "pipe/p_thread.h" + + +# include <X11/Xlib.h> +# include <X11/Xlibint.h> +# include <X11/Xutil.h> +# ifdef USE_XSHM /* was SHM */ +# include <sys/ipc.h> +# include <sys/shm.h> +# include <X11/extensions/XShm.h> +# endif + +typedef struct xmesa_buffer *XMesaBuffer; +typedef struct xmesa_context *XMesaContext; +typedef struct xmesa_visual *XMesaVisual; + + + +/* + * Create a new X/Mesa visual. + * Input: display - X11 display + * visinfo - an XVisualInfo pointer + * rgb_flag - GL_TRUE = RGB mode, + * GL_FALSE = color index mode + * alpha_flag - alpha buffer requested? + * db_flag - GL_TRUE = double-buffered, + * GL_FALSE = single buffered + * stereo_flag - stereo visual? 
+ * ximage_flag - GL_TRUE = use an XImage for back buffer, + * GL_FALSE = use an off-screen pixmap for back buffer + * depth_size - requested bits/depth values, or zero + * stencil_size - requested bits/stencil values, or zero + * accum_red_size - requested bits/red accum values, or zero + * accum_green_size - requested bits/green accum values, or zero + * accum_blue_size - requested bits/blue accum values, or zero + * accum_alpha_size - requested bits/alpha accum values, or zero + * num_samples - number of samples/pixel if multisampling, or zero + * level - visual level, usually 0 + * visualCaveat - ala the GLX extension, usually GLX_NONE_EXT + * Return; a new XMesaVisual or 0 if error. + */ +extern XMesaVisual XMesaCreateVisual( Display *display, + XVisualInfo * visinfo, + GLboolean rgb_flag, + GLboolean alpha_flag, + GLboolean db_flag, + GLboolean stereo_flag, + GLboolean ximage_flag, + GLint depth_size, + GLint stencil_size, + GLint accum_red_size, + GLint accum_green_size, + GLint accum_blue_size, + GLint accum_alpha_size, + GLint num_samples, + GLint level, + GLint visualCaveat ); + +/* + * Destroy an XMesaVisual, but not the associated XVisualInfo. + */ +extern void XMesaDestroyVisual( XMesaVisual v ); + + + +/* + * Create a new XMesaContext for rendering into an X11 window. + * + * Input: visual - an XMesaVisual + * share_list - another XMesaContext with which to share display + * lists or NULL if no sharing is wanted. + * Return: an XMesaContext or NULL if error. + */ +extern XMesaContext XMesaCreateContext( XMesaVisual v, + XMesaContext share_list ); + + +/* + * Destroy a rendering context as returned by XMesaCreateContext() + */ +extern void XMesaDestroyContext( XMesaContext c ); + + + +/* + * Create an XMesaBuffer from an X window. + */ +extern XMesaBuffer XMesaCreateWindowBuffer( XMesaVisual v, Window w ); + + +/* + * Create an XMesaBuffer from an X pixmap. 
+ */ +extern XMesaBuffer XMesaCreatePixmapBuffer( XMesaVisual v, + Pixmap p, + Colormap cmap ); + + +/* + * Destroy an XMesaBuffer, but not the corresponding window or pixmap. + */ +extern void XMesaDestroyBuffer( XMesaBuffer b ); + + +/* + * Return the XMesaBuffer handle which corresponds to an X drawable, if any. + * + * New in Mesa 2.3. + */ +extern XMesaBuffer XMesaFindBuffer( Display *dpy, + Drawable d ); + + + +/* + * Bind two buffers (read and draw) to a context and make the + * context the current one. + * New in Mesa 3.3 + */ +extern GLboolean XMesaMakeCurrent2( XMesaContext c, + XMesaBuffer drawBuffer, + XMesaBuffer readBuffer ); + + +/* + * Unbind the current context from its buffer. + */ +extern GLboolean XMesaUnbindContext( XMesaContext c ); + + +/* + * Return a handle to the current context. + */ +extern XMesaContext XMesaGetCurrentContext( void ); + + +/* + * Swap the front and back buffers for the given buffer. No action is + * taken if the buffer is not double buffered. + */ +extern void XMesaSwapBuffers( XMesaBuffer b ); + + +/* + * Copy a sub-region of the back buffer to the front buffer. + * + * New in Mesa 2.6 + */ +extern void XMesaCopySubBuffer( XMesaBuffer b, + int x, + int y, + int width, + int height ); + + + + + +/* + * Flush/sync a context + */ +extern void XMesaFlush( XMesaContext c ); + + + +/* + * Scan for XMesaBuffers whose window/pixmap has been destroyed, then free + * any memory used by that buffer. + * + * New in Mesa 2.3. + */ +extern void XMesaGarbageCollect( void ); + + + +/* + * Create a pbuffer. 
+ * New in Mesa 4.1 + */ +extern XMesaBuffer XMesaCreatePBuffer(XMesaVisual v, Colormap cmap, + unsigned int width, unsigned int height); + + + +/* + * Texture from Pixmap + * New in Mesa 7.1 + */ +extern void +XMesaBindTexImage(Display *dpy, XMesaBuffer drawable, int buffer, + const int *attrib_list); + +extern void +XMesaReleaseTexImage(Display *dpy, XMesaBuffer drawable, int buffer); + + +extern XMesaBuffer +XMesaCreatePixmapTextureBuffer(XMesaVisual v, Pixmap p, + Colormap cmap, + int format, int target, int mipmap); + + + + +/*********************************************************************** + */ + +extern pipe_mutex _xmesa_lock; + +extern struct xmesa_buffer *XMesaBufferList; + + +/** + * Visual inforation, derived from GLvisual. + * Basically corresponds to an XVisualInfo. + */ +struct xmesa_visual { + GLvisual mesa_visual; /* Device independent visual parameters */ + Display *display; /* The X11 display */ + XVisualInfo * visinfo; /* X's visual info (pointer to private copy) */ + XVisualInfo *vishandle; /* Only used in fakeglx.c */ + GLint BitsPerPixel; /* True bits per pixel for XImages */ + + GLboolean ximage_flag; /* Use XImage for back buffer (not pixmap)? */ +}; + + +/** + * Context info, derived from st_context. + * Basically corresponds to a GLXContext. + */ +struct xmesa_context { + struct st_context *st; + XMesaVisual xm_visual; /** pixel format info */ + XMesaBuffer xm_buffer; /** current drawbuffer */ +}; + + +/** + * Types of X/GLX drawables we might render into. + */ +typedef enum { + WINDOW, /* An X window */ + GLXWINDOW, /* GLX window */ + PIXMAP, /* GLX pixmap */ + PBUFFER /* GLX Pbuffer */ +} BufferType; + + +/** + * Framebuffer information, derived from. + * Basically corresponds to a GLXDrawable. + */ +struct xmesa_buffer { + struct st_framebuffer *stfb; + + GLboolean wasCurrent; /* was ever the current buffer? 
*/ + XMesaVisual xm_visual; /* the X/Mesa visual */ + Drawable drawable; /* Usually the X window ID */ + Colormap cmap; /* the X colormap */ + BufferType type; /* window, pixmap, pbuffer or glxwindow */ + + XImage *tempImage; + unsigned long selectedEvents;/* for pbuffers only */ + + GLuint shm; /* X Shared Memory extension status: */ + /* 0 = not available */ + /* 1 = XImage support available */ + /* 2 = Pixmap support available too */ +#if defined(USE_XSHM) + XShmSegmentInfo shminfo; +#endif + + GC gc; /* scratch GC for span, line, tri drawing */ + + /* GLX_EXT_texture_from_pixmap */ + GLint TextureTarget; /** GLX_TEXTURE_1D_EXT, for example */ + GLint TextureFormat; /** GLX_TEXTURE_FORMAT_RGB_EXT, for example */ + GLint TextureMipmap; /** 0 or 1 */ + + struct xmesa_buffer *Next; /* Linked list pointer: */ +}; + + + +/** cast wrapper */ +static INLINE XMesaContext +xmesa_context(GLcontext *ctx) +{ + return (XMesaContext) ctx->DriverCtx; +} + + +/** cast wrapper */ +static INLINE XMesaBuffer +xmesa_buffer(GLframebuffer *fb) +{ + struct st_framebuffer *stfb = (struct st_framebuffer *) fb; + return (XMesaBuffer) st_framebuffer_private(stfb); +} + + +extern void +xmesa_delete_framebuffer(struct gl_framebuffer *fb); + +extern XMesaBuffer +xmesa_find_buffer(Display *dpy, Colormap cmap, XMesaBuffer notThis); + +extern void +xmesa_check_and_update_buffer_size(XMesaContext xmctx, XMesaBuffer drawBuffer); + +extern void +xmesa_destroy_buffers_on_display(Display *dpy); + +static INLINE GLuint +xmesa_buffer_width(XMesaBuffer b) +{ + return b->stfb->Base.Width; +} + +static INLINE GLuint +xmesa_buffer_height(XMesaBuffer b) +{ + return b->stfb->Base.Height; +} + +extern int +xmesa_check_for_xshm(Display *display); + + +#endif diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.h b/src/gallium/state_trackers/glx/xlib/xm_winsys.h index cc2a755277..0e57605c34 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.h +++ b/src/gallium/state_trackers/glx/xlib/xm_winsys.h @@ -1,3 +1,4 @@ 
+ /************************************************************************** * * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. @@ -25,44 +26,34 @@ * **************************************************************************/ -#ifndef AUB_WINSYS_H -#define AUB_WINSYS_H +#ifndef XM_WINSYS_H +#define XM_WINSYS_H struct pipe_context; -struct pipe_winsys; -struct pipe_buffer; +struct pipe_screen; struct pipe_surface; - -struct pipe_winsys * -xmesa_create_pipe_winsys_aub( void ); - -void -xmesa_destroy_pipe_winsys_aub( struct pipe_winsys *winsys ); - +struct xmesa_buffer; -struct pipe_context * -xmesa_create_i965simple( struct pipe_winsys *winsys ); +struct xm_driver { + struct pipe_screen *(*create_pipe_screen)( void ); + /* The context_private argument needs to go away. Is currently used + * in a round-about way to associate a display-target surface with its + * Xlib window. + */ + struct pipe_context *(*create_pipe_context)( struct pipe_screen *, + void *context_private ); -void xmesa_buffer_subdata_aub(struct pipe_winsys *winsys, - struct pipe_buffer *buf, - unsigned long offset, - unsigned long size, - const void *data, - unsigned aub_type, - unsigned aub_sub_type); + void (*display_surface)( struct xmesa_buffer *, + struct pipe_surface * ); -void xmesa_commands_aub(struct pipe_winsys *winsys, - unsigned *cmds, - unsigned nr_dwords); +}; -void xmesa_display_aub( /* struct pipe_winsys *winsys, */ - struct pipe_surface *surface ); +extern void +xmesa_set_driver( const struct xm_driver *driver ); -extern struct pipe_winsys * -xmesa_get_pipe_winsys_aub(struct xmesa_visual *xm_vis); #endif diff --git a/src/gallium/state_trackers/python/p_format.i b/src/gallium/state_trackers/python/p_format.i index 51ad4bebcd..26fb12b387 100644 --- a/src/gallium/state_trackers/python/p_format.i +++ b/src/gallium/state_trackers/python/p_format.i @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2008 Tungsten Graphics, 
Inc., Cedar Park, Texas. + * Copyright (c) 2008 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -128,10 +129,14 @@ enum pipe_format { PIPE_FORMAT_R32G32B32A32_FIXED, PIPE_FORMAT_L8_SRGB, - PIPE_FORMAT_A8_L8_SRGB, + PIPE_FORMAT_A8L8_SRGB, PIPE_FORMAT_R8G8B8_SRGB, PIPE_FORMAT_R8G8B8A8_SRGB, PIPE_FORMAT_R8G8B8X8_SRGB, + PIPE_FORMAT_A8R8G8B8_SRGB, + PIPE_FORMAT_X8R8G8B8_SRGB, + PIPE_FORMAT_B8G8R8A8_SRGB, + PIPE_FORMAT_B8G8R8X8_SRGB, PIPE_FORMAT_X8UB8UG8SR8S_NORM, PIPE_FORMAT_B6UG5SR5S_NORM, @@ -140,6 +145,11 @@ enum pipe_format { PIPE_FORMAT_DXT1_RGBA, PIPE_FORMAT_DXT3_RGBA, PIPE_FORMAT_DXT5_RGBA, + + PIPE_FORMAT_DXT1_SRGB, + PIPE_FORMAT_DXT1_SRGBA, + PIPE_FORMAT_DXT3_SRGBA, + PIPE_FORMAT_DXT5_SRGBA, }; diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index 95c1378a03..20dd8d269d 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_winsys.h" +#include "pipe/p_screen.h" #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" #include "pipe/p_inlines.h" diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c index f62113a469..4d798df99b 100644 --- a/src/gallium/state_trackers/python/st_softpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -36,7 +36,7 @@ */ -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h"/* port to just p_screen */ #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" @@ -168,63 +168,25 @@ round_up(unsigned n, unsigned multiple) } -static int -st_softpipe_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +st_softpipe_surface_buffer_create(struct pipe_winsys 
*winsys, unsigned width, unsigned height, - enum pipe_format format, - unsigned flags, - unsigned tex_usage) + enum pipe_format format, + unsigned usage, + unsigned *stride) { const unsigned alignment = 64; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; - surf->width = width; - surf->height = height; - surf->format = format; - pf_get_block(format, &surf->block); - surf->nblocksx = pf_get_nblocksx(&surf->block, width); - surf->nblocksy = pf_get_nblocksy(&surf->block, height); - surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); - surf->usage = flags; - - assert(!surf->buffer); - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, - surf->stride * surf->nblocksy); - if(!surf->buffer) - return -1; - - return 0; -} - - -static struct pipe_surface * -st_softpipe_surface_alloc(struct pipe_winsys *winsys) -{ - struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - - assert(winsys); - - surface->refcount = 1; - surface->winsys = winsys; - - return surface; -} - + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, alignment); -static void -st_softpipe_surface_release(struct pipe_winsys *winsys, - struct pipe_surface **s) -{ - struct pipe_surface *surf = *s; - assert(!surf->texture); - surf->refcount--; - if (surf->refcount == 0) { - if (surf->buffer) - winsys_buffer_reference(winsys, &surf->buffer, NULL); - free(surf); - } - *s = NULL; + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); } @@ -279,9 +241,7 @@ st_softpipe_screen_create(void) winsys->buffer_unmap = st_softpipe_buffer_unmap; winsys->buffer_destroy = st_softpipe_buffer_destroy; - winsys->surface_alloc = st_softpipe_surface_alloc; - winsys->surface_alloc_storage = st_softpipe_surface_alloc_storage; - winsys->surface_release = st_softpipe_surface_release; + winsys->surface_buffer_create = 
st_softpipe_surface_buffer_create; winsys->fence_reference = st_softpipe_fence_reference; winsys->fence_signalled = st_softpipe_fence_signalled; diff --git a/src/gallium/state_trackers/wgl/SConscript b/src/gallium/state_trackers/wgl/SConscript new file mode 100644 index 0000000000..2141b02d68 --- /dev/null +++ b/src/gallium/state_trackers/wgl/SConscript @@ -0,0 +1,40 @@ +import os + +Import('*') + +if env['platform'] in ['windows']: + + env = env.Clone() + + env.Append(CPPPATH = [ + '#src/mesa', + '.', + ]) + + env.Append(CPPDEFINES = [ + '_GDI32_', # prevent wgl* being declared __declspec(dllimport) + 'BUILD_GL32', # declare gl* as __declspec(dllexport) in Mesa headers + 'WIN32_THREADS', # use Win32 thread API + ]) + + sources = [ + 'icd/stw_icd.c', + + 'wgl/stw_wgl.c', + + 'shared/stw_context.c', + 'shared/stw_device.c', + 'shared/stw_framebuffer.c', + 'shared/stw_pixelformat.c', + 'shared/stw_quirks.c', + 'shared/stw_arbextensionsstring.c', + 'shared/stw_getprocaddress.c', + 'shared/stw_arbpixelformat.c', + ] + + wgl = env.ConvenienceLibrary( + target ='wgl', + source = sources, + ) + + Export('wgl') diff --git a/src/gallium/state_trackers/wgl/icd/stw_icd.c b/src/gallium/state_trackers/wgl/icd/stw_icd.c new file mode 100644 index 0000000000..8ae6aa1f3e --- /dev/null +++ b/src/gallium/state_trackers/wgl/icd/stw_icd.c @@ -0,0 +1,594 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <windows.h> +#include <stdio.h> + +#include "GL/gl.h" + +#include "util/u_debug.h" +#include "pipe/p_thread.h" + +#include "shared/stw_public.h" +#include "icd/stw_icd.h" + + +static GLCLTPROCTABLE cpt; +static boolean cpt_initialized = FALSE; + + +BOOL APIENTRY +DrvCopyContext( + DHGLRC dhrcSource, + DHGLRC dhrcDest, + UINT fuMask ) +{ + return stw_copy_context(dhrcSource, dhrcDest, fuMask); +} + + +DHGLRC APIENTRY +DrvCreateLayerContext( + HDC hdc, + INT iLayerPlane ) +{ + return stw_create_layer_context( hdc, iLayerPlane ); +} + +DHGLRC APIENTRY +DrvCreateContext( + HDC hdc ) +{ + return DrvCreateLayerContext( hdc, 0 ); +} + +BOOL APIENTRY +DrvDeleteContext( + DHGLRC dhglrc ) +{ + return stw_delete_context( dhglrc ); +} + +BOOL APIENTRY +DrvDescribeLayerPlane( + HDC hdc, + INT iPixelFormat, + INT iLayerPlane, + UINT nBytes, + LPLAYERPLANEDESCRIPTOR plpd ) +{ + debug_printf( "%s\n", __FUNCTION__ ); + + return FALSE; +} + +LONG APIENTRY +DrvDescribePixelFormat( + HDC hdc, + INT iPixelFormat, + ULONG cjpfd, + PIXELFORMATDESCRIPTOR *ppfd ) +{ + LONG r; + + r = stw_pixelformat_describe( hdc, iPixelFormat, cjpfd, ppfd ); + + debug_printf( "%s( %p, %d, %u, %p ) = %d\n", + __FUNCTION__, hdc, iPixelFormat, cjpfd, ppfd, r ); + + return r; +} + +int APIENTRY +DrvGetLayerPaletteEntries( + HDC hdc, + INT iLayerPlane, + INT iStart, + INT cEntries, + COLORREF *pcr ) +{ + debug_printf( "%s\n", __FUNCTION__ ); + + return 0; +} + +PROC APIENTRY +DrvGetProcAddress( + LPCSTR lpszProc ) +{ + PROC r; + + r = stw_get_proc_address( lpszProc ); + + debug_printf( "%s( \", __FUNCTION__%s\" ) = %p\n", lpszProc, r ); + + return r; +} + +BOOL APIENTRY +DrvRealizeLayerPalette( + HDC hdc, + INT iLayerPlane, + BOOL bRealize ) +{ + debug_printf( "%s\n", __FUNCTION__ ); + + return FALSE; +} + +BOOL APIENTRY +DrvReleaseContext( + DHGLRC dhglrc ) +{ + return stw_release_context(dhglrc); +} + +void APIENTRY 
+DrvSetCallbackProcs( + INT nProcs, + PROC *pProcs ) +{ + debug_printf( "%s( %d, %p )\n", __FUNCTION__, nProcs, pProcs ); + + return; +} + + +static void init_proc_table( GLCLTPROCTABLE *cpt ) +{ + GLDISPATCHTABLE *disp = &cpt->glDispatchTable; + + memset( cpt, 0, sizeof *cpt ); + cpt->cEntries = OPENGL_VERSION_110_ENTRIES; + +#define GPA_GL( NAME ) disp->NAME = gl##NAME + GPA_GL( NewList ); + GPA_GL( EndList ); + GPA_GL( CallList ); + GPA_GL( CallLists ); + GPA_GL( DeleteLists ); + GPA_GL( GenLists ); + GPA_GL( ListBase ); + GPA_GL( Begin ); + GPA_GL( Bitmap ); + GPA_GL( Color3b ); + GPA_GL( Color3bv ); + GPA_GL( Color3d ); + GPA_GL( Color3dv ); + GPA_GL( Color3f ); + GPA_GL( Color3fv ); + GPA_GL( Color3i ); + GPA_GL( Color3iv ); + GPA_GL( Color3s ); + GPA_GL( Color3sv ); + GPA_GL( Color3ub ); + GPA_GL( Color3ubv ); + GPA_GL( Color3ui ); + GPA_GL( Color3uiv ); + GPA_GL( Color3us ); + GPA_GL( Color3usv ); + GPA_GL( Color4b ); + GPA_GL( Color4bv ); + GPA_GL( Color4d ); + GPA_GL( Color4dv ); + GPA_GL( Color4f ); + GPA_GL( Color4fv ); + GPA_GL( Color4i ); + GPA_GL( Color4iv ); + GPA_GL( Color4s ); + GPA_GL( Color4sv ); + GPA_GL( Color4ub ); + GPA_GL( Color4ubv ); + GPA_GL( Color4ui ); + GPA_GL( Color4uiv ); + GPA_GL( Color4us ); + GPA_GL( Color4usv ); + GPA_GL( EdgeFlag ); + GPA_GL( EdgeFlagv ); + GPA_GL( End ); + GPA_GL( Indexd ); + GPA_GL( Indexdv ); + GPA_GL( Indexf ); + GPA_GL( Indexfv ); + GPA_GL( Indexi ); + GPA_GL( Indexiv ); + GPA_GL( Indexs ); + GPA_GL( Indexsv ); + GPA_GL( Normal3b ); + GPA_GL( Normal3bv ); + GPA_GL( Normal3d ); + GPA_GL( Normal3dv ); + GPA_GL( Normal3f ); + GPA_GL( Normal3fv ); + GPA_GL( Normal3i ); + GPA_GL( Normal3iv ); + GPA_GL( Normal3s ); + GPA_GL( Normal3sv ); + GPA_GL( RasterPos2d ); + GPA_GL( RasterPos2dv ); + GPA_GL( RasterPos2f ); + GPA_GL( RasterPos2fv ); + GPA_GL( RasterPos2i ); + GPA_GL( RasterPos2iv ); + GPA_GL( RasterPos2s ); + GPA_GL( RasterPos2sv ); + GPA_GL( RasterPos3d ); + GPA_GL( RasterPos3dv ); + GPA_GL( RasterPos3f ); 
+ GPA_GL( RasterPos3fv ); + GPA_GL( RasterPos3i ); + GPA_GL( RasterPos3iv ); + GPA_GL( RasterPos3s ); + GPA_GL( RasterPos3sv ); + GPA_GL( RasterPos4d ); + GPA_GL( RasterPos4dv ); + GPA_GL( RasterPos4f ); + GPA_GL( RasterPos4fv ); + GPA_GL( RasterPos4i ); + GPA_GL( RasterPos4iv ); + GPA_GL( RasterPos4s ); + GPA_GL( RasterPos4sv ); + GPA_GL( Rectd ); + GPA_GL( Rectdv ); + GPA_GL( Rectf ); + GPA_GL( Rectfv ); + GPA_GL( Recti ); + GPA_GL( Rectiv ); + GPA_GL( Rects ); + GPA_GL( Rectsv ); + GPA_GL( TexCoord1d ); + GPA_GL( TexCoord1dv ); + GPA_GL( TexCoord1f ); + GPA_GL( TexCoord1fv ); + GPA_GL( TexCoord1i ); + GPA_GL( TexCoord1iv ); + GPA_GL( TexCoord1s ); + GPA_GL( TexCoord1sv ); + GPA_GL( TexCoord2d ); + GPA_GL( TexCoord2dv ); + GPA_GL( TexCoord2f ); + GPA_GL( TexCoord2fv ); + GPA_GL( TexCoord2i ); + GPA_GL( TexCoord2iv ); + GPA_GL( TexCoord2s ); + GPA_GL( TexCoord2sv ); + GPA_GL( TexCoord3d ); + GPA_GL( TexCoord3dv ); + GPA_GL( TexCoord3f ); + GPA_GL( TexCoord3fv ); + GPA_GL( TexCoord3i ); + GPA_GL( TexCoord3iv ); + GPA_GL( TexCoord3s ); + GPA_GL( TexCoord3sv ); + GPA_GL( TexCoord4d ); + GPA_GL( TexCoord4dv ); + GPA_GL( TexCoord4f ); + GPA_GL( TexCoord4fv ); + GPA_GL( TexCoord4i ); + GPA_GL( TexCoord4iv ); + GPA_GL( TexCoord4s ); + GPA_GL( TexCoord4sv ); + GPA_GL( Vertex2d ); + GPA_GL( Vertex2dv ); + GPA_GL( Vertex2f ); + GPA_GL( Vertex2fv ); + GPA_GL( Vertex2i ); + GPA_GL( Vertex2iv ); + GPA_GL( Vertex2s ); + GPA_GL( Vertex2sv ); + GPA_GL( Vertex3d ); + GPA_GL( Vertex3dv ); + GPA_GL( Vertex3f ); + GPA_GL( Vertex3fv ); + GPA_GL( Vertex3i ); + GPA_GL( Vertex3iv ); + GPA_GL( Vertex3s ); + GPA_GL( Vertex3sv ); + GPA_GL( Vertex4d ); + GPA_GL( Vertex4dv ); + GPA_GL( Vertex4f ); + GPA_GL( Vertex4fv ); + GPA_GL( Vertex4i ); + GPA_GL( Vertex4iv ); + GPA_GL( Vertex4s ); + GPA_GL( Vertex4sv ); + GPA_GL( ClipPlane ); + GPA_GL( ColorMaterial ); + GPA_GL( CullFace ); + GPA_GL( Fogf ); + GPA_GL( Fogfv ); + GPA_GL( Fogi ); + GPA_GL( Fogiv ); + GPA_GL( FrontFace ); + GPA_GL( Hint ); 
+ GPA_GL( Lightf ); + GPA_GL( Lightfv ); + GPA_GL( Lighti ); + GPA_GL( Lightiv ); + GPA_GL( LightModelf ); + GPA_GL( LightModelfv ); + GPA_GL( LightModeli ); + GPA_GL( LightModeliv ); + GPA_GL( LineStipple ); + GPA_GL( LineWidth ); + GPA_GL( Materialf ); + GPA_GL( Materialfv ); + GPA_GL( Materiali ); + GPA_GL( Materialiv ); + GPA_GL( PointSize ); + GPA_GL( PolygonMode ); + GPA_GL( PolygonStipple ); + GPA_GL( Scissor ); + GPA_GL( ShadeModel ); + GPA_GL( TexParameterf ); + GPA_GL( TexParameterfv ); + GPA_GL( TexParameteri ); + GPA_GL( TexParameteriv ); + GPA_GL( TexImage1D ); + GPA_GL( TexImage2D ); + GPA_GL( TexEnvf ); + GPA_GL( TexEnvfv ); + GPA_GL( TexEnvi ); + GPA_GL( TexEnviv ); + GPA_GL( TexGend ); + GPA_GL( TexGendv ); + GPA_GL( TexGenf ); + GPA_GL( TexGenfv ); + GPA_GL( TexGeni ); + GPA_GL( TexGeniv ); + GPA_GL( FeedbackBuffer ); + GPA_GL( SelectBuffer ); + GPA_GL( RenderMode ); + GPA_GL( InitNames ); + GPA_GL( LoadName ); + GPA_GL( PassThrough ); + GPA_GL( PopName ); + GPA_GL( PushName ); + GPA_GL( DrawBuffer ); + GPA_GL( Clear ); + GPA_GL( ClearAccum ); + GPA_GL( ClearIndex ); + GPA_GL( ClearColor ); + GPA_GL( ClearStencil ); + GPA_GL( ClearDepth ); + GPA_GL( StencilMask ); + GPA_GL( ColorMask ); + GPA_GL( DepthMask ); + GPA_GL( IndexMask ); + GPA_GL( Accum ); + GPA_GL( Disable ); + GPA_GL( Enable ); + GPA_GL( Finish ); + GPA_GL( Flush ); + GPA_GL( PopAttrib ); + GPA_GL( PushAttrib ); + GPA_GL( Map1d ); + GPA_GL( Map1f ); + GPA_GL( Map2d ); + GPA_GL( Map2f ); + GPA_GL( MapGrid1d ); + GPA_GL( MapGrid1f ); + GPA_GL( MapGrid2d ); + GPA_GL( MapGrid2f ); + GPA_GL( EvalCoord1d ); + GPA_GL( EvalCoord1dv ); + GPA_GL( EvalCoord1f ); + GPA_GL( EvalCoord1fv ); + GPA_GL( EvalCoord2d ); + GPA_GL( EvalCoord2dv ); + GPA_GL( EvalCoord2f ); + GPA_GL( EvalCoord2fv ); + GPA_GL( EvalMesh1 ); + GPA_GL( EvalPoint1 ); + GPA_GL( EvalMesh2 ); + GPA_GL( EvalPoint2 ); + GPA_GL( AlphaFunc ); + GPA_GL( BlendFunc ); + GPA_GL( LogicOp ); + GPA_GL( StencilFunc ); + GPA_GL( StencilOp ); + 
GPA_GL( DepthFunc ); + GPA_GL( PixelZoom ); + GPA_GL( PixelTransferf ); + GPA_GL( PixelTransferi ); + GPA_GL( PixelStoref ); + GPA_GL( PixelStorei ); + GPA_GL( PixelMapfv ); + GPA_GL( PixelMapuiv ); + GPA_GL( PixelMapusv ); + GPA_GL( ReadBuffer ); + GPA_GL( CopyPixels ); + GPA_GL( ReadPixels ); + GPA_GL( DrawPixels ); + GPA_GL( GetBooleanv ); + GPA_GL( GetClipPlane ); + GPA_GL( GetDoublev ); + GPA_GL( GetError ); + GPA_GL( GetFloatv ); + GPA_GL( GetIntegerv ); + GPA_GL( GetLightfv ); + GPA_GL( GetLightiv ); + GPA_GL( GetMapdv ); + GPA_GL( GetMapfv ); + GPA_GL( GetMapiv ); + GPA_GL( GetMaterialfv ); + GPA_GL( GetMaterialiv ); + GPA_GL( GetPixelMapfv ); + GPA_GL( GetPixelMapuiv ); + GPA_GL( GetPixelMapusv ); + GPA_GL( GetPolygonStipple ); + GPA_GL( GetString ); + GPA_GL( GetTexEnvfv ); + GPA_GL( GetTexEnviv ); + GPA_GL( GetTexGendv ); + GPA_GL( GetTexGenfv ); + GPA_GL( GetTexGeniv ); + GPA_GL( GetTexImage ); + GPA_GL( GetTexParameterfv ); + GPA_GL( GetTexParameteriv ); + GPA_GL( GetTexLevelParameterfv ); + GPA_GL( GetTexLevelParameteriv ); + GPA_GL( IsEnabled ); + GPA_GL( IsList ); + GPA_GL( DepthRange ); + GPA_GL( Frustum ); + GPA_GL( LoadIdentity ); + GPA_GL( LoadMatrixf ); + GPA_GL( LoadMatrixd ); + GPA_GL( MatrixMode ); + GPA_GL( MultMatrixf ); + GPA_GL( MultMatrixd ); + GPA_GL( Ortho ); + GPA_GL( PopMatrix ); + GPA_GL( PushMatrix ); + GPA_GL( Rotated ); + GPA_GL( Rotatef ); + GPA_GL( Scaled ); + GPA_GL( Scalef ); + GPA_GL( Translated ); + GPA_GL( Translatef ); + GPA_GL( Viewport ); + GPA_GL( ArrayElement ); + GPA_GL( BindTexture ); + GPA_GL( ColorPointer ); + GPA_GL( DisableClientState ); + GPA_GL( DrawArrays ); + GPA_GL( DrawElements ); + GPA_GL( EdgeFlagPointer ); + GPA_GL( EnableClientState ); + GPA_GL( IndexPointer ); + GPA_GL( Indexub ); + GPA_GL( Indexubv ); + GPA_GL( InterleavedArrays ); + GPA_GL( NormalPointer ); + GPA_GL( PolygonOffset ); + GPA_GL( TexCoordPointer ); + GPA_GL( VertexPointer ); + GPA_GL( AreTexturesResident ); + GPA_GL( CopyTexImage1D ); 
+ GPA_GL( CopyTexImage2D ); + GPA_GL( CopyTexSubImage1D ); + GPA_GL( CopyTexSubImage2D ); + GPA_GL( DeleteTextures ); + GPA_GL( GenTextures ); + GPA_GL( GetPointerv ); + GPA_GL( IsTexture ); + GPA_GL( PrioritizeTextures ); + GPA_GL( TexSubImage1D ); + GPA_GL( TexSubImage2D ); + GPA_GL( PopClientAttrib ); + GPA_GL( PushClientAttrib ); +} +/* Makes dhglrc current on hdc via stw_make_current() and returns the address of the proc table cpt, or NULL if making current fails. pfnSetProcTable is only logged, never called. NOTE(review): dhglrc is a ULONG (see stw_icd.h) but is printed with %u -- confirm the conversion matches on every target. */ +PGLCLTPROCTABLE APIENTRY +DrvSetContext( + HDC hdc, + DHGLRC dhglrc, + PFN_SETPROCTABLE pfnSetProcTable ) +{ + debug_printf( "%s( 0x%p, %u, 0x%p )\n", + __FUNCTION__, hdc, dhglrc, pfnSetProcTable ); + + /* Although WGL allows different dispatch entrypoints per context -- NOTE(review): the original sentence was truncated at this point, the word context is presumed -- a single table, cpt, is filled in on first use and returned by every successful call. + */ + if (!cpt_initialized) { + init_proc_table( &cpt ); + cpt_initialized = TRUE; + } + + if (!stw_make_current( hdc, dhglrc )) + return NULL; + + return &cpt; +} +/* Stub: logs the function name and returns 0; every argument is ignored. */ +int APIENTRY +DrvSetLayerPaletteEntries( + HDC hdc, + INT iLayerPlane, + INT iStart, + INT cEntries, + CONST COLORREF *pcr ) +{ + debug_printf( "%s\n", __FUNCTION__ ); + + return 0; +} +/* Forwards to stw_pixelformat_set() and returns its result, logging the arguments and outcome. NOTE(review): iPixelFormat is a LONG printed with %d -- confirm the conversion. */ +BOOL APIENTRY +DrvSetPixelFormat( + HDC hdc, + LONG iPixelFormat ) +{ + BOOL r; + + r = stw_pixelformat_set( hdc, iPixelFormat ); + + debug_printf( "%s( %p, %d ) = %s\n", __FUNCTION__, hdc, iPixelFormat, r ? 
"TRUE" : "FALSE" ); + + return r; +} +/* Stub: logs the function name and returns FALSE; both context handles are ignored. */ +BOOL APIENTRY +DrvShareLists( + DHGLRC dhglrc1, + DHGLRC dhglrc2 ) +{ + debug_printf( "%s\n", __FUNCTION__ ); + + return FALSE; +} +/* Logs hdc and returns whatever stw_swap_buffers( hdc ) returns. */ +BOOL APIENTRY +DrvSwapBuffers( + HDC hdc ) +{ + debug_printf( "%s( %p )\n", __FUNCTION__, hdc ); + + return stw_swap_buffers( hdc ); +} +/* Stub: logs the function name and returns FALSE; hdc and fuPlanes are ignored. */ +BOOL APIENTRY +DrvSwapLayerBuffers( + HDC hdc, + UINT fuPlanes ) +{ + debug_printf( "%s\n", __FUNCTION__ ); + + return FALSE; +} +/* Returns nonzero only when ulVersion equals the hard-coded value 1. NOTE(review): ulVersion is a ULONG printed with %u -- confirm the conversion. */ +BOOL APIENTRY +DrvValidateVersion( + ULONG ulVersion ) +{ + debug_printf( "%s( %u )\n", __FUNCTION__, ulVersion ); + + /* TODO: get the expected version from the winsys */ + + return ulVersion == 1; +} diff --git a/src/gallium/state_trackers/wgl/icd/stw_icd.h b/src/gallium/state_trackers/wgl/icd/stw_icd.h new file mode 100644 index 0000000000..8e676fb5b7 --- /dev/null +++ b/src/gallium/state_trackers/wgl/icd/stw_icd.h @@ -0,0 +1,489 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef DRV_H +#define DRV_H + + +#include <windows.h> + +#include "GL/gl.h" + +/* Handle type used by the Drv* functions declared in this header to name a rendering context. */ +typedef ULONG DHGLRC; +/* NOTE(review): presumably the number of members in __GLdispatchTableRec below -- confirm the count if members are added or removed. */ +#define OPENGL_VERSION_110_ENTRIES 336 +/* Table of GLAPIENTRY function pointers, one member per GL entry point, running from NewList to PushClientAttrib. */ +struct __GLdispatchTableRec +{ + void (GLAPIENTRY * NewList)(GLuint, GLenum); + void (GLAPIENTRY * EndList)(void); + void (GLAPIENTRY * CallList)(GLuint); + void (GLAPIENTRY * CallLists)(GLsizei, GLenum, const GLvoid *); + void (GLAPIENTRY * DeleteLists)(GLuint, GLsizei); + GLuint (GLAPIENTRY * GenLists)(GLsizei); + void (GLAPIENTRY * ListBase)(GLuint); + void (GLAPIENTRY * Begin)(GLenum); + void (GLAPIENTRY * Bitmap)(GLsizei, GLsizei, GLfloat, GLfloat, GLfloat, GLfloat, const GLubyte *); + void (GLAPIENTRY * Color3b)(GLbyte, GLbyte, GLbyte); + void (GLAPIENTRY * Color3bv)(const GLbyte *); + void (GLAPIENTRY * Color3d)(GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * Color3dv)(const GLdouble *); + void (GLAPIENTRY * Color3f)(GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY * Color3fv)(const GLfloat *); + void (GLAPIENTRY * Color3i)(GLint, GLint, GLint); + void (GLAPIENTRY * Color3iv)(const GLint *); + void (GLAPIENTRY * Color3s)(GLshort, GLshort, GLshort); + void (GLAPIENTRY * Color3sv)(const GLshort *); + void (GLAPIENTRY * Color3ub)(GLubyte, GLubyte, GLubyte); + void (GLAPIENTRY * Color3ubv)(const GLubyte *); + void (GLAPIENTRY * Color3ui)(GLuint, GLuint, GLuint); + void (GLAPIENTRY * Color3uiv)(const GLuint *); + void (GLAPIENTRY * Color3us)(GLushort, GLushort, GLushort); + void (GLAPIENTRY * Color3usv)(const GLushort *); + void (GLAPIENTRY * Color4b)(GLbyte, GLbyte, GLbyte, GLbyte); + void (GLAPIENTRY * Color4bv)(const GLbyte *); + void (GLAPIENTRY * 
Color4d)(GLdouble, GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * Color4dv)(const GLdouble *); + void (GLAPIENTRY * Color4f)(GLfloat, GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY * Color4fv)(const GLfloat *); + void (GLAPIENTRY * Color4i)(GLint, GLint, GLint, GLint); + void (GLAPIENTRY * Color4iv)(const GLint *); + void (GLAPIENTRY * Color4s)(GLshort, GLshort, GLshort, GLshort); + void (GLAPIENTRY * Color4sv)(const GLshort *); + void (GLAPIENTRY * Color4ub)(GLubyte, GLubyte, GLubyte, GLubyte); + void (GLAPIENTRY * Color4ubv)(const GLubyte *); + void (GLAPIENTRY * Color4ui)(GLuint, GLuint, GLuint, GLuint); + void (GLAPIENTRY * Color4uiv)(const GLuint *); + void (GLAPIENTRY * Color4us)(GLushort, GLushort, GLushort, GLushort); + void (GLAPIENTRY * Color4usv)(const GLushort *); + void (GLAPIENTRY * EdgeFlag)(GLboolean); + void (GLAPIENTRY * EdgeFlagv)(const GLboolean *); + void (GLAPIENTRY * End)(void); + void (GLAPIENTRY * Indexd)(GLdouble); + void (GLAPIENTRY * Indexdv)(const GLdouble *); + void (GLAPIENTRY * Indexf)(GLfloat); + void (GLAPIENTRY * Indexfv)(const GLfloat *); + void (GLAPIENTRY * Indexi)(GLint); + void (GLAPIENTRY * Indexiv)(const GLint *); + void (GLAPIENTRY * Indexs)(GLshort); + void (GLAPIENTRY * Indexsv)(const GLshort *); + void (GLAPIENTRY * Normal3b)(GLbyte, GLbyte, GLbyte); + void (GLAPIENTRY * Normal3bv)(const GLbyte *); + void (GLAPIENTRY * Normal3d)(GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * Normal3dv)(const GLdouble *); + void (GLAPIENTRY * Normal3f)(GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY * Normal3fv)(const GLfloat *); + void (GLAPIENTRY * Normal3i)(GLint, GLint, GLint); + void (GLAPIENTRY * Normal3iv)(const GLint *); + void (GLAPIENTRY * Normal3s)(GLshort, GLshort, GLshort); + void (GLAPIENTRY * Normal3sv)(const GLshort *); + void (GLAPIENTRY * RasterPos2d)(GLdouble, GLdouble); + void (GLAPIENTRY * RasterPos2dv)(const GLdouble *); + void (GLAPIENTRY * RasterPos2f)(GLfloat, GLfloat); + void (GLAPIENTRY * 
RasterPos2fv)(const GLfloat *); + void (GLAPIENTRY * RasterPos2i)(GLint, GLint); + void (GLAPIENTRY * RasterPos2iv)(const GLint *); + void (GLAPIENTRY * RasterPos2s)(GLshort, GLshort); + void (GLAPIENTRY * RasterPos2sv)(const GLshort *); + void (GLAPIENTRY * RasterPos3d)(GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * RasterPos3dv)(const GLdouble *); + void (GLAPIENTRY * RasterPos3f)(GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY * RasterPos3fv)(const GLfloat *); + void (GLAPIENTRY * RasterPos3i)(GLint, GLint, GLint); + void (GLAPIENTRY * RasterPos3iv)(const GLint *); + void (GLAPIENTRY * RasterPos3s)(GLshort, GLshort, GLshort); + void (GLAPIENTRY * RasterPos3sv)(const GLshort *); + void (GLAPIENTRY * RasterPos4d)(GLdouble, GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * RasterPos4dv)(const GLdouble *); + void (GLAPIENTRY * RasterPos4f)(GLfloat, GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY * RasterPos4fv)(const GLfloat *); + void (GLAPIENTRY * RasterPos4i)(GLint, GLint, GLint, GLint); + void (GLAPIENTRY * RasterPos4iv)(const GLint *); + void (GLAPIENTRY * RasterPos4s)(GLshort, GLshort, GLshort, GLshort); + void (GLAPIENTRY * RasterPos4sv)(const GLshort *); + void (GLAPIENTRY * Rectd)(GLdouble, GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * Rectdv)(const GLdouble *, const GLdouble *); + void (GLAPIENTRY * Rectf)(GLfloat, GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY * Rectfv)(const GLfloat *, const GLfloat *); + void (GLAPIENTRY * Recti)(GLint, GLint, GLint, GLint); + void (GLAPIENTRY * Rectiv)(const GLint *, const GLint *); + void (GLAPIENTRY * Rects)(GLshort, GLshort, GLshort, GLshort); + void (GLAPIENTRY * Rectsv)(const GLshort *, const GLshort *); + void (GLAPIENTRY * TexCoord1d)(GLdouble); + void (GLAPIENTRY * TexCoord1dv)(const GLdouble *); + void (GLAPIENTRY * TexCoord1f)(GLfloat); + void (GLAPIENTRY * TexCoord1fv)(const GLfloat *); + void (GLAPIENTRY * TexCoord1i)(GLint); + void (GLAPIENTRY * TexCoord1iv)(const GLint *); + void (GLAPIENTRY * 
TexCoord1s)(GLshort); + void (GLAPIENTRY * TexCoord1sv)(const GLshort *); + void (GLAPIENTRY * TexCoord2d)(GLdouble, GLdouble); + void (GLAPIENTRY * TexCoord2dv)(const GLdouble *); + void (GLAPIENTRY * TexCoord2f)(GLfloat, GLfloat); + void (GLAPIENTRY * TexCoord2fv)(const GLfloat *); + void (GLAPIENTRY * TexCoord2i)(GLint, GLint); + void (GLAPIENTRY * TexCoord2iv)(const GLint *); + void (GLAPIENTRY * TexCoord2s)(GLshort, GLshort); + void (GLAPIENTRY * TexCoord2sv)(const GLshort *); + void (GLAPIENTRY * TexCoord3d)(GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * TexCoord3dv)(const GLdouble *); + void (GLAPIENTRY * TexCoord3f)(GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY * TexCoord3fv)(const GLfloat *); + void (GLAPIENTRY * TexCoord3i)(GLint, GLint, GLint); + void (GLAPIENTRY * TexCoord3iv)(const GLint *); + void (GLAPIENTRY * TexCoord3s)(GLshort, GLshort, GLshort); + void (GLAPIENTRY * TexCoord3sv)(const GLshort *); + void (GLAPIENTRY * TexCoord4d)(GLdouble, GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * TexCoord4dv)(const GLdouble *); + void (GLAPIENTRY * TexCoord4f)(GLfloat, GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY * TexCoord4fv)(const GLfloat *); + void (GLAPIENTRY * TexCoord4i)(GLint, GLint, GLint, GLint); + void (GLAPIENTRY * TexCoord4iv)(const GLint *); + void (GLAPIENTRY * TexCoord4s)(GLshort, GLshort, GLshort, GLshort); + void (GLAPIENTRY * TexCoord4sv)(const GLshort *); + void (GLAPIENTRY * Vertex2d)(GLdouble, GLdouble); + void (GLAPIENTRY * Vertex2dv)(const GLdouble *); + void (GLAPIENTRY * Vertex2f)(GLfloat, GLfloat); + void (GLAPIENTRY * Vertex2fv)(const GLfloat *); + void (GLAPIENTRY * Vertex2i)(GLint, GLint); + void (GLAPIENTRY * Vertex2iv)(const GLint *); + void (GLAPIENTRY * Vertex2s)(GLshort, GLshort); + void (GLAPIENTRY * Vertex2sv)(const GLshort *); + void (GLAPIENTRY * Vertex3d)(GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * Vertex3dv)(const GLdouble *); + void (GLAPIENTRY * Vertex3f)(GLfloat, GLfloat, GLfloat); + void 
(GLAPIENTRY * Vertex3fv)(const GLfloat *); + void (GLAPIENTRY * Vertex3i)(GLint, GLint, GLint); + void (GLAPIENTRY * Vertex3iv)(const GLint *); + void (GLAPIENTRY * Vertex3s)(GLshort, GLshort, GLshort); + void (GLAPIENTRY * Vertex3sv)(const GLshort *); + void (GLAPIENTRY * Vertex4d)(GLdouble, GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * Vertex4dv)(const GLdouble *); + void (GLAPIENTRY * Vertex4f)(GLfloat, GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY * Vertex4fv)(const GLfloat *); + void (GLAPIENTRY * Vertex4i)(GLint, GLint, GLint, GLint); + void (GLAPIENTRY * Vertex4iv)(const GLint *); + void (GLAPIENTRY * Vertex4s)(GLshort, GLshort, GLshort, GLshort); + void (GLAPIENTRY * Vertex4sv)(const GLshort *); + void (GLAPIENTRY * ClipPlane)(GLenum, const GLdouble *); + void (GLAPIENTRY * ColorMaterial)(GLenum, GLenum); + void (GLAPIENTRY * CullFace)(GLenum); + void (GLAPIENTRY * Fogf)(GLenum, GLfloat); + void (GLAPIENTRY * Fogfv)(GLenum, const GLfloat *); + void (GLAPIENTRY * Fogi)(GLenum, GLint); + void (GLAPIENTRY * Fogiv)(GLenum, const GLint *); + void (GLAPIENTRY * FrontFace)(GLenum); + void (GLAPIENTRY * Hint)(GLenum, GLenum); + void (GLAPIENTRY * Lightf)(GLenum, GLenum, GLfloat); + void (GLAPIENTRY * Lightfv)(GLenum, GLenum, const GLfloat *); + void (GLAPIENTRY * Lighti)(GLenum, GLenum, GLint); + void (GLAPIENTRY * Lightiv)(GLenum, GLenum, const GLint *); + void (GLAPIENTRY * LightModelf)(GLenum, GLfloat); + void (GLAPIENTRY * LightModelfv)(GLenum, const GLfloat *); + void (GLAPIENTRY * LightModeli)(GLenum, GLint); + void (GLAPIENTRY * LightModeliv)(GLenum, const GLint *); + void (GLAPIENTRY * LineStipple)(GLint, GLushort); + void (GLAPIENTRY * LineWidth)(GLfloat); + void (GLAPIENTRY * Materialf)(GLenum, GLenum, GLfloat); + void (GLAPIENTRY * Materialfv)(GLenum, GLenum, const GLfloat *); + void (GLAPIENTRY * Materiali)(GLenum, GLenum, GLint); + void (GLAPIENTRY * Materialiv)(GLenum, GLenum, const GLint *); + void (GLAPIENTRY * PointSize)(GLfloat); + void 
(GLAPIENTRY * PolygonMode)(GLenum, GLenum); + void (GLAPIENTRY * PolygonStipple)(const GLubyte *); + void (GLAPIENTRY * Scissor)(GLint, GLint, GLsizei, GLsizei); + void (GLAPIENTRY * ShadeModel)(GLenum); + void (GLAPIENTRY * TexParameterf)(GLenum, GLenum, GLfloat); + void (GLAPIENTRY * TexParameterfv)(GLenum, GLenum, const GLfloat *); + void (GLAPIENTRY * TexParameteri)(GLenum, GLenum, GLint); + void (GLAPIENTRY * TexParameteriv)(GLenum, GLenum, const GLint *); + void (GLAPIENTRY * TexImage1D)(GLenum, GLint, GLint, GLsizei, GLint, GLenum, GLenum, const GLvoid *); + void (GLAPIENTRY * TexImage2D)(GLenum, GLint, GLint, GLsizei, GLsizei, GLint, GLenum, GLenum, const GLvoid *); + void (GLAPIENTRY * TexEnvf)(GLenum, GLenum, GLfloat); + void (GLAPIENTRY * TexEnvfv)(GLenum, GLenum, const GLfloat *); + void (GLAPIENTRY * TexEnvi)(GLenum, GLenum, GLint); + void (GLAPIENTRY * TexEnviv)(GLenum, GLenum, const GLint *); + void (GLAPIENTRY * TexGend)(GLenum, GLenum, GLdouble); + void (GLAPIENTRY * TexGendv)(GLenum, GLenum, const GLdouble *); + void (GLAPIENTRY * TexGenf)(GLenum, GLenum, GLfloat); + void (GLAPIENTRY * TexGenfv)(GLenum, GLenum, const GLfloat *); + void (GLAPIENTRY * TexGeni)(GLenum, GLenum, GLint); + void (GLAPIENTRY * TexGeniv)(GLenum, GLenum, const GLint *); + void (GLAPIENTRY * FeedbackBuffer)(GLsizei, GLenum, GLfloat *); + void (GLAPIENTRY * SelectBuffer)(GLsizei, GLuint *); + GLint (GLAPIENTRY * RenderMode)(GLenum); + void (GLAPIENTRY * InitNames)(void); + void (GLAPIENTRY * LoadName)(GLuint); + void (GLAPIENTRY * PassThrough)(GLfloat); + void (GLAPIENTRY * PopName)(void); + void (GLAPIENTRY * PushName)(GLuint); + void (GLAPIENTRY * DrawBuffer)(GLenum); + void (GLAPIENTRY * Clear)(GLbitfield); + void (GLAPIENTRY * ClearAccum)(GLfloat, GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY * ClearIndex)(GLfloat); + void (GLAPIENTRY * ClearColor)(GLclampf, GLclampf, GLclampf, GLclampf); + void (GLAPIENTRY * ClearStencil)(GLint); + void (GLAPIENTRY * 
ClearDepth)(GLclampd); + void (GLAPIENTRY * StencilMask)(GLuint); + void (GLAPIENTRY * ColorMask)(GLboolean, GLboolean, GLboolean, GLboolean); + void (GLAPIENTRY * DepthMask)(GLboolean); + void (GLAPIENTRY * IndexMask)(GLuint); + void (GLAPIENTRY * Accum)(GLenum, GLfloat); + void (GLAPIENTRY * Disable)(GLenum); + void (GLAPIENTRY * Enable)(GLenum); + void (GLAPIENTRY * Finish)(void); + void (GLAPIENTRY * Flush)(void); + void (GLAPIENTRY * PopAttrib)(void); + void (GLAPIENTRY * PushAttrib)(GLbitfield); + void (GLAPIENTRY * Map1d)(GLenum, GLdouble, GLdouble, GLint, GLint, const GLdouble *); + void (GLAPIENTRY * Map1f)(GLenum, GLfloat, GLfloat, GLint, GLint, const GLfloat *); + void (GLAPIENTRY * Map2d)(GLenum, GLdouble, GLdouble, GLint, GLint, GLdouble, GLdouble, GLint, GLint, const GLdouble *); + void (GLAPIENTRY * Map2f)(GLenum, GLfloat, GLfloat, GLint, GLint, GLfloat, GLfloat, GLint, GLint, const GLfloat *); + void (GLAPIENTRY * MapGrid1d)(GLint, GLdouble, GLdouble); + void (GLAPIENTRY * MapGrid1f)(GLint, GLfloat, GLfloat); + void (GLAPIENTRY * MapGrid2d)(GLint, GLdouble, GLdouble, GLint, GLdouble, GLdouble); + void (GLAPIENTRY * MapGrid2f)(GLint, GLfloat, GLfloat, GLint, GLfloat, GLfloat); + void (GLAPIENTRY * EvalCoord1d)(GLdouble); + void (GLAPIENTRY * EvalCoord1dv)(const GLdouble *); + void (GLAPIENTRY * EvalCoord1f)(GLfloat); + void (GLAPIENTRY * EvalCoord1fv)(const GLfloat *); + void (GLAPIENTRY * EvalCoord2d)(GLdouble, GLdouble); + void (GLAPIENTRY * EvalCoord2dv)(const GLdouble *); + void (GLAPIENTRY * EvalCoord2f)(GLfloat, GLfloat); + void (GLAPIENTRY * EvalCoord2fv)(const GLfloat *); + void (GLAPIENTRY * EvalMesh1)(GLenum, GLint, GLint); + void (GLAPIENTRY * EvalPoint1)(GLint); + void (GLAPIENTRY * EvalMesh2)(GLenum, GLint, GLint, GLint, GLint); + void (GLAPIENTRY * EvalPoint2)(GLint, GLint); + void (GLAPIENTRY * AlphaFunc)(GLenum, GLclampf); + void (GLAPIENTRY * BlendFunc)(GLenum, GLenum); + void (GLAPIENTRY * LogicOp)(GLenum); + void (GLAPIENTRY * 
StencilFunc)(GLenum, GLint, GLuint); + void (GLAPIENTRY * StencilOp)(GLenum, GLenum, GLenum); + void (GLAPIENTRY * DepthFunc)(GLenum); + void (GLAPIENTRY * PixelZoom)(GLfloat, GLfloat); + void (GLAPIENTRY * PixelTransferf)(GLenum, GLfloat); + void (GLAPIENTRY * PixelTransferi)(GLenum, GLint); + void (GLAPIENTRY * PixelStoref)(GLenum, GLfloat); + void (GLAPIENTRY * PixelStorei)(GLenum, GLint); + void (GLAPIENTRY * PixelMapfv)(GLenum, GLint, const GLfloat *); + void (GLAPIENTRY * PixelMapuiv)(GLenum, GLint, const GLuint *); + void (GLAPIENTRY * PixelMapusv)(GLenum, GLint, const GLushort *); + void (GLAPIENTRY * ReadBuffer)(GLenum); + void (GLAPIENTRY * CopyPixels)(GLint, GLint, GLsizei, GLsizei, GLenum); + void (GLAPIENTRY * ReadPixels)(GLint, GLint, GLsizei, GLsizei, GLenum, GLenum, GLvoid *); + void (GLAPIENTRY * DrawPixels)(GLsizei, GLsizei, GLenum, GLenum, const GLvoid *); + void (GLAPIENTRY * GetBooleanv)(GLenum, GLboolean *); + void (GLAPIENTRY * GetClipPlane)(GLenum, GLdouble *); + void (GLAPIENTRY * GetDoublev)(GLenum, GLdouble *); + GLenum (GLAPIENTRY * GetError)(void); + void (GLAPIENTRY * GetFloatv)(GLenum, GLfloat *); + void (GLAPIENTRY * GetIntegerv)(GLenum, GLint *); + void (GLAPIENTRY * GetLightfv)(GLenum, GLenum, GLfloat *); + void (GLAPIENTRY * GetLightiv)(GLenum, GLenum, GLint *); + void (GLAPIENTRY * GetMapdv)(GLenum, GLenum, GLdouble *); + void (GLAPIENTRY * GetMapfv)(GLenum, GLenum, GLfloat *); + void (GLAPIENTRY * GetMapiv)(GLenum, GLenum, GLint *); + void (GLAPIENTRY * GetMaterialfv)(GLenum, GLenum, GLfloat *); + void (GLAPIENTRY * GetMaterialiv)(GLenum, GLenum, GLint *); + void (GLAPIENTRY * GetPixelMapfv)(GLenum, GLfloat *); + void (GLAPIENTRY * GetPixelMapuiv)(GLenum, GLuint *); + void (GLAPIENTRY * GetPixelMapusv)(GLenum, GLushort *); + void (GLAPIENTRY * GetPolygonStipple)(GLubyte *); + const GLubyte * (GLAPIENTRY * GetString)(GLenum); + void (GLAPIENTRY * GetTexEnvfv)(GLenum, GLenum, GLfloat *); + void (GLAPIENTRY * GetTexEnviv)(GLenum, 
GLenum, GLint *); + void (GLAPIENTRY * GetTexGendv)(GLenum, GLenum, GLdouble *); + void (GLAPIENTRY * GetTexGenfv)(GLenum, GLenum, GLfloat *); + void (GLAPIENTRY * GetTexGeniv)(GLenum, GLenum, GLint *); + void (GLAPIENTRY * GetTexImage)(GLenum, GLint, GLenum, GLenum, GLvoid *); + void (GLAPIENTRY * GetTexParameterfv)(GLenum, GLenum, GLfloat *); + void (GLAPIENTRY * GetTexParameteriv)(GLenum, GLenum, GLint *); + void (GLAPIENTRY * GetTexLevelParameterfv)(GLenum, GLint, GLenum, GLfloat *); + void (GLAPIENTRY * GetTexLevelParameteriv)(GLenum, GLint, GLenum, GLint *); + GLboolean (GLAPIENTRY * IsEnabled)(GLenum); + GLboolean (GLAPIENTRY * IsList)(GLuint); + void (GLAPIENTRY * DepthRange)(GLclampd, GLclampd); + void (GLAPIENTRY * Frustum)(GLdouble, GLdouble, GLdouble, GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * LoadIdentity)(void); + void (GLAPIENTRY * LoadMatrixf)(const GLfloat *); + void (GLAPIENTRY * LoadMatrixd)(const GLdouble *); + void (GLAPIENTRY * MatrixMode)(GLenum); + void (GLAPIENTRY * MultMatrixf)(const GLfloat *); + void (GLAPIENTRY * MultMatrixd)(const GLdouble *); + void (GLAPIENTRY * Ortho)(GLdouble, GLdouble, GLdouble, GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * PopMatrix)(void); + void (GLAPIENTRY * PushMatrix)(void); + void (GLAPIENTRY * Rotated)(GLdouble, GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * Rotatef)(GLfloat, GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY * Scaled)(GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * Scalef)(GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY * Translated)(GLdouble, GLdouble, GLdouble); + void (GLAPIENTRY * Translatef)(GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY * Viewport)(GLint, GLint, GLsizei, GLsizei); + void (GLAPIENTRY * ArrayElement)(GLint); + void (GLAPIENTRY * BindTexture)(GLenum, GLuint); + void (GLAPIENTRY * ColorPointer)(GLint, GLenum, GLsizei, const GLvoid *); + void (GLAPIENTRY * DisableClientState)(GLenum); + void (GLAPIENTRY * DrawArrays)(GLenum, GLint, GLsizei); + void 
(GLAPIENTRY * DrawElements)(GLenum, GLsizei, GLenum, const GLvoid *); + void (GLAPIENTRY * EdgeFlagPointer)(GLsizei, const GLvoid *); + void (GLAPIENTRY * EnableClientState)(GLenum); + void (GLAPIENTRY * IndexPointer)(GLenum, GLsizei, const GLvoid *); + void (GLAPIENTRY * Indexub)(GLubyte); + void (GLAPIENTRY * Indexubv)(const GLubyte *); + void (GLAPIENTRY * InterleavedArrays)(GLenum, GLsizei, const GLvoid *); + void (GLAPIENTRY * NormalPointer)(GLenum, GLsizei, const GLvoid *); + void (GLAPIENTRY * PolygonOffset)(GLfloat, GLfloat); + void (GLAPIENTRY * TexCoordPointer)(GLint, GLenum, GLsizei, const GLvoid *); + void (GLAPIENTRY * VertexPointer)(GLint, GLenum, GLsizei, const GLvoid *); + GLboolean (GLAPIENTRY * AreTexturesResident)(GLsizei, const GLuint *, GLboolean *); + void (GLAPIENTRY * CopyTexImage1D)(GLenum, GLint, GLenum, GLint, GLint, GLsizei, GLint); + void (GLAPIENTRY * CopyTexImage2D)(GLenum, GLint, GLenum, GLint, GLint, GLsizei, GLsizei, GLint); + void (GLAPIENTRY * CopyTexSubImage1D)(GLenum, GLint, GLint, GLint, GLint, GLsizei); + void (GLAPIENTRY * CopyTexSubImage2D)(GLenum, GLint, GLint, GLint, GLint, GLint, GLsizei, GLsizei); + void (GLAPIENTRY * DeleteTextures)(GLsizei, const GLuint *); + void (GLAPIENTRY * GenTextures)(GLsizei, GLuint *); + void (GLAPIENTRY * GetPointerv)(GLenum, GLvoid **); + GLboolean (GLAPIENTRY * IsTexture)(GLuint); + void (GLAPIENTRY * PrioritizeTextures)(GLsizei, const GLuint *, const GLclampf *); + void (GLAPIENTRY * TexSubImage1D)(GLenum, GLint, GLint, GLsizei, GLenum, GLenum, const GLvoid *); + void (GLAPIENTRY * TexSubImage2D)(GLenum, GLint, GLint, GLint, GLsizei, GLsizei, GLenum, GLenum, const GLvoid *); + void (GLAPIENTRY * PopClientAttrib)(void); + void (GLAPIENTRY * PushClientAttrib)(GLbitfield); +}; + +typedef struct __GLdispatchTableRec GLDISPATCHTABLE; +/* A GLDISPATCHTABLE preceded by an int cEntries (presumably the number of entries in the table -- confirm); DrvSetContext returns a pointer to one. */ +typedef struct _GLCLTPROCTABLE +{ + int cEntries; + GLDISPATCHTABLE glDispatchTable; +} GLCLTPROCTABLE, * PGLCLTPROCTABLE; +/* Callback type taking a proc table pointer; it is the type of the pfnSetProcTable parameter of DrvSetContext. */ +typedef VOID (APIENTRY * 
PFN_SETPROCTABLE)(PGLCLTPROCTABLE); +/* Prototypes of the Drv* driver entry points; the same names appear under EXPORTS in opengl32.def. */ +BOOL APIENTRY +DrvCopyContext( + DHGLRC dhrcSource, + DHGLRC dhrcDest, + UINT fuMask ); + +DHGLRC APIENTRY +DrvCreateLayerContext( + HDC hdc, + INT iLayerPlane ); + +DHGLRC APIENTRY +DrvCreateContext( + HDC hdc ); + +BOOL APIENTRY +DrvDeleteContext( + DHGLRC dhglrc ); + +BOOL APIENTRY +DrvDescribeLayerPlane( + HDC hdc, + INT iPixelFormat, + INT iLayerPlane, + UINT nBytes, + LPLAYERPLANEDESCRIPTOR plpd ); + +LONG APIENTRY +DrvDescribePixelFormat( + HDC hdc, + INT iPixelFormat, + ULONG cjpfd, + PIXELFORMATDESCRIPTOR *ppfd ); + +int APIENTRY +DrvGetLayerPaletteEntries( + HDC hdc, + INT iLayerPlane, + INT iStart, + INT cEntries, + COLORREF *pcr ); + +PROC APIENTRY +DrvGetProcAddress( + LPCSTR lpszProc ); + +BOOL APIENTRY +DrvRealizeLayerPalette( + HDC hdc, + INT iLayerPlane, + BOOL bRealize ); + +BOOL APIENTRY +DrvReleaseContext( + DHGLRC dhglrc ); + +void APIENTRY +DrvSetCallbackProcs( + INT nProcs, + PROC *pProcs ); + +PGLCLTPROCTABLE APIENTRY +DrvSetContext( + HDC hdc, + DHGLRC dhglrc, + PFN_SETPROCTABLE pfnSetProcTable ); + +int APIENTRY +DrvSetLayerPaletteEntries( + HDC hdc, + INT iLayerPlane, + INT iStart, + INT cEntries, + CONST COLORREF *pcr ); + +BOOL APIENTRY +DrvSetPixelFormat( + HDC hdc, + LONG iPixelFormat ); + +BOOL APIENTRY +DrvShareLists( + DHGLRC dhglrc1, + DHGLRC dhglrc2 ); + +BOOL APIENTRY +DrvSwapBuffers( + HDC hdc ); + +BOOL APIENTRY +DrvSwapLayerBuffers( + HDC hdc, + UINT fuPlanes ); + +BOOL APIENTRY +DrvValidateVersion( + ULONG ulVersion ); + +#endif /* DRV_H */ diff --git a/src/gallium/state_trackers/wgl/opengl32.def b/src/gallium/state_trackers/wgl/opengl32.def new file mode 100644 index 0000000000..596417ed84 --- /dev/null +++ b/src/gallium/state_trackers/wgl/opengl32.def @@ -0,0 +1,388 @@ +EXPORTS +; GlmfBeginGlsBlock +; GlmfCloseMetaFile +; GlmfEndGlsBlock +; GlmfEndPlayback +; GlmfInitPlayback +; GlmfPlayGlsRecord + glAccum + glAlphaFunc + glAreTexturesResident + glArrayElement + glBegin + 
glBindTexture + glBitmap + glBlendFunc + glCallList + glCallLists + glClear + glClearAccum + glClearColor + glClearDepth + glClearIndex + glClearStencil + glClipPlane + glColor3b + glColor3bv + glColor3d + glColor3dv + glColor3f + glColor3fv + glColor3i + glColor3iv + glColor3s + glColor3sv + glColor3ub + glColor3ubv + glColor3ui + glColor3uiv + glColor3us + glColor3usv + glColor4b + glColor4bv + glColor4d + glColor4dv + glColor4f + glColor4fv + glColor4i + glColor4iv + glColor4s + glColor4sv + glColor4ub + glColor4ubv + glColor4ui + glColor4uiv + glColor4us + glColor4usv + glColorMask + glColorMaterial + glColorPointer + glCopyPixels + glCopyTexImage1D + glCopyTexImage2D + glCopyTexSubImage1D + glCopyTexSubImage2D + glCullFace +; glDebugEntry + glDeleteLists + glDeleteTextures + glDepthFunc + glDepthMask + glDepthRange + glDisable + glDisableClientState + glDrawArrays + glDrawBuffer + glDrawElements + glDrawPixels + glEdgeFlag + glEdgeFlagPointer + glEdgeFlagv + glEnable + glEnableClientState + glEnd + glEndList + glEvalCoord1d + glEvalCoord1dv + glEvalCoord1f + glEvalCoord1fv + glEvalCoord2d + glEvalCoord2dv + glEvalCoord2f + glEvalCoord2fv + glEvalMesh1 + glEvalMesh2 + glEvalPoint1 + glEvalPoint2 + glFeedbackBuffer + glFinish + glFlush + glFogf + glFogfv + glFogi + glFogiv + glFrontFace + glFrustum + glGenLists + glGenTextures + glGetBooleanv + glGetClipPlane + glGetDoublev + glGetError + glGetFloatv + glGetIntegerv + glGetLightfv + glGetLightiv + glGetMapdv + glGetMapfv + glGetMapiv + glGetMaterialfv + glGetMaterialiv + glGetPixelMapfv + glGetPixelMapuiv + glGetPixelMapusv + glGetPointerv + glGetPolygonStipple + glGetString + glGetTexEnvfv + glGetTexEnviv + glGetTexGendv + glGetTexGenfv + glGetTexGeniv + glGetTexImage + glGetTexLevelParameterfv + glGetTexLevelParameteriv + glGetTexParameterfv + glGetTexParameteriv + glHint + glIndexMask + glIndexPointer + glIndexd + glIndexdv + glIndexf + glIndexfv + glIndexi + glIndexiv + glIndexs + glIndexsv + glIndexub + 
glIndexubv + glInitNames + glInterleavedArrays + glIsEnabled + glIsList + glIsTexture + glLightModelf + glLightModelfv + glLightModeli + glLightModeliv + glLightf + glLightfv + glLighti + glLightiv + glLineStipple + glLineWidth + glListBase + glLoadIdentity + glLoadMatrixd + glLoadMatrixf + glLoadName + glLogicOp + glMap1d + glMap1f + glMap2d + glMap2f + glMapGrid1d + glMapGrid1f + glMapGrid2d + glMapGrid2f + glMaterialf + glMaterialfv + glMateriali + glMaterialiv + glMatrixMode + glMultMatrixd + glMultMatrixf + glNewList + glNormal3b + glNormal3bv + glNormal3d + glNormal3dv + glNormal3f + glNormal3fv + glNormal3i + glNormal3iv + glNormal3s + glNormal3sv + glNormalPointer + glOrtho + glPassThrough + glPixelMapfv + glPixelMapuiv + glPixelMapusv + glPixelStoref + glPixelStorei + glPixelTransferf + glPixelTransferi + glPixelZoom + glPointSize + glPolygonMode + glPolygonOffset + glPolygonStipple + glPopAttrib + glPopClientAttrib + glPopMatrix + glPopName + glPrioritizeTextures + glPushAttrib + glPushClientAttrib + glPushMatrix + glPushName + glRasterPos2d + glRasterPos2dv + glRasterPos2f + glRasterPos2fv + glRasterPos2i + glRasterPos2iv + glRasterPos2s + glRasterPos2sv + glRasterPos3d + glRasterPos3dv + glRasterPos3f + glRasterPos3fv + glRasterPos3i + glRasterPos3iv + glRasterPos3s + glRasterPos3sv + glRasterPos4d + glRasterPos4dv + glRasterPos4f + glRasterPos4fv + glRasterPos4i + glRasterPos4iv + glRasterPos4s + glRasterPos4sv + glReadBuffer + glReadPixels + glRectd + glRectdv + glRectf + glRectfv + glRecti + glRectiv + glRects + glRectsv + glRenderMode + glRotated + glRotatef + glScaled + glScalef + glScissor + glSelectBuffer + glShadeModel + glStencilFunc + glStencilMask + glStencilOp + glTexCoord1d + glTexCoord1dv + glTexCoord1f + glTexCoord1fv + glTexCoord1i + glTexCoord1iv + glTexCoord1s + glTexCoord1sv + glTexCoord2d + glTexCoord2dv + glTexCoord2f + glTexCoord2fv + glTexCoord2i + glTexCoord2iv + glTexCoord2s + glTexCoord2sv + glTexCoord3d + glTexCoord3dv + 
glTexCoord3f + glTexCoord3fv + glTexCoord3i + glTexCoord3iv + glTexCoord3s + glTexCoord3sv + glTexCoord4d + glTexCoord4dv + glTexCoord4f + glTexCoord4fv + glTexCoord4i + glTexCoord4iv + glTexCoord4s + glTexCoord4sv + glTexCoordPointer + glTexEnvf + glTexEnvfv + glTexEnvi + glTexEnviv + glTexGend + glTexGendv + glTexGenf + glTexGenfv + glTexGeni + glTexGeniv + glTexImage1D + glTexImage2D + glTexParameterf + glTexParameterfv + glTexParameteri + glTexParameteriv + glTexSubImage1D + glTexSubImage2D + glTranslated + glTranslatef + glVertex2d + glVertex2dv + glVertex2f + glVertex2fv + glVertex2i + glVertex2iv + glVertex2s + glVertex2sv + glVertex3d + glVertex3dv + glVertex3f + glVertex3fv + glVertex3i + glVertex3iv + glVertex3s + glVertex3sv + glVertex4d + glVertex4dv + glVertex4f + glVertex4fv + glVertex4i + glVertex4iv + glVertex4s + glVertex4sv + glVertexPointer + glViewport + wglChoosePixelFormat + wglCopyContext + wglCreateContext + wglCreateLayerContext + wglDeleteContext + wglDescribeLayerPlane + wglDescribePixelFormat + wglGetCurrentContext + wglGetCurrentDC +; wglGetDefaultProcAddress + wglGetLayerPaletteEntries + wglGetPixelFormat + wglGetProcAddress + wglMakeCurrent + wglRealizeLayerPalette + wglSetLayerPaletteEntries + wglSetPixelFormat + wglShareLists + wglSwapBuffers + wglSwapLayerBuffers +; wglSwapMultipleBuffers + wglUseFontBitmapsA + wglUseFontBitmapsW + wglUseFontOutlinesA + wglUseFontOutlinesW + wglGetExtensionsStringARB + DrvCopyContext + DrvCreateContext + DrvCreateLayerContext + DrvDeleteContext + DrvDescribeLayerPlane + DrvDescribePixelFormat + DrvGetLayerPaletteEntries + DrvGetProcAddress + DrvRealizeLayerPalette + DrvReleaseContext + DrvSetCallbackProcs + DrvSetContext + DrvSetLayerPaletteEntries + DrvSetPixelFormat + DrvShareLists + DrvSwapBuffers + DrvSwapLayerBuffers + DrvValidateVersion diff --git a/src/gallium/state_trackers/wgl/opengl32.mingw.def b/src/gallium/state_trackers/wgl/opengl32.mingw.def new file mode 100644 index 
0000000000..1f03ea3b37 --- /dev/null +++ b/src/gallium/state_trackers/wgl/opengl32.mingw.def @@ -0,0 +1,387 @@ +EXPORTS +; GlmfBeginGlsBlock = GlmfBeginGlsBlock@4 +; GlmfCloseMetaFile = GlmfCloseMetaFile@4 +; GlmfEndGlsBlock = GlmfEndGlsBlock@4 +; GlmfEndPlayback = GlmfEndPlayback@4 +; GlmfInitPlayback = GlmfInitPlayback@12 +; GlmfPlayGlsRecord = GlmfPlayGlsRecord@16 + glAccum = glAccum@8 + glAlphaFunc = glAlphaFunc@8 + glAreTexturesResident = glAreTexturesResident@12 + glArrayElement = glArrayElement@4 + glBegin = glBegin@4 + glBindTexture = glBindTexture@8 + glBitmap = glBitmap@28 + glBlendFunc = glBlendFunc@8 + glCallList = glCallList@4 + glCallLists = glCallLists@12 + glClear = glClear@4 + glClearAccum = glClearAccum@16 + glClearColor = glClearColor@16 + glClearDepth = glClearDepth@8 + glClearIndex = glClearIndex@4 + glClearStencil = glClearStencil@4 + glClipPlane = glClipPlane@8 + glColor3b = glColor3b@12 + glColor3bv = glColor3bv@4 + glColor3d = glColor3d@24 + glColor3dv = glColor3dv@4 + glColor3f = glColor3f@12 + glColor3fv = glColor3fv@4 + glColor3i = glColor3i@12 + glColor3iv = glColor3iv@4 + glColor3s = glColor3s@12 + glColor3sv = glColor3sv@4 + glColor3ub = glColor3ub@12 + glColor3ubv = glColor3ubv@4 + glColor3ui = glColor3ui@12 + glColor3uiv = glColor3uiv@4 + glColor3us = glColor3us@12 + glColor3usv = glColor3usv@4 + glColor4b = glColor4b@16 + glColor4bv = glColor4bv@4 + glColor4d = glColor4d@32 + glColor4dv = glColor4dv@4 + glColor4f = glColor4f@16 + glColor4fv = glColor4fv@4 + glColor4i = glColor4i@16 + glColor4iv = glColor4iv@4 + glColor4s = glColor4s@16 + glColor4sv = glColor4sv@4 + glColor4ub = glColor4ub@16 + glColor4ubv = glColor4ubv@4 + glColor4ui = glColor4ui@16 + glColor4uiv = glColor4uiv@4 + glColor4us = glColor4us@16 + glColor4usv = glColor4usv@4 + glColorMask = glColorMask@16 + glColorMaterial = glColorMaterial@8 + glColorPointer = glColorPointer@16 + glCopyPixels = glCopyPixels@20 + glCopyTexImage1D = glCopyTexImage1D@28 + glCopyTexImage2D 
= glCopyTexImage2D@32 + glCopyTexSubImage1D = glCopyTexSubImage1D@24 + glCopyTexSubImage2D = glCopyTexSubImage2D@32 + glCullFace = glCullFace@4 +; glDebugEntry = glDebugEntry@8 + glDeleteLists = glDeleteLists@8 + glDeleteTextures = glDeleteTextures@8 + glDepthFunc = glDepthFunc@4 + glDepthMask = glDepthMask@4 + glDepthRange = glDepthRange@16 + glDisable = glDisable@4 + glDisableClientState = glDisableClientState@4 + glDrawArrays = glDrawArrays@12 + glDrawBuffer = glDrawBuffer@4 + glDrawElements = glDrawElements@16 + glDrawPixels = glDrawPixels@20 + glEdgeFlag = glEdgeFlag@4 + glEdgeFlagPointer = glEdgeFlagPointer@8 + glEdgeFlagv = glEdgeFlagv@4 + glEnable = glEnable@4 + glEnableClientState = glEnableClientState@4 + glEnd = glEnd@0 + glEndList = glEndList@0 + glEvalCoord1d = glEvalCoord1d@8 + glEvalCoord1dv = glEvalCoord1dv@4 + glEvalCoord1f = glEvalCoord1f@4 + glEvalCoord1fv = glEvalCoord1fv@4 + glEvalCoord2d = glEvalCoord2d@16 + glEvalCoord2dv = glEvalCoord2dv@4 + glEvalCoord2f = glEvalCoord2f@8 + glEvalCoord2fv = glEvalCoord2fv@4 + glEvalMesh1 = glEvalMesh1@12 + glEvalMesh2 = glEvalMesh2@20 + glEvalPoint1 = glEvalPoint1@4 + glEvalPoint2 = glEvalPoint2@8 + glFeedbackBuffer = glFeedbackBuffer@12 + glFinish = glFinish@0 + glFlush = glFlush@0 + glFogf = glFogf@8 + glFogfv = glFogfv@8 + glFogi = glFogi@8 + glFogiv = glFogiv@8 + glFrontFace = glFrontFace@4 + glFrustum = glFrustum@48 + glGenLists = glGenLists@4 + glGenTextures = glGenTextures@8 + glGetBooleanv = glGetBooleanv@8 + glGetClipPlane = glGetClipPlane@8 + glGetDoublev = glGetDoublev@8 + glGetError = glGetError@0 + glGetFloatv = glGetFloatv@8 + glGetIntegerv = glGetIntegerv@8 + glGetLightfv = glGetLightfv@12 + glGetLightiv = glGetLightiv@12 + glGetMapdv = glGetMapdv@12 + glGetMapfv = glGetMapfv@12 + glGetMapiv = glGetMapiv@12 + glGetMaterialfv = glGetMaterialfv@12 + glGetMaterialiv = glGetMaterialiv@12 + glGetPixelMapfv = glGetPixelMapfv@8 + glGetPixelMapuiv = glGetPixelMapuiv@8 + glGetPixelMapusv = 
glGetPixelMapusv@8 + glGetPointerv = glGetPointerv@8 + glGetPolygonStipple = glGetPolygonStipple@4 + glGetString = glGetString@4 + glGetTexEnvfv = glGetTexEnvfv@12 + glGetTexEnviv = glGetTexEnviv@12 + glGetTexGendv = glGetTexGendv@12 + glGetTexGenfv = glGetTexGenfv@12 + glGetTexGeniv = glGetTexGeniv@12 + glGetTexImage = glGetTexImage@20 + glGetTexLevelParameterfv = glGetTexLevelParameterfv@16 + glGetTexLevelParameteriv = glGetTexLevelParameteriv@16 + glGetTexParameterfv = glGetTexParameterfv@12 + glGetTexParameteriv = glGetTexParameteriv@12 + glHint = glHint@8 + glIndexMask = glIndexMask@4 + glIndexPointer = glIndexPointer@12 + glIndexd = glIndexd@8 + glIndexdv = glIndexdv@4 + glIndexf = glIndexf@4 + glIndexfv = glIndexfv@4 + glIndexi = glIndexi@4 + glIndexiv = glIndexiv@4 + glIndexs = glIndexs@4 + glIndexsv = glIndexsv@4 + glIndexub = glIndexub@4 + glIndexubv = glIndexubv@4 + glInitNames = glInitNames@0 + glInterleavedArrays = glInterleavedArrays@12 + glIsEnabled = glIsEnabled@4 + glIsList = glIsList@4 + glIsTexture = glIsTexture@4 + glLightModelf = glLightModelf@8 + glLightModelfv = glLightModelfv@8 + glLightModeli = glLightModeli@8 + glLightModeliv = glLightModeliv@8 + glLightf = glLightf@12 + glLightfv = glLightfv@12 + glLighti = glLighti@12 + glLightiv = glLightiv@12 + glLineStipple = glLineStipple@8 + glLineWidth = glLineWidth@4 + glListBase = glListBase@4 + glLoadIdentity = glLoadIdentity@0 + glLoadMatrixd = glLoadMatrixd@4 + glLoadMatrixf = glLoadMatrixf@4 + glLoadName = glLoadName@4 + glLogicOp = glLogicOp@4 + glMap1d = glMap1d@32 + glMap1f = glMap1f@24 + glMap2d = glMap2d@56 + glMap2f = glMap2f@40 + glMapGrid1d = glMapGrid1d@20 + glMapGrid1f = glMapGrid1f@12 + glMapGrid2d = glMapGrid2d@40 + glMapGrid2f = glMapGrid2f@24 + glMaterialf = glMaterialf@12 + glMaterialfv = glMaterialfv@12 + glMateriali = glMateriali@12 + glMaterialiv = glMaterialiv@12 + glMatrixMode = glMatrixMode@4 + glMultMatrixd = glMultMatrixd@4 + glMultMatrixf = glMultMatrixf@4 + glNewList 
= glNewList@8 + glNormal3b = glNormal3b@12 + glNormal3bv = glNormal3bv@4 + glNormal3d = glNormal3d@24 + glNormal3dv = glNormal3dv@4 + glNormal3f = glNormal3f@12 + glNormal3fv = glNormal3fv@4 + glNormal3i = glNormal3i@12 + glNormal3iv = glNormal3iv@4 + glNormal3s = glNormal3s@12 + glNormal3sv = glNormal3sv@4 + glNormalPointer = glNormalPointer@12 + glOrtho = glOrtho@48 + glPassThrough = glPassThrough@4 + glPixelMapfv = glPixelMapfv@12 + glPixelMapuiv = glPixelMapuiv@12 + glPixelMapusv = glPixelMapusv@12 + glPixelStoref = glPixelStoref@8 + glPixelStorei = glPixelStorei@8 + glPixelTransferf = glPixelTransferf@8 + glPixelTransferi = glPixelTransferi@8 + glPixelZoom = glPixelZoom@8 + glPointSize = glPointSize@4 + glPolygonMode = glPolygonMode@8 + glPolygonOffset = glPolygonOffset@8 + glPolygonStipple = glPolygonStipple@4 + glPopAttrib = glPopAttrib@0 + glPopClientAttrib = glPopClientAttrib@0 + glPopMatrix = glPopMatrix@0 + glPopName = glPopName@0 + glPrioritizeTextures = glPrioritizeTextures@12 + glPushAttrib = glPushAttrib@4 + glPushClientAttrib = glPushClientAttrib@4 + glPushMatrix = glPushMatrix@0 + glPushName = glPushName@4 + glRasterPos2d = glRasterPos2d@16 + glRasterPos2dv = glRasterPos2dv@4 + glRasterPos2f = glRasterPos2f@8 + glRasterPos2fv = glRasterPos2fv@4 + glRasterPos2i = glRasterPos2i@8 + glRasterPos2iv = glRasterPos2iv@4 + glRasterPos2s = glRasterPos2s@8 + glRasterPos2sv = glRasterPos2sv@4 + glRasterPos3d = glRasterPos3d@24 + glRasterPos3dv = glRasterPos3dv@4 + glRasterPos3f = glRasterPos3f@12 + glRasterPos3fv = glRasterPos3fv@4 + glRasterPos3i = glRasterPos3i@12 + glRasterPos3iv = glRasterPos3iv@4 + glRasterPos3s = glRasterPos3s@12 + glRasterPos3sv = glRasterPos3sv@4 + glRasterPos4d = glRasterPos4d@32 + glRasterPos4dv = glRasterPos4dv@4 + glRasterPos4f = glRasterPos4f@16 + glRasterPos4fv = glRasterPos4fv@4 + glRasterPos4i = glRasterPos4i@16 + glRasterPos4iv = glRasterPos4iv@4 + glRasterPos4s = glRasterPos4s@16 + glRasterPos4sv = glRasterPos4sv@4 + 
glReadBuffer = glReadBuffer@4 + glReadPixels = glReadPixels@28 + glRectd = glRectd@32 + glRectdv = glRectdv@8 + glRectf = glRectf@16 + glRectfv = glRectfv@8 + glRecti = glRecti@16 + glRectiv = glRectiv@8 + glRects = glRects@16 + glRectsv = glRectsv@8 + glRenderMode = glRenderMode@4 + glRotated = glRotated@32 + glRotatef = glRotatef@16 + glScaled = glScaled@24 + glScalef = glScalef@12 + glScissor = glScissor@16 + glSelectBuffer = glSelectBuffer@8 + glShadeModel = glShadeModel@4 + glStencilFunc = glStencilFunc@12 + glStencilMask = glStencilMask@4 + glStencilOp = glStencilOp@12 + glTexCoord1d = glTexCoord1d@8 + glTexCoord1dv = glTexCoord1dv@4 + glTexCoord1f = glTexCoord1f@4 + glTexCoord1fv = glTexCoord1fv@4 + glTexCoord1i = glTexCoord1i@4 + glTexCoord1iv = glTexCoord1iv@4 + glTexCoord1s = glTexCoord1s@4 + glTexCoord1sv = glTexCoord1sv@4 + glTexCoord2d = glTexCoord2d@16 + glTexCoord2dv = glTexCoord2dv@4 + glTexCoord2f = glTexCoord2f@8 + glTexCoord2fv = glTexCoord2fv@4 + glTexCoord2i = glTexCoord2i@8 + glTexCoord2iv = glTexCoord2iv@4 + glTexCoord2s = glTexCoord2s@8 + glTexCoord2sv = glTexCoord2sv@4 + glTexCoord3d = glTexCoord3d@24 + glTexCoord3dv = glTexCoord3dv@4 + glTexCoord3f = glTexCoord3f@12 + glTexCoord3fv = glTexCoord3fv@4 + glTexCoord3i = glTexCoord3i@12 + glTexCoord3iv = glTexCoord3iv@4 + glTexCoord3s = glTexCoord3s@12 + glTexCoord3sv = glTexCoord3sv@4 + glTexCoord4d = glTexCoord4d@32 + glTexCoord4dv = glTexCoord4dv@4 + glTexCoord4f = glTexCoord4f@16 + glTexCoord4fv = glTexCoord4fv@4 + glTexCoord4i = glTexCoord4i@16 + glTexCoord4iv = glTexCoord4iv@4 + glTexCoord4s = glTexCoord4s@16 + glTexCoord4sv = glTexCoord4sv@4 + glTexCoordPointer = glTexCoordPointer@16 + glTexEnvf = glTexEnvf@12 + glTexEnvfv = glTexEnvfv@12 + glTexEnvi = glTexEnvi@12 + glTexEnviv = glTexEnviv@12 + glTexGend = glTexGend@16 + glTexGendv = glTexGendv@12 + glTexGenf = glTexGenf@12 + glTexGenfv = glTexGenfv@12 + glTexGeni = glTexGeni@12 + glTexGeniv = glTexGeniv@12 + glTexImage1D = 
glTexImage1D@32 + glTexImage2D = glTexImage2D@36 + glTexParameterf = glTexParameterf@12 + glTexParameterfv = glTexParameterfv@12 + glTexParameteri = glTexParameteri@12 + glTexParameteriv = glTexParameteriv@12 + glTexSubImage1D = glTexSubImage1D@28 + glTexSubImage2D = glTexSubImage2D@36 + glTranslated = glTranslated@24 + glTranslatef = glTranslatef@12 + glVertex2d = glVertex2d@16 + glVertex2dv = glVertex2dv@4 + glVertex2f = glVertex2f@8 + glVertex2fv = glVertex2fv@4 + glVertex2i = glVertex2i@8 + glVertex2iv = glVertex2iv@4 + glVertex2s = glVertex2s@8 + glVertex2sv = glVertex2sv@4 + glVertex3d = glVertex3d@24 + glVertex3dv = glVertex3dv@4 + glVertex3f = glVertex3f@12 + glVertex3fv = glVertex3fv@4 + glVertex3i = glVertex3i@12 + glVertex3iv = glVertex3iv@4 + glVertex3s = glVertex3s@12 + glVertex3sv = glVertex3sv@4 + glVertex4d = glVertex4d@32 + glVertex4dv = glVertex4dv@4 + glVertex4f = glVertex4f@16 + glVertex4fv = glVertex4fv@4 + glVertex4i = glVertex4i@16 + glVertex4iv = glVertex4iv@4 + glVertex4s = glVertex4s@16 + glVertex4sv = glVertex4sv@4 + glVertexPointer = glVertexPointer@16 + glViewport = glViewport@16 + wglChoosePixelFormat = wglChoosePixelFormat@8 + wglCopyContext = wglCopyContext@12 + wglCreateContext = wglCreateContext@4 + wglCreateLayerContext = wglCreateLayerContext@8 + wglDeleteContext = wglDeleteContext@4 + wglDescribeLayerPlane = wglDescribeLayerPlane@20 + wglDescribePixelFormat = wglDescribePixelFormat@16 + wglGetCurrentContext = wglGetCurrentContext@0 + wglGetCurrentDC = wglGetCurrentDC@0 +; wglGetDefaultProcAddress = wglGetDefaultProcAddress@4 + wglGetLayerPaletteEntries = wglGetLayerPaletteEntries@20 + wglGetPixelFormat = wglGetPixelFormat@4 + wglGetProcAddress = wglGetProcAddress@4 + wglMakeCurrent = wglMakeCurrent@8 + wglRealizeLayerPalette = wglRealizeLayerPalette@12 + wglSetLayerPaletteEntries = wglSetLayerPaletteEntries@20 + wglSetPixelFormat = wglSetPixelFormat@12 + wglShareLists = wglShareLists@8 + wglSwapBuffers = wglSwapBuffers@4 + 
wglSwapLayerBuffers = wglSwapLayerBuffers@8 +; wglSwapMultipleBuffers = wglSwapMultipleBuffers@8 + wglUseFontBitmapsA = wglUseFontBitmapsA@16 + wglUseFontBitmapsW = wglUseFontBitmapsW@16 + wglUseFontOutlinesA = wglUseFontOutlinesA@32 + wglUseFontOutlinesW = wglUseFontOutlinesW@32 + DrvCopyContext = DrvCopyContext@12 + DrvCreateContext = DrvCreateContext@4 + DrvCreateLayerContext = DrvCreateLayerContext@8 + DrvDeleteContext = DrvDeleteContext@4 + DrvDescribeLayerPlane = DrvDescribeLayerPlane@20 + DrvDescribePixelFormat = DrvDescribePixelFormat@16 + DrvGetLayerPaletteEntries = DrvGetLayerPaletteEntries@20 + DrvGetProcAddress = DrvGetProcAddress@4 + DrvRealizeLayerPalette = DrvRealizeLayerPalette@12 + DrvReleaseContext = DrvReleaseContext@4 + DrvSetCallbackProcs = DrvSetCallbackProcs@8 + DrvSetContext = DrvSetContext@12 + DrvSetLayerPaletteEntries = DrvSetLayerPaletteEntries@20 + DrvSetPixelFormat = DrvSetPixelFormat@8 + DrvShareLists = DrvShareLists@8 + DrvSwapBuffers = DrvSwapBuffers@4 + DrvSwapLayerBuffers = DrvSwapLayerBuffers@8 + DrvValidateVersion = DrvValidateVersion@4 diff --git a/src/gallium/winsys/drm/intel/egl/intel_device.h b/src/gallium/state_trackers/wgl/shared/stw_arbextensionsstring.c index 323a7c2aef..b3934cb464 100644 --- a/src/gallium/winsys/drm/intel/egl/intel_device.h +++ b/src/gallium/state_trackers/wgl/shared/stw_arbextensionsstring.c @@ -1,8 +1,8 @@ /************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. 
- * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,29 +22,21 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * + * **************************************************************************/ -#ifndef _INTEL_SCREEN_H_ -#define _INTEL_SCREEN_H_ +#include <windows.h> -#include "intel_be_device.h" +#include "stw_arbextensionsstring.h" -#include "pipe/p_compiler.h" - -struct pipe_screen; -struct egl_drm_device; -struct intel_context; - -struct intel_device +WINGDIAPI const char * APIENTRY +wglGetExtensionsStringARB( + HDC hdc ) { - struct intel_be_device base; - struct pipe_screen *pipe; - - int deviceID; - struct egl_drm_device *device; - - struct intel_context *dummy; -}; + (void) hdc; -#endif + return + "WGL_ARB_extensions_string " + "WGL_ARB_multisample " + "WGL_ARB_pixel_format"; +} diff --git a/src/gallium/state_trackers/wgl/shared/stw_arbextensionsstring.h b/src/gallium/state_trackers/wgl/shared/stw_arbextensionsstring.h new file mode 100644 index 0000000000..a0e4c5d98e --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_arbextensionsstring.h @@ -0,0 +1,35 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef WGL_ARBEXTENSIONSSTRING_H +#define WGL_ARBEXTENSIONSSTRING_H + +WINGDIAPI const char * APIENTRY +wglGetExtensionsStringARB( + HDC hdc ); + +#endif /* WGL_ARBEXTENSIONSSTRING_H */ diff --git a/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.c b/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.c new file mode 100644 index 0000000000..f563635420 --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.c @@ -0,0 +1,519 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <windows.h> + +#include "pipe/p_compiler.h" +#include "util/u_memory.h" +#include "stw_public.h" +#include "stw_pixelformat.h" +#include "stw_arbpixelformat.h" + +#define WGL_NUMBER_PIXEL_FORMATS_ARB 0x2000 +#define WGL_DRAW_TO_WINDOW_ARB 0x2001 +#define WGL_DRAW_TO_BITMAP_ARB 0x2002 +#define WGL_ACCELERATION_ARB 0x2003 +#define WGL_NEED_PALETTE_ARB 0x2004 +#define WGL_NEED_SYSTEM_PALETTE_ARB 0x2005 +#define WGL_SWAP_LAYER_BUFFERS_ARB 0x2006 +#define WGL_SWAP_METHOD_ARB 0x2007 +#define WGL_NUMBER_OVERLAYS_ARB 0x2008 +#define WGL_NUMBER_UNDERLAYS_ARB 0x2009 +#define WGL_TRANSPARENT_ARB 0x200A +#define WGL_TRANSPARENT_RED_VALUE_ARB 0x2037 +#define WGL_TRANSPARENT_GREEN_VALUE_ARB 0x2038 +#define WGL_TRANSPARENT_BLUE_VALUE_ARB 0x2039 +#define WGL_TRANSPARENT_ALPHA_VALUE_ARB 0x203A +#define WGL_TRANSPARENT_INDEX_VALUE_ARB 0x203B +#define WGL_SHARE_DEPTH_ARB 0x200C +#define WGL_SHARE_STENCIL_ARB 0x200D +#define WGL_SHARE_ACCUM_ARB 0x200E +#define WGL_SUPPORT_GDI_ARB 0x200F +#define WGL_SUPPORT_OPENGL_ARB 0x2010 +#define WGL_DOUBLE_BUFFER_ARB 0x2011 +#define WGL_STEREO_ARB 0x2012 +#define WGL_PIXEL_TYPE_ARB 0x2013 +#define WGL_COLOR_BITS_ARB 0x2014 +#define WGL_RED_BITS_ARB 0x2015 +#define WGL_RED_SHIFT_ARB 0x2016 +#define WGL_GREEN_BITS_ARB 0x2017 +#define WGL_GREEN_SHIFT_ARB 0x2018 +#define WGL_BLUE_BITS_ARB 0x2019 +#define WGL_BLUE_SHIFT_ARB 0x201A +#define WGL_ALPHA_BITS_ARB 0x201B 
+#define WGL_ALPHA_SHIFT_ARB 0x201C +#define WGL_ACCUM_BITS_ARB 0x201D +#define WGL_ACCUM_RED_BITS_ARB 0x201E +#define WGL_ACCUM_GREEN_BITS_ARB 0x201F +#define WGL_ACCUM_BLUE_BITS_ARB 0x2020 +#define WGL_ACCUM_ALPHA_BITS_ARB 0x2021 +#define WGL_DEPTH_BITS_ARB 0x2022 +#define WGL_STENCIL_BITS_ARB 0x2023 +#define WGL_AUX_BUFFERS_ARB 0x2024 + +#define WGL_NO_ACCELERATION_ARB 0x2025 +#define WGL_GENERIC_ACCELERATION_ARB 0x2026 +#define WGL_FULL_ACCELERATION_ARB 0x2027 + +#define WGL_SWAP_EXCHANGE_ARB 0x2028 +#define WGL_SWAP_COPY_ARB 0x2029 +#define WGL_SWAP_UNDEFINED_ARB 0x202A + +#define WGL_TYPE_RGBA_ARB 0x202B +#define WGL_TYPE_COLORINDEX_ARB 0x202C + +/* From arb_multisample: + */ +#define WGL_SAMPLE_BUFFERS_ARB 0x2041 +#define WGL_SAMPLES_ARB 0x2042 + + +static boolean +query_attrib( + int iPixelFormat, + int iLayerPlane, + int attrib, + int *pvalue ) +{ + uint count; + uint index; + const struct pixelformat_info *pf; + + count = pixelformat_get_extended_count(); + + if (attrib == WGL_NUMBER_PIXEL_FORMATS_ARB) { + *pvalue = (int) count; + return TRUE; + } + + index = (uint) iPixelFormat - 1; + if (index >= count) + return FALSE; + + pf = pixelformat_get_info( index ); + + switch (attrib) { + case WGL_DRAW_TO_WINDOW_ARB: + *pvalue = TRUE; + return TRUE; + + case WGL_DRAW_TO_BITMAP_ARB: + *pvalue = FALSE; + return TRUE; + + case WGL_NEED_PALETTE_ARB: + *pvalue = FALSE; + return TRUE; + + case WGL_NEED_SYSTEM_PALETTE_ARB: + *pvalue = FALSE; + return TRUE; + + case WGL_SWAP_METHOD_ARB: + if (pf->flags & PF_FLAG_DOUBLEBUFFER) + *pvalue = WGL_SWAP_COPY_ARB; + else + *pvalue = WGL_SWAP_UNDEFINED_ARB; + return TRUE; + + case WGL_SWAP_LAYER_BUFFERS_ARB: + *pvalue = FALSE; + return TRUE; + + case WGL_NUMBER_OVERLAYS_ARB: + *pvalue = 0; + return TRUE; + + case WGL_NUMBER_UNDERLAYS_ARB: + *pvalue = 0; + return TRUE; + } + + if (iLayerPlane != 0) + return FALSE; + + switch (attrib) { + case WGL_ACCELERATION_ARB: + *pvalue = WGL_FULL_ACCELERATION_ARB; + break; + + case 
WGL_TRANSPARENT_ARB: + *pvalue = FALSE; + break; + + case WGL_TRANSPARENT_RED_VALUE_ARB: + case WGL_TRANSPARENT_GREEN_VALUE_ARB: + case WGL_TRANSPARENT_BLUE_VALUE_ARB: + case WGL_TRANSPARENT_ALPHA_VALUE_ARB: + case WGL_TRANSPARENT_INDEX_VALUE_ARB: + break; + + case WGL_SHARE_DEPTH_ARB: + case WGL_SHARE_STENCIL_ARB: + case WGL_SHARE_ACCUM_ARB: + *pvalue = TRUE; + break; + + case WGL_SUPPORT_GDI_ARB: + *pvalue = FALSE; + break; + + case WGL_SUPPORT_OPENGL_ARB: + *pvalue = TRUE; + break; + + case WGL_DOUBLE_BUFFER_ARB: + if (pf->flags & PF_FLAG_DOUBLEBUFFER) + *pvalue = TRUE; + else + *pvalue = FALSE; + break; + + case WGL_STEREO_ARB: + *pvalue = FALSE; + break; + + case WGL_PIXEL_TYPE_ARB: + *pvalue = WGL_TYPE_RGBA_ARB; + break; + + case WGL_COLOR_BITS_ARB: + *pvalue = (int) (pf->color.redbits + pf->color.greenbits + pf->color.bluebits); + break; + + case WGL_RED_BITS_ARB: + *pvalue = (int) pf->color.redbits; + break; + + case WGL_RED_SHIFT_ARB: + *pvalue = (int) pf->color.redshift; + break; + + case WGL_GREEN_BITS_ARB: + *pvalue = (int) pf->color.greenbits; + break; + + case WGL_GREEN_SHIFT_ARB: + *pvalue = (int) pf->color.greenshift; + break; + + case WGL_BLUE_BITS_ARB: + *pvalue = (int) pf->color.bluebits; + break; + + case WGL_BLUE_SHIFT_ARB: + *pvalue = (int) pf->color.blueshift; + break; + + case WGL_ALPHA_BITS_ARB: + *pvalue = (int) pf->alpha.alphabits; + break; + + case WGL_ALPHA_SHIFT_ARB: + *pvalue = (int) pf->alpha.alphashift; + break; + + case WGL_ACCUM_BITS_ARB: + case WGL_ACCUM_RED_BITS_ARB: + case WGL_ACCUM_GREEN_BITS_ARB: + case WGL_ACCUM_BLUE_BITS_ARB: + case WGL_ACCUM_ALPHA_BITS_ARB: + *pvalue = 0; + break; + + case WGL_DEPTH_BITS_ARB: + *pvalue = (int) pf->depth.depthbits; + break; + + case WGL_STENCIL_BITS_ARB: + *pvalue = (int) pf->depth.stencilbits; + break; + + case WGL_AUX_BUFFERS_ARB: + *pvalue = 0; + break; + + case WGL_SAMPLE_BUFFERS_ARB: + if (pf->flags & PF_FLAG_MULTISAMPLED) + *pvalue = stw_query_sample_buffers(); + else + *pvalue = 0; + 
break; + + case WGL_SAMPLES_ARB: + if (pf->flags & PF_FLAG_MULTISAMPLED) + *pvalue = stw_query_samples(); + else + *pvalue = 0; + break; + + default: + return FALSE; + } + + return TRUE; +} + +struct attrib_match_info +{ + int attribute; + int weight; + BOOL exact; +}; + +static struct attrib_match_info attrib_match[] = { + + /* WGL_ARB_pixel_format */ + { WGL_DRAW_TO_WINDOW_ARB, 0, TRUE }, + { WGL_DRAW_TO_BITMAP_ARB, 0, TRUE }, + { WGL_ACCELERATION_ARB, 0, TRUE }, + { WGL_NEED_PALETTE_ARB, 0, TRUE }, + { WGL_NEED_SYSTEM_PALETTE_ARB, 0, TRUE }, + { WGL_SWAP_LAYER_BUFFERS_ARB, 0, TRUE }, + { WGL_SWAP_METHOD_ARB, 0, TRUE }, + { WGL_NUMBER_OVERLAYS_ARB, 4, FALSE }, + { WGL_NUMBER_UNDERLAYS_ARB, 4, FALSE }, + /*{ WGL_SHARE_DEPTH_ARB, 0, TRUE },*/ /* no overlays -- ignore */ + /*{ WGL_SHARE_STENCIL_ARB, 0, TRUE },*/ /* no overlays -- ignore */ + /*{ WGL_SHARE_ACCUM_ARB, 0, TRUE },*/ /* no overlays -- ignore */ + { WGL_SUPPORT_GDI_ARB, 0, TRUE }, + { WGL_SUPPORT_OPENGL_ARB, 0, TRUE }, + { WGL_DOUBLE_BUFFER_ARB, 0, TRUE }, + { WGL_STEREO_ARB, 0, TRUE }, + { WGL_PIXEL_TYPE_ARB, 0, TRUE }, + { WGL_COLOR_BITS_ARB, 1, FALSE }, + { WGL_RED_BITS_ARB, 1, FALSE }, + { WGL_GREEN_BITS_ARB, 1, FALSE }, + { WGL_BLUE_BITS_ARB, 1, FALSE }, + { WGL_ALPHA_BITS_ARB, 1, FALSE }, + { WGL_ACCUM_BITS_ARB, 1, FALSE }, + { WGL_ACCUM_RED_BITS_ARB, 1, FALSE }, + { WGL_ACCUM_GREEN_BITS_ARB, 1, FALSE }, + { WGL_ACCUM_BLUE_BITS_ARB, 1, FALSE }, + { WGL_ACCUM_ALPHA_BITS_ARB, 1, FALSE }, + { WGL_DEPTH_BITS_ARB, 1, FALSE }, + { WGL_STENCIL_BITS_ARB, 1, FALSE }, + { WGL_AUX_BUFFERS_ARB, 2, FALSE }, + + /* WGL_ARB_multisample */ + { WGL_SAMPLE_BUFFERS_ARB, 2, FALSE }, + { WGL_SAMPLES_ARB, 2, FALSE } +}; + +struct pixelformat_score +{ + int points; + uint index; +}; + +static BOOL +score_pixelformats( + struct pixelformat_score *scores, + uint count, + int attribute, + int expected_value ) +{ + uint i; + struct attrib_match_info *ami = NULL; + uint index; + + /* Find out if a given attribute should be 
considered for score calculation. + */ + for (i = 0; i < sizeof( attrib_match ) / sizeof( attrib_match[0] ); i++) { + if (attrib_match[i].attribute == attribute) { + ami = &attrib_match[i]; + break; + } + } + if (ami == NULL) + return TRUE; + + /* Iterate all pixelformats, query the requested attribute and calculate + * score points. + */ + for (index = 0; index < count; index++) { + int actual_value; + + if (!query_attrib( index + 1, 0, attribute, &actual_value )) + return FALSE; + + if (ami->exact) { + /* For an exact match criteria, if the actual and expected values differ, + * the score is set to 0 points, effectively removing the pixelformat + * from a list of matching pixelformats. + */ + if (actual_value != expected_value) + scores[index].points = 0; + } + else { + /* For a minimum match criteria, if the actual value is smaller than the expected + * value, the pixelformat is rejected (score set to 0). However, if the actual + * value is bigger, the pixelformat is given a penalty to favour pixelformats that + * more closely match the expected values. + */ + if (actual_value < expected_value) + scores[index].points = 0; + else if (actual_value > expected_value) + scores[index].points -= (actual_value - expected_value) * ami->weight; + } + } + + return TRUE; +} + +WINGDIAPI BOOL APIENTRY +wglChoosePixelFormatARB( + HDC hdc, + const int *piAttribIList, + const FLOAT *pfAttribFList, + UINT nMaxFormats, + int *piFormats, + UINT *nNumFormats ) +{ + uint count; + struct pixelformat_score *scores; + uint i; + + *nNumFormats = 0; + + /* Allocate and initialize pixelformat score table -- better matches + * have higher scores. Start with a high score and take out penalty + * points for a mismatch when the match does not have to be exact. + * Set a score to 0 if there is a mismatch for an exact match criteria. 
+ */ + count = pixelformat_get_extended_count(); + scores = (struct pixelformat_score *) MALLOC( count * sizeof( struct pixelformat_score ) ); + if (scores == NULL) + return FALSE; + for (i = 0; i < count; i++) { + scores[i].points = 0x7fffffff; + scores[i].index = i; + } + + /* Given the attribute list calculate a score for each pixelformat. + */ + if (piAttribIList != NULL) { + while (*piAttribIList != 0) { + if (!score_pixelformats( scores, count, piAttribIList[0], piAttribIList[1] )) { + FREE( scores ); + return FALSE; + } + piAttribIList += 2; + } + } + if (pfAttribFList != NULL) { + while (*pfAttribFList != 0) { + if (!score_pixelformats( scores, count, (int) pfAttribFList[0], (int) pfAttribFList[1] )) { + FREE( scores ); + return FALSE; + } + pfAttribFList += 2; + } + } + + /* Bubble-sort the resulting scores. Pixelformats with higher scores go first. + * TODO: Find out if there are any patent issues with it. + */ + if (count > 1) { + uint n = count; + boolean swapped; + + do { + swapped = FALSE; + for (i = 1; i < n; i++) { + if (scores[i - 1].points < scores[i].points) { + struct pixelformat_score score = scores[i - 1]; + + scores[i - 1] = scores[i]; + scores[i] = score; + swapped = TRUE; + } + } + n--; + } + while (swapped); + } + + /* Return a list of pixelformats that are the best match. + * Reject pixelformats with non-positive scores. 
+ */ + for (i = 0; i < count; i++) { + if (scores[i].points > 0) { + if (*nNumFormats < nMaxFormats) + piFormats[*nNumFormats] = scores[i].index + 1; + (*nNumFormats)++; + } + } + + FREE( scores ); + return TRUE; +} + +WINGDIAPI BOOL APIENTRY +wglGetPixelFormatAttribfvARB( + HDC hdc, + int iPixelFormat, + int iLayerPlane, + UINT nAttributes, + const int *piAttributes, + FLOAT *pfValues ) +{ + UINT i; + + (void) hdc; + + for (i = 0; i < nAttributes; i++) { + int value; + + if (!query_attrib( iPixelFormat, iLayerPlane, piAttributes[i], &value )) + return FALSE; + pfValues[i] = (FLOAT) value; + } + + return TRUE; +} + +WINGDIAPI BOOL APIENTRY +wglGetPixelFormatAttribivARB( + HDC hdc, + int iPixelFormat, + int iLayerPlane, + UINT nAttributes, + const int *piAttributes, + int *piValues ) +{ + UINT i; + + (void) hdc; + + for (i = 0; i < nAttributes; i++) { + if (!query_attrib( iPixelFormat, iLayerPlane, piAttributes[i], &piValues[i] )) + return FALSE; + } + + return TRUE; +} diff --git a/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.h b/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.h new file mode 100644 index 0000000000..a6c4259942 --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_arbpixelformat.h @@ -0,0 +1,61 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef WGL_ARBPIXELFORMAT_H +#define WGL_ARBPIXELFORMAT_H + + +/* Extension functions for get_proc_address: + */ +WINGDIAPI BOOL APIENTRY +wglChoosePixelFormatARB( + HDC hdc, + const int *piAttribIList, + const FLOAT *pfAttribFList, + UINT nMaxFormats, + int *piFormats, + UINT *nNumFormats ); + +WINGDIAPI BOOL APIENTRY +wglGetPixelFormatAttribfvARB( + HDC hdc, + int iPixelFormat, + int iLayerPlane, + UINT nAttributes, + const int *piAttributes, + FLOAT *pfValues ); + +WINGDIAPI BOOL APIENTRY +wglGetPixelFormatAttribivARB( + HDC hdc, + int iPixelFormat, + int iLayerPlane, + UINT nAttributes, + const int *piAttributes, + int *piValues ); + +#endif /* WGL_ARBPIXELFORMAT_H */ diff --git a/src/gallium/state_trackers/wgl/shared/stw_context.c b/src/gallium/state_trackers/wgl/shared/stw_context.c new file mode 100644 index 0000000000..1377fb1ec8 --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_context.c @@ -0,0 +1,349 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <windows.h> + +#include "main/mtypes.h" +#include "main/context.h" +#include "pipe/p_compiler.h" +#include "pipe/p_context.h" +#include "state_tracker/st_context.h" +#include "state_tracker/st_public.h" +#include "shared/stw_device.h" +#include "shared/stw_winsys.h" +#include "shared/stw_framebuffer.h" +#include "shared/stw_pixelformat.h" +#include "stw_public.h" +#include "stw_context.h" + +static HDC current_hdc = NULL; +static UINT_PTR current_hglrc = 0; + +BOOL +stw_copy_context( + UINT_PTR hglrcSrc, + UINT_PTR hglrcDst, + UINT mask ) +{ + struct stw_context *src; + struct stw_context *dst; + BOOL ret = FALSE; + + pipe_mutex_lock( stw_dev->mutex ); + + src = stw_lookup_context( hglrcSrc ); + dst = stw_lookup_context( hglrcDst ); + + if (src && dst) { + /* FIXME */ + (void) src; + (void) dst; + (void) mask; + } + + pipe_mutex_unlock( stw_dev->mutex ); + + return ret; +} + +UINT_PTR +stw_create_layer_context( + HDC hdc, + int iLayerPlane ) +{ + uint pfi; + const struct pixelformat_info *pf = NULL; + struct stw_context *ctx = NULL; + GLvisual *visual = NULL; + struct pipe_context *pipe = NULL; + UINT_PTR hglrc; + + if(!stw_dev) + return 0; + + if (iLayerPlane != 0) + return 0; + + pfi = stw_pixelformat_get( hdc ); + if (pfi == 0) + return 0; + + pf = pixelformat_get_info( pfi - 1 ); + + ctx = CALLOC_STRUCT( stw_context ); + if (ctx == NULL) + return 0; + + ctx->hdc = hdc; + 
ctx->color_bits = GetDeviceCaps( ctx->hdc, BITSPIXEL ); + + /* Create visual based on flags + */ + visual = _mesa_create_visual( + GL_TRUE, + (pf->flags & PF_FLAG_DOUBLEBUFFER) ? GL_TRUE : GL_FALSE, + GL_FALSE, + pf->color.redbits, + pf->color.greenbits, + pf->color.bluebits, + pf->alpha.alphabits, + 0, + pf->depth.depthbits, + pf->depth.stencilbits, + 0, + 0, + 0, + 0, + (pf->flags & PF_FLAG_MULTISAMPLED) ? stw_query_samples() : 0 ); + if (visual == NULL) + goto fail; + + pipe = stw_dev->stw_winsys->create_context( stw_dev->screen ); + if (pipe == NULL) + goto fail; + + assert(!pipe->priv); + pipe->priv = hdc; + + ctx->st = st_create_context( pipe, visual, NULL ); + if (ctx->st == NULL) + goto fail; + + ctx->st->ctx->DriverCtx = ctx; + + pipe_mutex_lock( stw_dev->mutex ); + { + UINT_PTR i; + + for (i = 0; i < STW_CONTEXT_MAX; i++) { + if (stw_dev->ctx_array[i].ctx == NULL) + break; + } + + /* No slot available, fail: + */ + if (i == STW_CONTEXT_MAX) + goto done; + + stw_dev->ctx_array[i].ctx = ctx; + + /* success: + */ + hglrc = i + 1; + } +done: + pipe_mutex_unlock( stw_dev->mutex ); + + return hglrc; + +fail: + if (visual) + _mesa_destroy_visual( visual ); + + if (pipe) + pipe->destroy( pipe ); + + FREE( ctx ); + return 0; +} + +BOOL +stw_delete_context( + UINT_PTR hglrc ) +{ + struct stw_context *ctx ; + BOOL ret = FALSE; + + if (!stw_dev) + return FALSE; + + pipe_mutex_lock( stw_dev->mutex ); + + ctx = stw_lookup_context(hglrc); + if (ctx) { + GLcontext *glctx = ctx->st->ctx; + GET_CURRENT_CONTEXT( glcurctx ); + struct stw_framebuffer *fb; + + /* Unbind current if deleting current context. 
+ */ + if (glcurctx == glctx) + st_make_current( NULL, NULL, NULL ); + + fb = framebuffer_from_hdc( ctx->hdc ); + if (fb) + framebuffer_destroy( fb ); + + if (WindowFromDC( ctx->hdc ) != NULL) + ReleaseDC( WindowFromDC( ctx->hdc ), ctx->hdc ); + + st_destroy_context( ctx->st ); + + FREE( ctx ); + + stw_dev->ctx_array[hglrc - 1].ctx = NULL; + + ret = TRUE; + } + + pipe_mutex_unlock( stw_dev->mutex ); + + return ret; +} + +BOOL +stw_release_context( + UINT_PTR hglrc ) +{ + BOOL ret = FALSE; + + if (!stw_dev) + return ret; + + pipe_mutex_lock( stw_dev->mutex ); + { + struct stw_context *ctx; + + /* XXX: The expectation is that ctx is the same context which is + * current for this thread. We should check that and return False + * if not the case. + */ + ctx = stw_lookup_context( hglrc ); + if (ctx == NULL) + goto done; + + if (stw_make_current( NULL, 0 ) == FALSE) + goto done; + + ret = TRUE; + } +done: + pipe_mutex_unlock( stw_dev->mutex ); + + return ret; +} + +/* Find the width and height of the window named by hdc. 
+ */ +static void +get_window_size( HDC hdc, GLuint *width, GLuint *height ) +{ + if (WindowFromDC( hdc )) { + RECT rect; + + GetClientRect( WindowFromDC( hdc ), &rect ); + *width = rect.right - rect.left; + *height = rect.bottom - rect.top; + } + else { + *width = GetDeviceCaps( hdc, HORZRES ); + *height = GetDeviceCaps( hdc, VERTRES ); + } +} + +UINT_PTR +stw_get_current_context( void ) +{ + return current_hglrc; +} + +HDC +stw_get_current_dc( void ) +{ + return current_hdc; +} + +BOOL +stw_make_current( + HDC hdc, + UINT_PTR hglrc ) +{ + struct stw_context *ctx; + GET_CURRENT_CONTEXT( glcurctx ); + struct stw_framebuffer *fb; + GLuint width = 0; + GLuint height = 0; + + if (!stw_dev) + return FALSE; + + pipe_mutex_lock( stw_dev->mutex ); + ctx = stw_lookup_context( hglrc ); + pipe_mutex_unlock( stw_dev->mutex ); + + if (ctx == NULL) + return FALSE; + + current_hdc = hdc; + current_hglrc = hglrc; + + if (hdc == NULL || hglrc == 0) { + st_make_current( NULL, NULL, NULL ); + return TRUE; + } + + /* Return if already current. + */ + if (glcurctx != NULL) { + struct stw_context *curctx = (struct stw_context *) glcurctx->DriverCtx; + + if (curctx != NULL && curctx == ctx && ctx->hdc == hdc) + return TRUE; + } + + fb = framebuffer_from_hdc( hdc ); + + if (hdc != NULL) + get_window_size( hdc, &width, &height ); + + /* Lazy creation of framebuffers. 
+ */ + if (fb == NULL && ctx != NULL && hdc != NULL) { + GLvisual *visual = &ctx->st->ctx->Visual; + + fb = framebuffer_create( hdc, visual, width, height ); + if (fb == NULL) + return FALSE; + + fb->dib_hDC = CreateCompatibleDC( hdc ); + fb->hbmDIB = NULL; + fb->pbPixels = NULL; + } + + if (ctx && fb) { + st_make_current( ctx->st, fb->stfb, fb->stfb ); + framebuffer_resize( fb, width, height ); + ctx->hdc = hdc; + ctx->st->pipe->priv = hdc; + } + else { + /* Detach */ + st_make_current( NULL, NULL, NULL ); + } + + return TRUE; +} diff --git a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.h b/src/gallium/state_trackers/wgl/shared/stw_context.h index 5fa14cb749..b289615272 100644 --- a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.h +++ b/src/gallium/state_trackers/wgl/shared/stw_context.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,15 +25,18 @@ * **************************************************************************/ -#ifndef INTEL_SOFTPIPE_H -#define INTEL_SOFTPIPE_H +#ifndef STW_CONTEXT_H +#define STW_CONTEXT_H -struct pipe_winsys; -struct pipe_context; -struct intel_context; +#include <windows.h> -struct pipe_context * -intel_create_softpipe( struct intel_context *intel, - struct pipe_winsys *winsys ); +struct st_context; -#endif +struct stw_context +{ + struct st_context *st; + HDC hdc; + DWORD color_bits; +}; + +#endif /* STW_CONTEXT_H */ diff --git a/src/gallium/state_trackers/wgl/shared/stw_device.c b/src/gallium/state_trackers/wgl/shared/stw_device.c new file mode 100644 index 0000000000..0dca856d73 --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_device.c @@ -0,0 +1,152 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <windows.h> + +#include "glapi/glthread.h" +#include "util/u_debug.h" +#include "pipe/p_screen.h" + +#include "shared/stw_device.h" +#include "shared/stw_winsys.h" +#include "shared/stw_pixelformat.h" +#include "shared/stw_public.h" + +#ifdef WIN32_THREADS +extern _glthread_Mutex OneTimeLock; +extern void FreeAllTSD(void); +#endif + + +struct stw_device *stw_dev = NULL; + + +/** + * XXX: Dispatch pipe_screen::flush_front_buffer to our + * stw_winsys::flush_front_buffer. + */ +static void +st_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_surface *surf, + void *context_private ) +{ + const struct stw_winsys *stw_winsys = stw_dev->stw_winsys; + HDC hdc = (HDC)context_private; + + stw_winsys->flush_frontbuffer(screen, surf, hdc); +} + + +boolean +st_init(const struct stw_winsys *stw_winsys) +{ + static struct stw_device stw_dev_storage; + + debug_printf("%s\n", __FUNCTION__); + + assert(!stw_dev); + + stw_dev = &stw_dev_storage; + memset(stw_dev, 0, sizeof(*stw_dev)); + +#ifdef DEBUG + stw_dev->memdbg_no = debug_memory_begin(); +#endif + + stw_dev->stw_winsys = stw_winsys; + +#ifdef WIN32_THREADS + _glthread_INIT_MUTEX(OneTimeLock); +#endif + + stw_dev->screen = stw_winsys->create_screen(); + if(!stw_dev->screen) + goto error1; + + stw_dev->screen->flush_frontbuffer = st_flush_frontbuffer; + + pipe_mutex_init( stw_dev->mutex ); + + pixelformat_init(); + + return TRUE; + +error1: + stw_dev = NULL; + return FALSE; +} + + +void +st_cleanup(void) +{ + UINT_PTR i; + + debug_printf("%s\n", __FUNCTION__); + + if (!stw_dev) + return; + + pipe_mutex_lock( stw_dev->mutex ); + { + /* Ensure all 
contexts are destroyed */ + for (i = 0; i < STW_CONTEXT_MAX; i++) + if (stw_dev->ctx_array[i].ctx) + stw_delete_context( i + 1 ); + } + pipe_mutex_unlock( stw_dev->mutex ); + + pipe_mutex_destroy( stw_dev->mutex ); + + stw_dev->screen->destroy(stw_dev->screen); + +#ifdef WIN32_THREADS + _glthread_DESTROY_MUTEX(OneTimeLock); + FreeAllTSD(); +#endif + +#ifdef DEBUG + debug_memory_end(stw_dev->memdbg_no); +#endif + + stw_dev = NULL; +} + + +struct stw_context * +stw_lookup_context( UINT_PTR dhglrc ) +{ + if (dhglrc == 0 || + dhglrc >= STW_CONTEXT_MAX) + return NULL; + + if (stw_dev == NULL) + return NULL; + + return stw_dev->ctx_array[dhglrc - 1].ctx; +} + diff --git a/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.h b/src/gallium/state_trackers/wgl/shared/stw_device.h index 46c9bab3af..80da14b84f 100644 --- a/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.h +++ b/src/gallium/state_trackers/wgl/shared/stw_device.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,23 +25,39 @@ * **************************************************************************/ -#ifndef INTEL_SWAPBUFFERS_H -#define INTEL_SWAPBUFFERS_H +#ifndef STW_DEVICE_H_ +#define STW_DEVICE_H_ -struct pipe_surface; +#include "pipe/p_compiler.h" +#include "pipe/p_thread.h" -extern void intelDisplaySurface(__DRIdrawablePrivate * dPriv, - struct pipe_surface *surf, - const drm_clip_rect_t * rect); +#define STW_CONTEXT_MAX 32 -extern void intelSwapBuffers(__DRIdrawablePrivate * dPriv); -extern void intelCopySubBuffer(__DRIdrawablePrivate * dPriv, - int x, int y, int w, int h); +struct pipe_screen; -extern void intelUpdateWindowSize(__DRIdrawablePrivate *dPriv); +struct stw_device +{ + const struct stw_winsys *stw_winsys; + struct pipe_screen *screen; + + pipe_mutex mutex; + struct { + struct stw_context *ctx; + } ctx_array[STW_CONTEXT_MAX]; + +#ifdef DEBUG + unsigned long memdbg_no; +#endif +}; -#endif /* INTEL_SWAPBUFFERS_H */ +struct stw_context * +stw_lookup_context( UINT_PTR hglrc ); + +extern struct stw_device *stw_dev; + + +#endif /* STW_DEVICE_H_ */ diff --git a/src/gallium/state_trackers/wgl/shared/stw_framebuffer.c b/src/gallium/state_trackers/wgl/shared/stw_framebuffer.c new file mode 100644 index 0000000000..c70b31a488 --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_framebuffer.c @@ -0,0 +1,212 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <windows.h> + +#include "main/context.h" +#include "pipe/p_format.h" +#include "pipe/p_screen.h" +#include "state_tracker/st_context.h" +#include "state_tracker/st_public.h" +#include "stw_framebuffer.h" +#include "stw_device.h" +#include "stw_public.h" +#include "stw_winsys.h" + + +void +framebuffer_resize( + struct stw_framebuffer *fb, + GLuint width, + GLuint height ) +{ + if (fb->hbmDIB == NULL || fb->stfb->Base.Width != width || fb->stfb->Base.Height != height) { + if (fb->hbmDIB) + DeleteObject( fb->hbmDIB ); + + fb->hbmDIB = CreateCompatibleBitmap( + fb->hDC, + width, + height ); + } + + st_resize_framebuffer( fb->stfb, width, height ); +} + +static struct stw_framebuffer *fb_head = NULL; + +static LRESULT CALLBACK +window_proc( + HWND hWnd, + UINT uMsg, + WPARAM wParam, + LPARAM lParam ) +{ + struct stw_framebuffer *fb; + + for (fb = fb_head; fb != NULL; fb = fb->next) + if (fb->hWnd == hWnd) + break; + assert( fb != NULL ); + + if (uMsg == WM_SIZE && wParam != SIZE_MINIMIZED) + framebuffer_resize( fb, LOWORD( lParam ), HIWORD( lParam ) ); + + return CallWindowProc( fb->WndProc, hWnd, uMsg, wParam, lParam ); +} + +/* Create a new framebuffer object which will correspond to the given HDC. + */ +struct stw_framebuffer * +framebuffer_create( + HDC hdc, + GLvisual *visual, + GLuint width, + GLuint height ) +{ + struct stw_framebuffer *fb; + enum pipe_format colorFormat, depthFormat, stencilFormat; + + fb = CALLOC_STRUCT( stw_framebuffer ); + if (fb == NULL) + return NULL; + + /* Determine PIPE_FORMATs for buffers. 
+ */ + colorFormat = PIPE_FORMAT_A8R8G8B8_UNORM; + + if (visual->depthBits == 0) + depthFormat = PIPE_FORMAT_NONE; + else if (visual->depthBits <= 16) + depthFormat = PIPE_FORMAT_Z16_UNORM; + else if (visual->depthBits <= 24) + depthFormat = PIPE_FORMAT_S8Z24_UNORM; + else + depthFormat = PIPE_FORMAT_Z32_UNORM; + + if (visual->stencilBits == 8) { + if (depthFormat == PIPE_FORMAT_S8Z24_UNORM) + stencilFormat = depthFormat; + else + stencilFormat = PIPE_FORMAT_S8_UNORM; + } + else { + stencilFormat = PIPE_FORMAT_NONE; + } + + fb->stfb = st_create_framebuffer( + visual, + colorFormat, + depthFormat, + stencilFormat, + width, + height, + (void *) fb ); + + fb->cColorBits = GetDeviceCaps( hdc, BITSPIXEL ); + fb->hDC = hdc; + + /* Subclass a window associated with the device context. + */ + fb->hWnd = WindowFromDC( hdc ); + if (fb->hWnd != NULL) { + fb->WndProc = (WNDPROC) SetWindowLong( + fb->hWnd, + GWL_WNDPROC, + (LONG) window_proc ); + } + + fb->next = fb_head; + fb_head = fb; + return fb; +} + +void +framebuffer_destroy( + struct stw_framebuffer *fb ) +{ + struct stw_framebuffer **link = &fb_head; + struct stw_framebuffer *pfb = fb_head; + + while (pfb != NULL) { + if (pfb == fb) { + if (fb->hWnd != NULL) { + SetWindowLong( + fb->hWnd, + GWL_WNDPROC, + (LONG) fb->WndProc ); + } + + *link = fb->next; + FREE( fb ); + return; + } + + link = &pfb->next; + pfb = pfb->next; + } +} + +/* Given an hdc, return the corresponding stw_framebuffer. + */ +struct stw_framebuffer * +framebuffer_from_hdc( + HDC hdc ) +{ + struct stw_framebuffer *fb; + + for (fb = fb_head; fb != NULL; fb = fb->next) + if (fb->hDC == hdc) + return fb; + return NULL; +} + + +BOOL +stw_swap_buffers( + HDC hdc ) +{ + struct stw_framebuffer *fb; + struct pipe_surface *surf; + + fb = framebuffer_from_hdc( hdc ); + if (fb == NULL) + return FALSE; + + /* If we're swapping the buffer associated with the current context + * we have to flush any pending rendering commands first. 
+ */ + st_notify_swapbuffers( fb->stfb ); + + st_get_framebuffer_surface( fb->stfb, ST_SURFACE_BACK_LEFT, &surf ); + + stw_dev->stw_winsys->flush_frontbuffer(stw_dev->screen, + surf, + hdc ); + + return TRUE; +} diff --git a/src/gallium/winsys/drm/intel/dri/intel_reg.h b/src/gallium/state_trackers/wgl/shared/stw_framebuffer.h index 4f33bee438..2e16e421f2 100644 --- a/src/gallium/winsys/drm/intel/dri/intel_reg.h +++ b/src/gallium/state_trackers/wgl/shared/stw_framebuffer.h @@ -1,8 +1,8 @@ /************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,32 +22,50 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * + * **************************************************************************/ +#ifndef STW_FRAMEBUFFER_H +#define STW_FRAMEBUFFER_H -#ifndef _INTEL_REG_H_ -#define _INTEL_REG_H_ +#include "main/mtypes.h" +/* Windows framebuffer, derived from gl_framebuffer. + */ +struct stw_framebuffer +{ + struct st_framebuffer *stfb; + HDC hDC; + int pixelformat; + BYTE cColorBits; + HDC dib_hDC; + HBITMAP hbmDIB; + HBITMAP hOldBitmap; + PBYTE pbPixels; + HWND hWnd; + WNDPROC WndProc; + struct stw_framebuffer *next; +}; -#define BR00_BITBLT_CLIENT 0x40000000 -#define BR00_OP_COLOR_BLT 0x10000000 -#define BR00_OP_SRC_COPY_BLT 0x10C00000 -#define BR13_SOLID_PATTERN 0x80000000 +struct stw_framebuffer * +framebuffer_create( + HDC hdc, + GLvisual *visual, + GLuint width, + GLuint height ); -#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|0x4) -#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) -#define XY_COLOR_BLT_WRITE_RGB (1<<20) +void +framebuffer_destroy( + struct stw_framebuffer *fb ); -#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) -#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) -#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) +void +framebuffer_resize( + struct stw_framebuffer *fb, + GLuint width, + GLuint height ); -#define MI_WAIT_FOR_EVENT ((0x3<<23)) -#define MI_WAIT_FOR_PLANE_B_FLIP (1<<6) -#define MI_WAIT_FOR_PLANE_A_FLIP (1<<2) +struct stw_framebuffer * +framebuffer_from_hdc( + HDC hdc ); -#define MI_BATCH_BUFFER_END (0xA<<23) - - -#endif +#endif /* STW_FRAMEBUFFER_H */ diff --git a/src/gallium/state_trackers/wgl/shared/stw_getprocaddress.c b/src/gallium/state_trackers/wgl/shared/stw_getprocaddress.c new file mode 100644 index 0000000000..ac2d6fc260 --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_getprocaddress.c @@ -0,0 +1,71 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include <windows.h> + +#include "glapi/glapi.h" +#include "stw_arbextensionsstring.h" +#include "stw_arbpixelformat.h" +#include "stw_public.h" + +struct extension_entry +{ + const char *name; + PROC proc; +}; + +#define EXTENTRY(P) { #P, (PROC) P } + +static struct extension_entry extension_entries[] = { + + /* WGL_ARB_extensions_string */ + EXTENTRY( wglGetExtensionsStringARB ), + + /* WGL_ARB_pixel_format */ + EXTENTRY( wglChoosePixelFormatARB ), + EXTENTRY( wglGetPixelFormatAttribfvARB ), + EXTENTRY( wglGetPixelFormatAttribivARB ), + + { NULL, NULL } +}; + +PROC +stw_get_proc_address( + LPCSTR lpszProc ) +{ + struct extension_entry *entry; + + PROC p = (PROC) _glapi_get_proc_address( (const char *) lpszProc ); + if (p) + return p; + + for (entry = extension_entries; entry->name; entry++) + if (strcmp( lpszProc, entry->name ) == 0) + return entry->proc; + + return NULL; +} diff --git a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c new file mode 100644 index 0000000000..5cfdd41597 --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.c @@ -0,0 +1,286 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#include "util/u_debug.h" +#include "stw_pixelformat.h" +#include "stw_public.h" + +#define MAX_PIXELFORMATS 16 + +static struct pixelformat_info pixelformats[MAX_PIXELFORMATS]; +static uint pixelformat_count = 0; +static uint pixelformat_extended_count = 0; + +static uint currentpixelformat = 0; + + +static void +add_standard_pixelformats( + struct pixelformat_info **ppf, + uint flags ) +{ + struct pixelformat_info *pf = *ppf; + struct pixelformat_color_info color24 = { 8, 0, 8, 8, 8, 16 }; + struct pixelformat_alpha_info alpha8 = { 8, 24 }; + struct pixelformat_alpha_info noalpha = { 0, 0 }; + struct pixelformat_depth_info depth24s8 = { 24, 8 }; + struct pixelformat_depth_info depth16 = { 16, 0 }; + + pf->flags = PF_FLAG_DOUBLEBUFFER | flags; + pf->color = color24; + pf->alpha = alpha8; + pf->depth = depth16; + pf++; + + pf->flags = PF_FLAG_DOUBLEBUFFER | flags; + pf->color = color24; + pf->alpha = alpha8; + pf->depth = depth24s8; + pf++; + + pf->flags = PF_FLAG_DOUBLEBUFFER | flags; + pf->color = color24; + pf->alpha = noalpha; + pf->depth = depth16; + pf++; + + pf->flags = PF_FLAG_DOUBLEBUFFER | flags; + pf->color = color24; + pf->alpha = noalpha; + pf->depth = depth24s8; + pf++; + + pf->flags = flags; + pf->color = color24; + pf->alpha = noalpha; + pf->depth = depth16; + pf++; + + pf->flags = flags; + pf->color = color24; + pf->alpha = noalpha; + pf->depth = depth24s8; + pf++; + + *ppf = pf; +} + +void +pixelformat_init( void ) +{ + struct pixelformat_info *pf = pixelformats; + + add_standard_pixelformats( &pf, 0 ); + pixelformat_count = pf - pixelformats; + + add_standard_pixelformats( &pf, PF_FLAG_MULTISAMPLED ); + pixelformat_extended_count = pf - pixelformats; + + assert( pixelformat_extended_count <= MAX_PIXELFORMATS ); +} + +uint +pixelformat_get_count( void ) +{ + return pixelformat_count; +} + +uint +pixelformat_get_extended_count( void ) +{ + return 
pixelformat_extended_count; +} + +const struct pixelformat_info * +pixelformat_get_info( uint index ) +{ + assert( index < pixelformat_extended_count ); + + return &pixelformats[index]; +} + + +int +stw_pixelformat_describe( + HDC hdc, + int iPixelFormat, + UINT nBytes, + LPPIXELFORMATDESCRIPTOR ppfd ) +{ + uint count; + uint index; + const struct pixelformat_info *pf; + + (void) hdc; + + count = pixelformat_get_extended_count(); + index = (uint) iPixelFormat - 1; + + if (ppfd == NULL) + return count; + if (index >= count || nBytes != sizeof( PIXELFORMATDESCRIPTOR )) + return 0; + + pf = pixelformat_get_info( index ); + + ppfd->nSize = sizeof( PIXELFORMATDESCRIPTOR ); + ppfd->nVersion = 1; + ppfd->dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL; + if (pf->flags & PF_FLAG_DOUBLEBUFFER) + ppfd->dwFlags |= PFD_DOUBLEBUFFER | PFD_SWAP_COPY; + ppfd->iPixelType = PFD_TYPE_RGBA; + ppfd->cColorBits = pf->color.redbits + pf->color.greenbits + pf->color.bluebits; + ppfd->cRedBits = pf->color.redbits; + ppfd->cRedShift = pf->color.redshift; + ppfd->cGreenBits = pf->color.greenbits; + ppfd->cGreenShift = pf->color.greenshift; + ppfd->cBlueBits = pf->color.bluebits; + ppfd->cBlueShift = pf->color.blueshift; + ppfd->cAlphaBits = pf->alpha.alphabits; + ppfd->cAlphaShift = pf->alpha.alphashift; + ppfd->cAccumBits = 0; + ppfd->cAccumRedBits = 0; + ppfd->cAccumGreenBits = 0; + ppfd->cAccumBlueBits = 0; + ppfd->cAccumAlphaBits = 0; + ppfd->cDepthBits = pf->depth.depthbits; + ppfd->cStencilBits = pf->depth.stencilbits; + ppfd->cAuxBuffers = 0; + ppfd->iLayerType = 0; + ppfd->bReserved = 0; + ppfd->dwLayerMask = 0; + ppfd->dwVisibleMask = 0; + ppfd->dwDamageMask = 0; + + return count; +} + +/* Only used by the wgl code, but have it here to avoid exporting the + * pixelformat.h functionality. 
+ */ +int stw_pixelformat_choose( HDC hdc, + CONST PIXELFORMATDESCRIPTOR *ppfd ) +{ + uint count; + uint index; + uint bestindex; + uint bestdelta; + + (void) hdc; + + count = pixelformat_get_count(); + bestindex = count; + bestdelta = 0xffffffff; + + for (index = 0; index < count; index++) { + uint delta = 0; + const struct pixelformat_info *pf = pixelformat_get_info( index ); + + if (!(ppfd->dwFlags & PFD_DOUBLEBUFFER_DONTCARE) && + !!(ppfd->dwFlags & PFD_DOUBLEBUFFER) != + !!(pf->flags & PF_FLAG_DOUBLEBUFFER)) + continue; + + if (ppfd->cColorBits != pf->color.redbits + pf->color.greenbits + pf->color.bluebits) + delta += 8; + + if (ppfd->cDepthBits != pf->depth.depthbits) + delta += 4; + + if (ppfd->cStencilBits != pf->depth.stencilbits) + delta += 2; + + if (ppfd->cAlphaBits != pf->alpha.alphabits) + delta++; + + if (delta < bestdelta) { + bestindex = index; + bestdelta = delta; + if (bestdelta == 0) + break; + } + } + + if (bestindex == count) + return 0; + + return bestindex + 1; +} + + +int +stw_pixelformat_get( + HDC hdc ) +{ + return currentpixelformat; +} + + +BOOL +stw_pixelformat_set( + HDC hdc, + int iPixelFormat ) +{ + uint count; + uint index; + + (void) hdc; + + index = (uint) iPixelFormat - 1; + count = pixelformat_get_extended_count(); + if (index >= count) + return FALSE; + + currentpixelformat = iPixelFormat; + + /* Some applications mistakenly use the undocumented wglSetPixelFormat + * function instead of SetPixelFormat, so we call SetPixelFormat here to + * avoid opengl32.dll's wglCreateContext to fail */ + if (GetPixelFormat(hdc) == 0) { + SetPixelFormat(hdc, iPixelFormat, NULL); + } + + return TRUE; +} + + + +/* XXX: this needs to be turned into queries on pipe_screen or + * stw_winsys. 
+ */ +int +stw_query_sample_buffers( void ) +{ + return 1; +} + +int +stw_query_samples( void ) +{ + return 4; +} + diff --git a/src/gallium/state_trackers/wgl/shared/stw_pixelformat.h b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.h new file mode 100644 index 0000000000..7ca4194a2a --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_pixelformat.h @@ -0,0 +1,83 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef PIXELFORMAT_H +#define PIXELFORMAT_H + +#include <windows.h> +#include "pipe/p_compiler.h" + +#define PF_FLAG_DOUBLEBUFFER 0x00000001 +#define PF_FLAG_MULTISAMPLED 0x00000002 + +struct pixelformat_color_info +{ + uint redbits; + uint redshift; + uint greenbits; + uint greenshift; + uint bluebits; + uint blueshift; +}; + +struct pixelformat_alpha_info +{ + uint alphabits; + uint alphashift; +}; + +struct pixelformat_depth_info +{ + uint depthbits; + uint stencilbits; +}; + +struct pixelformat_info +{ + uint flags; + struct pixelformat_color_info color; + struct pixelformat_alpha_info alpha; + struct pixelformat_depth_info depth; +}; + +void +pixelformat_init( void ); + +uint +pixelformat_get_count( void ); + +uint +pixelformat_get_extended_count( void ); + +const struct pixelformat_info * +pixelformat_get_info( uint index ); + +int stw_query_sample_buffers( void ); +int stw_query_samples( void ); + + +#endif /* PIXELFORMAT_H */ diff --git a/src/gallium/state_trackers/wgl/shared/stw_public.h b/src/gallium/state_trackers/wgl/shared/stw_public.h new file mode 100644 index 0000000000..39d377c16b --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_public.h @@ -0,0 +1,68 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef STW_PUBLIC_H +#define STW_PUBLIC_H + +#include <windows.h> + +BOOL stw_copy_context( UINT_PTR hglrcSrc, + UINT_PTR hglrcDst, + UINT mask ); + +UINT_PTR stw_create_layer_context( HDC hdc, + int iLayerPlane ); + +BOOL stw_delete_context( UINT_PTR hglrc ); + +BOOL +stw_release_context( UINT_PTR dhglrc ); + +UINT_PTR stw_get_current_context( void ); + +HDC stw_get_current_dc( void ); + +BOOL stw_make_current( HDC hdc, UINT_PTR hglrc ); + +BOOL stw_swap_buffers( HDC hdc ); + +PROC stw_get_proc_address( LPCSTR lpszProc ); + +int stw_pixelformat_describe( HDC hdc, + int iPixelFormat, + UINT nBytes, + LPPIXELFORMATDESCRIPTOR ppfd ); + +int stw_pixelformat_get( HDC hdc ); + +BOOL stw_pixelformat_set( HDC hdc, + int iPixelFormat ); + +int stw_pixelformat_choose( HDC hdc, + CONST PIXELFORMATDESCRIPTOR *ppfd ); + +#endif diff --git a/src/gallium/state_trackers/wgl/shared/stw_quirks.c b/src/gallium/state_trackers/wgl/shared/stw_quirks.c new file mode 100644 index 0000000000..0961ce3bb0 --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_quirks.c @@ -0,0 +1,113 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * + * This is hopefully a temporary hack to define some needed dispatch + * table entries. Hopefully, I'll find a better solution. The + * dispatch table generation scripts ought to be making these dummy + * stubs as well. + */ + +void gl_dispatch_stub_543(void){} +void gl_dispatch_stub_544(void){} +void gl_dispatch_stub_545(void){} +void gl_dispatch_stub_546(void){} +void gl_dispatch_stub_547(void){} +void gl_dispatch_stub_548(void){} +void gl_dispatch_stub_549(void){} +void gl_dispatch_stub_550(void){} +void gl_dispatch_stub_551(void){} +void gl_dispatch_stub_552(void){} +void gl_dispatch_stub_553(void){} +void gl_dispatch_stub_554(void){} +void gl_dispatch_stub_555(void){} +void gl_dispatch_stub_556(void){} +void gl_dispatch_stub_557(void){} +void gl_dispatch_stub_558(void){} +void gl_dispatch_stub_559(void){} +void gl_dispatch_stub_560(void){} +void gl_dispatch_stub_561(void){} +void gl_dispatch_stub_565(void){} +void gl_dispatch_stub_566(void){} +void gl_dispatch_stub_570(void){} +void gl_dispatch_stub_577(void){} +void gl_dispatch_stub_578(void){} +void gl_dispatch_stub_582(void){} +void gl_dispatch_stub_603(void){} +void gl_dispatch_stub_607(void){} +void gl_dispatch_stub_645(void){} +void gl_dispatch_stub_646(void){} +void gl_dispatch_stub_647(void){} +void gl_dispatch_stub_648(void){} +void gl_dispatch_stub_649(void){} +void gl_dispatch_stub_650(void){} +void 
gl_dispatch_stub_651(void){} +void gl_dispatch_stub_652(void){} +void gl_dispatch_stub_653(void){} +void gl_dispatch_stub_657(void){} +void gl_dispatch_stub_733(void){} +void gl_dispatch_stub_734(void){} +void gl_dispatch_stub_735(void){} +void gl_dispatch_stub_736(void){} +void gl_dispatch_stub_737(void){} +void gl_dispatch_stub_738(void){} +void gl_dispatch_stub_744(void){} +void gl_dispatch_stub_745(void){} +void gl_dispatch_stub_746(void){} +void gl_dispatch_stub_760(void){} +void gl_dispatch_stub_761(void){} +void gl_dispatch_stub_763(void){} +void gl_dispatch_stub_764(void){} +void gl_dispatch_stub_765(void){} +void gl_dispatch_stub_766(void){} +void gl_dispatch_stub_767(void){} +void gl_dispatch_stub_768(void){} + +void gl_dispatch_stub_562(void){} +void gl_dispatch_stub_563(void){} +void gl_dispatch_stub_564(void){} +void gl_dispatch_stub_567(void){} +void gl_dispatch_stub_568(void){} +void gl_dispatch_stub_569(void){} +void gl_dispatch_stub_580(void){} +void gl_dispatch_stub_581(void){} +void gl_dispatch_stub_606(void){} +void gl_dispatch_stub_654(void){} +void gl_dispatch_stub_655(void){} +void gl_dispatch_stub_656(void){} +void gl_dispatch_stub_739(void){} +void gl_dispatch_stub_740(void){} +void gl_dispatch_stub_741(void){} +void gl_dispatch_stub_748(void){} +void gl_dispatch_stub_749(void){} +void gl_dispatch_stub_769(void){} +void gl_dispatch_stub_770(void){} +void gl_dispatch_stub_771(void){} +void gl_dispatch_stub_772(void){} +void gl_dispatch_stub_773(void){} diff --git a/src/gallium/state_trackers/wgl/shared/stw_winsys.h b/src/gallium/state_trackers/wgl/shared/stw_winsys.h new file mode 100644 index 0000000000..a85a9a2257 --- /dev/null +++ b/src/gallium/state_trackers/wgl/shared/stw_winsys.h @@ -0,0 +1,59 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef STW_WINSYS_H +#define STW_WINSYS_H + +#include <windows.h> /* for HDC */ + +#include "pipe/p_compiler.h" + +struct pipe_screen; +struct pipe_context; +struct pipe_surface; + +struct stw_winsys +{ + struct pipe_screen * + (*create_screen)( void ); + + struct pipe_context * + (*create_context)( struct pipe_screen *screen ); + + void + (*flush_frontbuffer)( struct pipe_screen *screen, + struct pipe_surface *surf, + HDC hDC ); +}; + +boolean +st_init(const struct stw_winsys *stw_winsys); + +void +st_cleanup(void); + +#endif /* STW_WINSYS_H */ diff --git a/src/gallium/state_trackers/wgl/wgl/stw_wgl.c b/src/gallium/state_trackers/wgl/wgl/stw_wgl.c new file mode 100644 index 0000000000..e06d2640b4 --- /dev/null +++ b/src/gallium/state_trackers/wgl/wgl/stw_wgl.c @@ -0,0 +1,337 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <windows.h> + +#include "util/u_debug.h" +#include "shared/stw_public.h" +#include "stw_wgl.h" + + +WINGDIAPI BOOL APIENTRY +wglCopyContext( + HGLRC hglrcSrc, + HGLRC hglrcDst, + UINT mask ) +{ + return stw_copy_context( (UINT_PTR)hglrcSrc, + (UINT_PTR)hglrcDst, + mask ); +} + +WINGDIAPI HGLRC APIENTRY +wglCreateContext( + HDC hdc ) +{ + return wglCreateLayerContext(hdc, 0); +} + +WINGDIAPI HGLRC APIENTRY +wglCreateLayerContext( + HDC hdc, + int iLayerPlane ) +{ + return (HGLRC) stw_create_layer_context( hdc, iLayerPlane ); +} + +WINGDIAPI BOOL APIENTRY +wglDeleteContext( + HGLRC hglrc ) +{ + return stw_delete_context( (UINT_PTR)hglrc ); +} + + +WINGDIAPI HGLRC APIENTRY +wglGetCurrentContext( VOID ) +{ + return (HGLRC)stw_get_current_context(); +} + +WINGDIAPI HDC APIENTRY +wglGetCurrentDC( VOID ) +{ + return stw_get_current_dc(); +} + +WINGDIAPI BOOL APIENTRY +wglMakeCurrent( + HDC hdc, + HGLRC hglrc ) +{ + return stw_make_current( hdc, (UINT_PTR)hglrc ); +} + + +WINGDIAPI BOOL APIENTRY +wglSwapBuffers( + HDC hdc ) +{ + return stw_swap_buffers( hdc ); +} + + +WINGDIAPI BOOL APIENTRY +wglSwapLayerBuffers( + HDC hdc, + UINT fuPlanes ) +{ + (void) hdc; + (void) fuPlanes; + + return FALSE; +} + +WINGDIAPI PROC APIENTRY +wglGetProcAddress( + LPCSTR lpszProc ) +{ + return stw_get_proc_address( lpszProc ); +} + + +WINGDIAPI int APIENTRY +wglChoosePixelFormat( + HDC hdc, + CONST PIXELFORMATDESCRIPTOR *ppfd ) +{ + if (ppfd->nSize != sizeof( PIXELFORMATDESCRIPTOR ) || ppfd->nVersion != 1) + return 0; + if (ppfd->iPixelType != PFD_TYPE_RGBA) + return 0; + if (!(ppfd->dwFlags & PFD_DRAW_TO_WINDOW)) + return 
0; + if (!(ppfd->dwFlags & PFD_SUPPORT_OPENGL)) + return 0; + if (ppfd->dwFlags & PFD_DRAW_TO_BITMAP) + return 0; + if (ppfd->dwFlags & PFD_SUPPORT_GDI) + return 0; + if (!(ppfd->dwFlags & PFD_STEREO_DONTCARE) && (ppfd->dwFlags & PFD_STEREO)) + return 0; + + return stw_pixelformat_choose( hdc, ppfd ); +} + +WINGDIAPI int APIENTRY +wglDescribePixelFormat( + HDC hdc, + int iPixelFormat, + UINT nBytes, + LPPIXELFORMATDESCRIPTOR ppfd ) +{ + return stw_pixelformat_describe( hdc, iPixelFormat, nBytes, ppfd ); +} + +WINGDIAPI int APIENTRY +wglGetPixelFormat( + HDC hdc ) +{ + return stw_pixelformat_get( hdc ); +} + +WINGDIAPI BOOL APIENTRY +wglSetPixelFormat( + HDC hdc, + int iPixelFormat, + const PIXELFORMATDESCRIPTOR *ppfd ) +{ + if (ppfd->nSize != sizeof( PIXELFORMATDESCRIPTOR )) + return FALSE; + + return stw_pixelformat_set( hdc, iPixelFormat ); +} + + +WINGDIAPI BOOL APIENTRY +wglUseFontBitmapsA( + HDC hdc, + DWORD first, + DWORD count, + DWORD listBase ) +{ + (void) hdc; + (void) first; + (void) count; + (void) listBase; + + assert( 0 ); + + return FALSE; +} + +WINGDIAPI BOOL APIENTRY +wglShareLists( + HGLRC hglrc1, + HGLRC hglrc2 ) +{ + (void) hglrc1; + (void) hglrc2; + + assert( 0 ); + + return FALSE; +} + +WINGDIAPI BOOL APIENTRY +wglUseFontBitmapsW( + HDC hdc, + DWORD first, + DWORD count, + DWORD listBase ) +{ + (void) hdc; + (void) first; + (void) count; + (void) listBase; + + assert( 0 ); + + return FALSE; +} + +WINGDIAPI BOOL APIENTRY +wglUseFontOutlinesA( + HDC hdc, + DWORD first, + DWORD count, + DWORD listBase, + FLOAT deviation, + FLOAT extrusion, + int format, + LPGLYPHMETRICSFLOAT lpgmf ) +{ + (void) hdc; + (void) first; + (void) count; + (void) listBase; + (void) deviation; + (void) extrusion; + (void) format; + (void) lpgmf; + + assert( 0 ); + + return FALSE; +} + +WINGDIAPI BOOL APIENTRY +wglUseFontOutlinesW( + HDC hdc, + DWORD first, + DWORD count, + DWORD listBase, + FLOAT deviation, + FLOAT extrusion, + int format, + LPGLYPHMETRICSFLOAT lpgmf ) 
+{ + (void) hdc; + (void) first; + (void) count; + (void) listBase; + (void) deviation; + (void) extrusion; + (void) format; + (void) lpgmf; + + assert( 0 ); + + return FALSE; +} + +WINGDIAPI BOOL APIENTRY +wglDescribeLayerPlane( + HDC hdc, + int iPixelFormat, + int iLayerPlane, + UINT nBytes, + LPLAYERPLANEDESCRIPTOR plpd ) +{ + (void) hdc; + (void) iPixelFormat; + (void) iLayerPlane; + (void) nBytes; + (void) plpd; + + assert( 0 ); + + return FALSE; +} + +WINGDIAPI int APIENTRY +wglSetLayerPaletteEntries( + HDC hdc, + int iLayerPlane, + int iStart, + int cEntries, + CONST COLORREF *pcr ) +{ + (void) hdc; + (void) iLayerPlane; + (void) iStart; + (void) cEntries; + (void) pcr; + + assert( 0 ); + + return 0; +} + +WINGDIAPI int APIENTRY +wglGetLayerPaletteEntries( + HDC hdc, + int iLayerPlane, + int iStart, + int cEntries, + COLORREF *pcr ) +{ + (void) hdc; + (void) iLayerPlane; + (void) iStart; + (void) cEntries; + (void) pcr; + + assert( 0 ); + + return 0; +} + +WINGDIAPI BOOL APIENTRY +wglRealizeLayerPalette( + HDC hdc, + int iLayerPlane, + BOOL bRealize ) +{ + (void) hdc; + (void) iLayerPlane; + (void) bRealize; + + assert( 0 ); + + return FALSE; +} diff --git a/src/gallium/state_trackers/wgl/wgl/stw_wgl.h b/src/gallium/state_trackers/wgl/wgl/stw_wgl.h new file mode 100644 index 0000000000..a98179944a --- /dev/null +++ b/src/gallium/state_trackers/wgl/wgl/stw_wgl.h @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef STW_WGL_H_ +#define STW_WGL_H_ + + +#include <windows.h> + +#include <GL/gl.h> + + +/* + * Undeclared APIs exported by opengl32.dll + */ + +WINGDIAPI BOOL WINAPI +wglSwapBuffers(HDC hdc); + +WINGDIAPI int WINAPI +wglChoosePixelFormat(HDC hdc, + CONST PIXELFORMATDESCRIPTOR *ppfd); + +WINGDIAPI int WINAPI +wglDescribePixelFormat(HDC hdc, + int iPixelFormat, + UINT nBytes, + LPPIXELFORMATDESCRIPTOR ppfd); + +WINGDIAPI int WINAPI +wglGetPixelFormat(HDC hdc); + +WINGDIAPI BOOL WINAPI +wglSetPixelFormat(HDC hdc, + int iPixelFormat, + CONST PIXELFORMATDESCRIPTOR *ppfd); + + +#endif /* STW_WGL_H_ */ diff --git a/src/gallium/winsys/Makefile b/src/gallium/winsys/Makefile index 2360a6a94a..bce5b3f9e0 100644 --- a/src/gallium/winsys/Makefile +++ b/src/gallium/winsys/Makefile @@ -1,24 +1,12 @@ +# src/gallium/winsys/Makefile TOP = ../../.. include $(TOP)/configs/current - SUBDIRS = $(GALLIUM_WINSYS_DIRS) - -default: subdirs - - -subdirs: +default install clean: @for dir in $(SUBDIRS) ; do \ if [ -d $$dir ] ; then \ - (cd $$dir && $(MAKE)) || exit 1 ; \ + (cd $$dir && $(MAKE) $@) || exit 1; \ fi \ done - - -clean: - rm -f `find . -name \*.[oa]` - - -# Dummy install target -install: diff --git a/src/gallium/winsys/drm/Makefile b/src/gallium/winsys/drm/Makefile index f466ce6c3c..fee0191643 100644 --- a/src/gallium/winsys/drm/Makefile +++ b/src/gallium/winsys/drm/Makefile @@ -1,38 +1,12 @@ -# src/mesa/drivers/dri/Makefile - +# src/gallium/winsys/Makefile TOP = ../../../.. 
- include $(TOP)/configs/current +SUBDIRS = $(GALLIUM_WINSYS_DRM_DIRS) - -default: $(TOP)/$(LIB_DIR) subdirs - - -$(TOP)/$(LIB_DIR): - -mkdir $(TOP)/$(LIB_DIR) - - -subdirs: - @for dir in $(DRI_DIRS) ; do \ - if [ -d $$dir ] ; then \ - (cd $$dir && $(MAKE)) || exit 1 ; \ - fi \ - done - - -install: - @for dir in $(DRI_DIRS) ; do \ - if [ -d $$dir ] ; then \ - (cd $$dir && $(MAKE) install) || exit 1 ; \ - fi \ - done - - -clean: - @for dir in $(DRI_DIRS) ; do \ +default install clean: + @for dir in $(SUBDIRS) ; do \ if [ -d $$dir ] ; then \ - (cd $$dir && $(MAKE) clean) ; \ + (cd $$dir && $(MAKE) $@) || exit 1; \ fi \ done - -rm -f common/*.o diff --git a/src/gallium/winsys/drm/Makefile.template b/src/gallium/winsys/drm/Makefile.template index 80e817b808..9f92cb4207 100644 --- a/src/gallium/winsys/drm/Makefile.template +++ b/src/gallium/winsys/drm/Makefile.template @@ -1,9 +1,9 @@ # -*-makefile-*- MESA_MODULES = \ - $(TOP)/src/mesa/libmesa.a \ + $(TOP)/src/mesa/libmesagallium.a \ $(GALLIUM_AUXILIARIES) - + COMMON_GALLIUM_SOURCES = \ $(TOP)/src/mesa/drivers/dri/common/utils.c \ $(TOP)/src/mesa/drivers/dri/common/vblank.c \ @@ -79,25 +79,24 @@ SHARED_INCLUDES = \ ##### TARGETS ##### -default: depend symlinks $(LIBNAME) $(TOP)/$(LIB_DIR)/$(LIBNAME) $(LIBNAME_EGL) $(TOP)/$(LIB_DIR)/$(LIBNAME_EGL) - +default: depend symlinks $(TOP)/$(LIB_DIR)/gallium/$(LIBNAME) $(LIBNAME): $(OBJECTS) $(MESA_MODULES) $(PIPE_DRIVERS) $(WINOBJ) Makefile $(TOP)/src/mesa/drivers/dri/Makefile.template - $(TOP)/bin/mklib -noprefix -o $@ \ - $(OBJECTS) $(PIPE_DRIVERS) $(MESA_MODULES) $(WINOBJ) $(DRI_LIB_DEPS) + $(MKLIB) -noprefix -o $@ \ + $(OBJECTS) $(PIPE_DRIVERS) $(MESA_MODULES) $(WINOBJ) $(DRI_LIB_DEPS) $(DRIVER_EXTRAS) $(LIBNAME_EGL): $(WINSYS_OBJECTS) $(LIBS) - $(TOP)/bin/mklib -o $(LIBNAME_EGL) \ + $(MKLIB) -o $(LIBNAME_EGL) \ -linker "$(CC)" \ -noprefix \ $(OBJECTS) $(MKLIB_OPTIONS) $(WINSYS_OBJECTS) $(PIPE_DRIVERS) $(WINOBJ) $(DRI_LIB_DEPS) \ - --whole-archive $(LIBS) 
$(GALLIUM_AUXILIARIES) --no-whole-archive + --whole-archive $(LIBS) $(GALLIUM_AUXILIARIES) --no-whole-archive $(DRIVER_EXTRAS) -$(TOP)/$(LIB_DIR)/$(LIBNAME): $(LIBNAME) - $(INSTALL) $(LIBNAME) $(TOP)/$(LIB_DIR) +$(TOP)/$(LIB_DIR)/gallium: + mkdir -p $@ -$(TOP)/$(LIB_DIR)/$(LIBNAME_EGL): $(LIBNAME_EGL) - $(INSTALL) $(LIBNAME_EGL) $(TOP)/$(LIB_DIR) +$(TOP)/$(LIB_DIR)/gallium/$(LIBNAME): $(LIBNAME) $(TOP)/$(LIB_DIR)/gallium + $(INSTALL) $(LIBNAME) $(TOP)/$(LIB_DIR)/gallium depend: $(C_SOURCES) $(ASM_SOURCES) $(SYMLINKS) rm -f depend @@ -118,8 +117,8 @@ clean: install: $(LIBNAME) - $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) - $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) + $(INSTALL) -d $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) + $(INSTALL) -m 755 $(LIBNAME) $(DESTDIR)$(DRI_DRIVER_INSTALL_DIR) include depend diff --git a/src/gallium/winsys/drm/intel/Makefile b/src/gallium/winsys/drm/intel/Makefile index a670ac044d..d8feef6824 100644 --- a/src/gallium/winsys/drm/intel/Makefile +++ b/src/gallium/winsys/drm/intel/Makefile @@ -1,25 +1,12 @@ +# src/gallium/winsys/drm/intel/Makefile TOP = ../../../../.. include $(TOP)/configs/current +SUBDIRS = gem $(GALLIUM_STATE_TRACKERS_DIRS) -SUBDIRS = common dri egl - - -default: subdirs - - -subdirs: +default install clean: @for dir in $(SUBDIRS) ; do \ if [ -d $$dir ] ; then \ - (cd $$dir && $(MAKE)) || exit 1 ; \ + (cd $$dir && $(MAKE) $@) || exit 1; \ fi \ done - - -clean: - rm -f `find . -name \*.[oa]` - rm -f `find . -name depend` - - -# Dummy install target -install: diff --git a/src/gallium/winsys/drm/intel/common/Makefile b/src/gallium/winsys/drm/intel/common/Makefile deleted file mode 100644 index bf1a7d691f..0000000000 --- a/src/gallium/winsys/drm/intel/common/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -TOP = ../../../../../.. 
-include $(TOP)/configs/current - -LIBNAME = inteldrm - -C_SOURCES = \ - intel_be_batchbuffer.c \ - intel_be_context.c \ - intel_be_device.c \ - ws_dri_bufmgr.c \ - ws_dri_drmpool.c \ - ws_dri_fencemgr.c \ - ws_dri_mallocpool.c \ - ws_dri_slabpool.c - - -include ./Makefile.template - -DRIVER_DEFINES = $(shell pkg-config libdrm --cflags \ - && pkg-config libdrm --atleast-version=2.3.1 \ - && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") -symlinks: - diff --git a/src/gallium/winsys/drm/intel/common/Makefile.template b/src/gallium/winsys/drm/intel/common/Makefile.template deleted file mode 100644 index 02ed363a43..0000000000 --- a/src/gallium/winsys/drm/intel/common/Makefile.template +++ /dev/null @@ -1,64 +0,0 @@ -# -*-makefile-*- - - -# We still have a dependency on the "dri" buffer manager. Most likely -# the interface can be reused in non-dri environments, and also as a -# frontend to simpler memory managers. -# -COMMON_SOURCES = - -OBJECTS = $(C_SOURCES:.c=.o) \ - $(CPP_SOURCES:.cpp=.o) \ - $(ASM_SOURCES:.S=.o) - - -### Include directories -INCLUDES = \ - -I. \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/include \ - $(DRIVER_INCLUDES) - - -##### RULES ##### - -.c.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ - -.cpp.o: - $(CXX) -c $(INCLUDES) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ - -.S.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ - - -##### TARGETS ##### - -default: depend symlinks $(LIBNAME) - - -$(LIBNAME): $(OBJECTS) Makefile Makefile.template - $(TOP)/bin/mklib -o $@ -static $(OBJECTS) $(DRIVER_LIBS) - - -depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(SYMLINKS) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \ - $(ASM_SOURCES) 2> /dev/null - - -# Emacs tags -tags: - etags `find . 
-name \*.[ch]` `find ../include` - - -# Remove .o and backup files -clean:: - -rm -f *.o */*.o *~ *.so *~ server/*.o $(SYMLINKS) - -rm -f depend depend.bak - - -include depend diff --git a/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.c b/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.c deleted file mode 100644 index bc13a5761e..0000000000 --- a/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.c +++ /dev/null @@ -1,429 +0,0 @@ - -#include "intel_be_batchbuffer.h" -#include "intel_be_context.h" -#include "intel_be_device.h" -#include <errno.h> - -#include "xf86drm.h" - -static void -intel_realloc_relocs(struct intel_be_batchbuffer *batch, int num_relocs) -{ - unsigned long size = num_relocs * I915_RELOC0_STRIDE + I915_RELOC_HEADER; - - size *= sizeof(uint32_t); - batch->reloc = realloc(batch->reloc, size); - batch->reloc_size = num_relocs; -} - - -void -intel_be_batchbuffer_reset(struct intel_be_batchbuffer *batch) -{ - /* - * Get a new, free batchbuffer. - */ - drmBO *bo; - struct drm_bo_info_req *req; - - driBOUnrefUserList(batch->list); - driBOResetList(batch->list); - - /* base.size is the size available to the i915simple driver */ - batch->base.size = batch->device->max_batch_size - BATCH_RESERVED; - batch->base.actual_size = batch->device->max_batch_size; - driBOData(batch->buffer, batch->base.actual_size, NULL, NULL, 0); - - /* - * Add the batchbuffer to the validate list. - */ - - driBOAddListItem(batch->list, batch->buffer, - DRM_BO_FLAG_EXE | DRM_BO_FLAG_MEM_TT, - DRM_BO_FLAG_EXE | DRM_BO_MASK_MEM, - &batch->dest_location, &batch->node); - - req = &batch->node->bo_arg.d.req.bo_req; - - /* - * Set up information needed for us to make relocations - * relative to the underlying drm buffer objects. 
- */ - - driReadLockKernelBO(); - bo = driBOKernel(batch->buffer); - req->presumed_offset = (uint64_t) bo->offset; - req->hint = DRM_BO_HINT_PRESUMED_OFFSET; - batch->drmBOVirtual = (uint8_t *) bo->virtual; - driReadUnlockKernelBO(); - - /* - * Adjust the relocation buffer size. - */ - - if (batch->reloc_size > INTEL_MAX_RELOCS || - batch->reloc == NULL) - intel_realloc_relocs(batch, INTEL_DEFAULT_RELOCS); - - assert(batch->reloc != NULL); - batch->reloc[0] = 0; /* No relocs yet. */ - batch->reloc[1] = 1; /* Reloc type 1 */ - batch->reloc[2] = 0; /* Only a single relocation list. */ - batch->reloc[3] = 0; /* Only a single relocation list. */ - - batch->base.map = driBOMap(batch->buffer, DRM_BO_FLAG_WRITE, 0); - batch->poolOffset = driBOPoolOffset(batch->buffer); - batch->base.ptr = batch->base.map; - batch->dirty_state = ~0; - batch->nr_relocs = 0; - batch->flags = 0; - batch->id = 0;//batch->intel->intelScreen->batch_id++; -} - -/*====================================================================== - * Public functions - */ -struct intel_be_batchbuffer * -intel_be_batchbuffer_alloc(struct intel_be_context *intel) -{ - struct intel_be_batchbuffer *batch = calloc(sizeof(*batch), 1); - - batch->intel = intel; - batch->device = intel->device; - - driGenBuffers(intel->device->batchPool, "batchbuffer", 1, - &batch->buffer, 4096, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_EXE, 0); - batch->last_fence = NULL; - batch->list = driBOCreateList(20); - batch->reloc = NULL; - intel_be_batchbuffer_reset(batch); - return batch; -} - -void -intel_be_batchbuffer_free(struct intel_be_batchbuffer *batch) -{ - if (batch->last_fence) { - driFenceFinish(batch->last_fence, - DRM_FENCE_TYPE_EXE, FALSE); - driFenceUnReference(&batch->last_fence); - } - if (batch->base.map) { - driBOUnmap(batch->buffer); - batch->base.map = NULL; - } - driBOUnReference(batch->buffer); - driBOFreeList(batch->list); - if (batch->reloc) - free(batch->reloc); - batch->buffer = NULL; - free(batch); -} - -void 
-intel_be_offset_relocation(struct intel_be_batchbuffer *batch, - unsigned pre_add, - struct _DriBufferObject *driBO, - uint64_t val_flags, - uint64_t val_mask) -{ - int itemLoc; - struct _drmBONode *node; - uint32_t *reloc; - struct drm_bo_info_req *req; - - driBOAddListItem(batch->list, driBO, val_flags, val_mask, - &itemLoc, &node); - req = &node->bo_arg.d.req.bo_req; - - if (!(req->hint & DRM_BO_HINT_PRESUMED_OFFSET)) { - - /* - * Stop other threads from tampering with the underlying - * drmBO while we're reading its offset. - */ - - driReadLockKernelBO(); - req->presumed_offset = (uint64_t) driBOKernel(driBO)->offset; - driReadUnlockKernelBO(); - req->hint = DRM_BO_HINT_PRESUMED_OFFSET; - } - - pre_add += driBOPoolOffset(driBO); - - if (batch->nr_relocs == batch->reloc_size) - intel_realloc_relocs(batch, batch->reloc_size * 2); - - reloc = batch->reloc + - (I915_RELOC_HEADER + batch->nr_relocs * I915_RELOC0_STRIDE); - - reloc[0] = ((uint8_t *)batch->base.ptr - batch->drmBOVirtual); - i915_batchbuffer_dword(&batch->base, req->presumed_offset + pre_add); - reloc[1] = pre_add; - reloc[2] = itemLoc; - reloc[3] = batch->dest_location; - batch->nr_relocs++; -} - -static void -i915_drm_copy_reply(const struct drm_bo_info_rep * rep, drmBO * buf) -{ - buf->handle = rep->handle; - buf->flags = rep->flags; - buf->size = rep->size; - buf->offset = rep->offset; - buf->mapHandle = rep->arg_handle; - buf->proposedFlags = rep->proposed_flags; - buf->start = rep->buffer_start; - buf->fenceFlags = rep->fence_flags; - buf->replyFlags = rep->rep_flags; - buf->pageAlignment = rep->page_alignment; -} - -static int -i915_execbuf(struct intel_be_batchbuffer *batch, - unsigned int used, - boolean ignore_cliprects, - drmBOList *list, - struct drm_i915_execbuffer *ea) -{ -// struct intel_be_context *intel = batch->intel; - drmBONode *node; - drmMMListHead *l; - struct drm_i915_op_arg *arg, *first; - struct drm_bo_op_req *req; - struct drm_bo_info_rep *rep; - uint64_t *prevNext = NULL; - 
drmBO *buf; - int ret = 0; - uint32_t count = 0; - - first = NULL; - for (l = list->list.next; l != &list->list; l = l->next) { - node = DRMLISTENTRY(drmBONode, l, head); - - arg = &node->bo_arg; - req = &arg->d.req; - - if (!first) - first = arg; - - if (prevNext) - *prevNext = (unsigned long)arg; - - prevNext = &arg->next; - req->bo_req.handle = node->buf->handle; - req->op = drm_bo_validate; - req->bo_req.flags = node->arg0; - req->bo_req.mask = node->arg1; - req->bo_req.hint |= 0; - count++; - } - - memset(ea, 0, sizeof(*ea)); - ea->num_buffers = count; - ea->batch.start = batch->poolOffset; - ea->batch.used = used; -#if 0 /* ZZZ JB: no cliprects used */ - ea->batch.cliprects = intel->pClipRects; - ea->batch.num_cliprects = ignore_cliprects ? 0 : intel->numClipRects; - ea->batch.DR1 = 0; - ea->batch.DR4 = 0;((((GLuint) intel->drawX) & 0xffff) | - (((GLuint) intel->drawY) << 16)); -#else - ea->batch.cliprects = NULL; - ea->batch.num_cliprects = 0; - ea->batch.DR1 = 0; - ea->batch.DR4 = 0; -#endif - ea->fence_arg.flags = DRM_I915_FENCE_FLAG_FLUSHED; - ea->ops_list = (unsigned long) first; - first->reloc_ptr = (unsigned long) batch->reloc; - batch->reloc[0] = batch->nr_relocs; - - //return -EFAULT; - do { - ret = drmCommandWriteRead(batch->device->fd, DRM_I915_EXECBUFFER, ea, - sizeof(*ea)); - } while (ret == -EAGAIN); - - if (ret != 0) - return ret; - - for (l = list->list.next; l != &list->list; l = l->next) { - node = DRMLISTENTRY(drmBONode, l, head); - arg = &node->bo_arg; - rep = &arg->d.rep.bo_info; - - if (!arg->handled) { - return -EFAULT; - } - if (arg->d.rep.ret) - return arg->d.rep.ret; - - buf = node->buf; - i915_drm_copy_reply(rep, buf); - } - return 0; -} - -/* TODO: Push this whole function into bufmgr. 
- */ -static struct _DriFenceObject * -do_flush_locked(struct intel_be_batchbuffer *batch, - unsigned int used, - boolean ignore_cliprects, boolean allow_unlock) -{ - struct intel_be_context *intel = batch->intel; - struct _DriFenceObject *fo; - drmFence fence; - drmBOList *boList; - struct drm_i915_execbuffer ea; - int ret = 0; - - driBOValidateUserList(batch->list); - boList = driGetdrmBOList(batch->list); - -#if 0 /* ZZZ JB Allways run */ - if (!(intel->numClipRects == 0 && !ignore_cliprects)) { -#else - if (1) { -#endif - ret = i915_execbuf(batch, used, ignore_cliprects, boList, &ea); - } else { - driPutdrmBOList(batch->list); - fo = NULL; - goto out; - } - driPutdrmBOList(batch->list); - if (ret) - abort(); - - if (ea.fence_arg.error != 0) { - - /* - * The hardware has been idled by the kernel. - * Don't fence the driBOs. - */ - - if (batch->last_fence) - driFenceUnReference(&batch->last_fence); -#if 0 /* ZZZ JB: no _mesa_* funcs in gallium */ - _mesa_printf("fence error\n"); -#endif - batch->last_fence = NULL; - fo = NULL; - goto out; - } - - fence.handle = ea.fence_arg.handle; - fence.fence_class = ea.fence_arg.fence_class; - fence.type = ea.fence_arg.type; - fence.flags = ea.fence_arg.flags; - fence.signaled = ea.fence_arg.signaled; - - fo = driBOFenceUserList(batch->device->fenceMgr, batch->list, - "SuperFence", &fence); - - if (driFenceType(fo) & DRM_I915_FENCE_TYPE_RW) { - if (batch->last_fence) - driFenceUnReference(&batch->last_fence); - /* - * FIXME: Context last fence?? 
- */ - batch->last_fence = fo; - driFenceReference(fo); - } - out: -#if 0 /* ZZZ JB: fix this */ - intel->vtbl.lost_hardware(intel); -#else - (void)intel; -#endif - return fo; -} - - -struct _DriFenceObject * -intel_be_batchbuffer_flush(struct intel_be_batchbuffer *batch) -{ - struct intel_be_context *intel = batch->intel; - unsigned int used = batch->base.ptr - batch->base.map; - boolean was_locked = batch->intel->hardware_locked(intel); - struct _DriFenceObject *fence; - - if (used == 0) { - driFenceReference(batch->last_fence); - return batch->last_fence; - } - - /* Add the MI_BATCH_BUFFER_END. Always add an MI_FLUSH - this is a - * performance drain that we would like to avoid. - */ -#if 0 /* ZZZ JB: what should we do here? */ - if (used & 4) { - ((int *) batch->base.ptr)[0] = intel->vtbl.flush_cmd(); - ((int *) batch->base.ptr)[1] = 0; - ((int *) batch->base.ptr)[2] = MI_BATCH_BUFFER_END; - used += 12; - } - else { - ((int *) batch->base.ptr)[0] = intel->vtbl.flush_cmd(); - ((int *) batch->base.ptr)[1] = MI_BATCH_BUFFER_END; - used += 8; - } -#else - if (used & 4) { - ((int *) batch->base.ptr)[0] = ((0<<29)|(4<<23)); // MI_FLUSH; - ((int *) batch->base.ptr)[1] = 0; - ((int *) batch->base.ptr)[2] = (0xA<<23); // MI_BATCH_BUFFER_END; - used += 12; - } - else { - ((int *) batch->base.ptr)[0] = ((0<<29)|(4<<23)); // MI_FLUSH; - ((int *) batch->base.ptr)[1] = (0xA<<23); // MI_BATCH_BUFFER_END; - used += 8; - } -#endif - driBOUnmap(batch->buffer); - batch->base.ptr = NULL; - batch->base.map = NULL; - - /* TODO: Just pass the relocation list and dma buffer up to the - * kernel. 
- */ - if (!was_locked) - intel->hardware_lock(intel); - - fence = do_flush_locked(batch, used, !(batch->flags & INTEL_BATCH_CLIPRECTS), - FALSE); - - if (!was_locked) - intel->hardware_unlock(intel); - - /* Reset the buffer: - */ - intel_be_batchbuffer_reset(batch); - return fence; -} - -void -intel_be_batchbuffer_finish(struct intel_be_batchbuffer *batch) -{ - struct _DriFenceObject *fence = intel_be_batchbuffer_flush(batch); - driFenceFinish(fence, driFenceType(fence), FALSE); - driFenceUnReference(&fence); -} - -#if 0 -void -intel_be_batchbuffer_data(struct intel_be_batchbuffer *batch, - const void *data, unsigned int bytes, unsigned int flags) -{ - assert((bytes & 3) == 0); - intel_batchbuffer_require_space(batch, bytes, flags); - memcpy(batch->base.ptr, data, bytes); - batch->base.ptr += bytes; -} -#endif diff --git a/src/gallium/winsys/drm/intel/common/intel_be_context.c b/src/gallium/winsys/drm/intel/common/intel_be_context.c deleted file mode 100644 index 1af39674f4..0000000000 --- a/src/gallium/winsys/drm/intel/common/intel_be_context.c +++ /dev/null @@ -1,107 +0,0 @@ - -/* - * Authors: Jakob Bornecrantz <jakob-at-tungstengraphics.com> - */ - -#include "ws_dri_fencemgr.h" -#include "intel_be_device.h" -#include "intel_be_context.h" -#include "intel_be_batchbuffer.h" - -static INLINE struct intel_be_context * -intel_be_context(struct i915_winsys *sws) -{ - return (struct intel_be_context *)sws; -} - -/* Simple batchbuffer interface: - */ - -static struct i915_batchbuffer* -intel_i915_batch_get(struct i915_winsys *sws) -{ - struct intel_be_context *intel = intel_be_context(sws); - return &intel->batch->base; -} - -static void intel_i915_batch_reloc(struct i915_winsys *sws, - struct pipe_buffer *buf, - unsigned access_flags, - unsigned delta) -{ - struct intel_be_context *intel = intel_be_context(sws); - - unsigned flags = DRM_BO_FLAG_MEM_TT; - unsigned mask = DRM_BO_MASK_MEM; - - if (access_flags & I915_BUFFER_ACCESS_WRITE) { - flags |= DRM_BO_FLAG_WRITE; - 
mask |= DRM_BO_FLAG_WRITE; - } - - if (access_flags & I915_BUFFER_ACCESS_READ) { - flags |= DRM_BO_FLAG_READ; - mask |= DRM_BO_FLAG_READ; - } - - intel_be_offset_relocation(intel->batch, - delta, - dri_bo(buf), - flags, - mask); -} - -static void intel_i915_batch_flush(struct i915_winsys *sws, - struct pipe_fence_handle **fence) -{ - struct intel_be_context *intel = intel_be_context(sws); - - union { - struct _DriFenceObject *dri; - struct pipe_fence_handle *pipe; - } fu; - - if (fence) - assert(!*fence); - - fu.dri = intel_be_batchbuffer_flush(intel->batch); - - if (!fu.dri) { - assert(0); - *fence = NULL; - return; - } - - if (fu.dri) { - if (fence) - *fence = fu.pipe; - else - driFenceUnReference(&fu.dri); - } - -} - -boolean -intel_be_init_context(struct intel_be_context *intel, struct intel_be_device *device) -{ - assert(intel); - assert(device); - - intel->device = device; - - /* TODO move framebuffer createion to the driver */ - - intel->base.batch_get = intel_i915_batch_get; - intel->base.batch_reloc = intel_i915_batch_reloc; - intel->base.batch_flush = intel_i915_batch_flush; - - intel->batch = intel_be_batchbuffer_alloc(intel); - - return true; -} - -void -intel_be_destroy_context(struct intel_be_context *intel) -{ - intel_be_batchbuffer_free(intel->batch); -} diff --git a/src/gallium/winsys/drm/intel/common/intel_be_context.h b/src/gallium/winsys/drm/intel/common/intel_be_context.h deleted file mode 100644 index d5cbc93594..0000000000 --- a/src/gallium/winsys/drm/intel/common/intel_be_context.h +++ /dev/null @@ -1,40 +0,0 @@ -/* These need to be diffrent from the intel winsys */ -#ifndef INTEL_BE_CONTEXT_H -#define INTEL_BE_CONTEXT_H - -#include "i915simple/i915_winsys.h" - -struct intel_be_context -{ - /** Interface to i915simple driver */ - struct i915_winsys base; - - struct intel_be_device *device; - struct intel_be_batchbuffer *batch; - - /* - * Hardware lock functions. - * - * Needs to be filled in by the winsys. 
- */ - void (*hardware_lock)(struct intel_be_context *context); - void (*hardware_unlock)(struct intel_be_context *context); - boolean (*hardware_locked)(struct intel_be_context *context); -}; - -/** - * Intialize a allocated intel_be_context struct. - * - * Remember to set the hardware_* functions. - */ -boolean -intel_be_init_context(struct intel_be_context *intel, - struct intel_be_device *device); - -/** - * Destroy a intel_be_context. - * Does not free the struct that is up to the winsys. - */ -void -intel_be_destroy_context(struct intel_be_context *intel); -#endif diff --git a/src/gallium/winsys/drm/intel/common/intel_be_device.c b/src/gallium/winsys/drm/intel/common/intel_be_device.c deleted file mode 100644 index 019ee5cbd2..0000000000 --- a/src/gallium/winsys/drm/intel/common/intel_be_device.c +++ /dev/null @@ -1,308 +0,0 @@ - - -/* - * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> - * Jakob Bornecrantz <jakob-at-tungstengraphics-dot-com> - */ - -#include "intel_be_device.h" -#include "ws_dri_bufmgr.h" -#include "ws_dri_bufpool.h" -#include "ws_dri_fencemgr.h" - -#include "pipe/p_winsys.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" -#include "pipe/p_inlines.h" -#include "util/u_memory.h" - -#include "i915simple/i915_screen.h" - -/* Turn a pipe winsys into an intel/pipe winsys: - */ -static INLINE struct intel_be_device * -intel_be_device( struct pipe_winsys *winsys ) -{ - return (struct intel_be_device *)winsys; -} - - -/* - * Buffer functions. 
- * - * Most callbacks map direcly onto dri_bufmgr operations: - */ - -static void *intel_be_buffer_map(struct pipe_winsys *winsys, - struct pipe_buffer *buf, - unsigned flags ) -{ - unsigned drm_flags = 0; - - if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) - drm_flags |= DRM_BO_FLAG_WRITE; - - if (flags & PIPE_BUFFER_USAGE_CPU_READ) - drm_flags |= DRM_BO_FLAG_READ; - - return driBOMap( dri_bo(buf), drm_flags, 0 ); -} - -static void intel_be_buffer_unmap(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - driBOUnmap( dri_bo(buf) ); -} - -static void -intel_be_buffer_destroy(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - driBOUnReference( dri_bo(buf) ); - FREE(buf); -} - -static struct pipe_buffer * -intel_be_buffer_create(struct pipe_winsys *winsys, - unsigned alignment, - unsigned usage, - unsigned size ) -{ - struct intel_be_buffer *buffer = CALLOC_STRUCT( intel_be_buffer ); - struct intel_be_device *iws = intel_be_device(winsys); - unsigned flags = 0; - struct _DriBufferPool *pool; - - buffer->base.refcount = 1; - buffer->base.alignment = alignment; - buffer->base.usage = usage; - buffer->base.size = size; - - if (usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_CONSTANT)) { - flags |= DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED; - pool = iws->mallocPool; - } else if (usage & PIPE_BUFFER_USAGE_CUSTOM) { - /* For vertex buffers */ - flags |= DRM_BO_FLAG_MEM_VRAM | DRM_BO_FLAG_MEM_TT; - pool = iws->vertexPool; - } else { - flags |= DRM_BO_FLAG_MEM_VRAM | DRM_BO_FLAG_MEM_TT; - pool = iws->regionPool; - } - - if (usage & PIPE_BUFFER_USAGE_GPU_READ) - flags |= DRM_BO_FLAG_READ; - - if (usage & PIPE_BUFFER_USAGE_GPU_WRITE) - flags |= DRM_BO_FLAG_WRITE; - - /* drm complains if we don't set any read/write flags. 
- */ - if ((flags & (DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE)) == 0) - flags |= DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE; - - buffer->pool = pool; - driGenBuffers( buffer->pool, - "pipe buffer", 1, &buffer->driBO, alignment, flags, 0 ); - - driBOData( buffer->driBO, size, NULL, buffer->pool, 0 ); - - return &buffer->base; -} - - -static struct pipe_buffer * -intel_be_user_buffer_create(struct pipe_winsys *winsys, void *ptr, unsigned bytes) -{ - struct intel_be_buffer *buffer = CALLOC_STRUCT( intel_be_buffer ); - struct intel_be_device *iws = intel_be_device(winsys); - - driGenUserBuffer( iws->regionPool, - "pipe user buffer", &buffer->driBO, ptr, bytes ); - - buffer->base.refcount = 1; - - return &buffer->base; -} - -struct pipe_buffer * -intel_be_buffer_from_handle(struct intel_be_device *device, - const char* name, unsigned handle) -{ - struct intel_be_buffer *be_buf = malloc(sizeof(*be_buf)); - struct pipe_buffer *buffer; - - if (!be_buf) - goto err; - - memset(be_buf, 0, sizeof(*be_buf)); - - driGenBuffers(device->staticPool, name, 1, &be_buf->driBO, 0, 0, 0); - driBOSetReferenced(be_buf->driBO, handle); - - if (0) /** XXX TODO check error */ - goto err_bo; - - buffer = &be_buf->base; - buffer->refcount = 1; - buffer->alignment = 0; - buffer->usage = 0; - buffer->size = driBOSize(be_buf->driBO); - - return buffer; -err_bo: - free(be_buf); -err: - return NULL; -} - - -/* - * Surface functions. - * - * Deprecated! 
- */ - -static struct pipe_surface * -intel_i915_surface_alloc(struct pipe_winsys *winsys) -{ - assert((size_t)"intel_i915_surface_alloc is deprecated" & 0); - return NULL; -} - -static int -intel_i915_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, - unsigned width, unsigned height, - enum pipe_format format, - unsigned flags, - unsigned tex_usage) -{ - assert((size_t)"intel_i915_surface_alloc_storage is deprecated" & 0); - return -1; -} - -static void -intel_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) -{ - assert((size_t)"intel_i915_surface_release is deprecated" & 0); -} - - -/* - * Fence functions - */ - -static void -intel_be_fence_reference( struct pipe_winsys *sws, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence ) -{ - if (*ptr) - driFenceUnReference((struct _DriFenceObject **)ptr); - - if (fence) - *ptr = (struct pipe_fence_handle *)driFenceReference((struct _DriFenceObject *)fence); -} - -static int -intel_be_fence_signalled( struct pipe_winsys *sws, - struct pipe_fence_handle *fence, - unsigned flag ) -{ - return driFenceSignaled((struct _DriFenceObject *)fence, flag); -} - -static int -intel_be_fence_finish( struct pipe_winsys *sws, - struct pipe_fence_handle *fence, - unsigned flag ) -{ - return driFenceFinish((struct _DriFenceObject *)fence, flag, 0); -} - - -/* - * Misc functions - */ - -boolean -intel_be_init_device(struct intel_be_device *dev, int fd, unsigned id) -{ - dev->fd = fd; - dev->max_batch_size = 16 * 4096; - dev->max_vertex_size = 128 * 4096; - - dev->base.buffer_create = intel_be_buffer_create; - dev->base.user_buffer_create = intel_be_user_buffer_create; - dev->base.buffer_map = intel_be_buffer_map; - dev->base.buffer_unmap = intel_be_buffer_unmap; - dev->base.buffer_destroy = intel_be_buffer_destroy; - dev->base.surface_alloc = intel_i915_surface_alloc; - dev->base.surface_alloc_storage = intel_i915_surface_alloc_storage; - dev->base.surface_release = 
intel_i915_surface_release; - dev->base.fence_reference = intel_be_fence_reference; - dev->base.fence_signalled = intel_be_fence_signalled; - dev->base.fence_finish = intel_be_fence_finish; - -#if 0 /* Set by the winsys */ - dev->base.flush_frontbuffer = intel_flush_frontbuffer; - dev->base.get_name = intel_get_name; -#endif - - dev->fMan = driInitFreeSlabManager(10, 10); - dev->fenceMgr = driFenceMgrTTMInit(dev->fd); - - dev->mallocPool = driMallocPoolInit(); - dev->staticPool = driDRMPoolInit(dev->fd); - /* Sizes: 64 128 256 512 1024 2048 4096 8192 16384 32768 */ - dev->regionPool = driSlabPoolInit(dev->fd, - DRM_BO_FLAG_READ | - DRM_BO_FLAG_WRITE | - DRM_BO_FLAG_MEM_TT, - DRM_BO_FLAG_READ | - DRM_BO_FLAG_WRITE | - DRM_BO_FLAG_MEM_TT, - 64, - 10, 120, 4096 * 64, 0, - dev->fMan); - - dev->vertexPool = driSlabPoolInit(dev->fd, - DRM_BO_FLAG_READ | - DRM_BO_FLAG_WRITE | - DRM_BO_FLAG_MEM_TT, - DRM_BO_FLAG_READ | - DRM_BO_FLAG_WRITE | - DRM_BO_FLAG_MEM_TT, - dev->max_vertex_size, - 1, 120, dev->max_vertex_size * 4, 0, - dev->fMan); - - dev->batchPool = driSlabPoolInit(dev->fd, - DRM_BO_FLAG_EXE | - DRM_BO_FLAG_MEM_TT, - DRM_BO_FLAG_EXE | - DRM_BO_FLAG_MEM_TT, - dev->max_batch_size, - 1, 40, dev->max_batch_size * 16, 0, - dev->fMan); - - /* Fill in this struct with callbacks that i915simple will need to - * communicate with the window system, buffer manager, etc. 
- */ - dev->screen = i915_create_screen(&dev->base, id); - - return true; -} - -void -intel_be_destroy_device(struct intel_be_device *dev) -{ - driPoolTakeDown(dev->mallocPool); - driPoolTakeDown(dev->staticPool); - driPoolTakeDown(dev->regionPool); - driPoolTakeDown(dev->vertexPool); - driPoolTakeDown(dev->batchPool); - - /** TODO takedown fenceMgr and fMan */ -} diff --git a/src/gallium/winsys/drm/intel/common/intel_be_device.h b/src/gallium/winsys/drm/intel/common/intel_be_device.h deleted file mode 100644 index 3f8b3f585c..0000000000 --- a/src/gallium/winsys/drm/intel/common/intel_be_device.h +++ /dev/null @@ -1,72 +0,0 @@ -#ifndef INTEL_DRM_DEVICE_H -#define INTEL_DRM_DEVICE_H - -#include "pipe/p_winsys.h" -#include "pipe/p_context.h" - -/* - * Device - */ - -struct intel_be_device -{ - struct pipe_winsys base; - - /** - * Hw level screen - */ - struct pipe_screen *screen; - - int fd; /**< Drm file discriptor */ - - size_t max_batch_size; - size_t max_vertex_size; - - struct _DriFenceMgr *fenceMgr; - - struct _DriBufferPool *batchPool; - struct _DriBufferPool *regionPool; - struct _DriBufferPool *mallocPool; - struct _DriBufferPool *vertexPool; - struct _DriBufferPool *staticPool; - struct _DriFreeSlabManager *fMan; -}; - -boolean -intel_be_init_device(struct intel_be_device *device, int fd, unsigned id); - -void -intel_be_destroy_device(struct intel_be_device *dev); - -/* - * Buffer - */ - -struct intel_be_buffer { - struct pipe_buffer base; - struct _DriBufferPool *pool; - struct _DriBufferObject *driBO; -}; - -/** - * Create a be buffer from a drm bo handle - * - * Takes a reference - */ -struct pipe_buffer * -intel_be_buffer_from_handle(struct intel_be_device *device, - const char* name, unsigned handle); - -static INLINE struct intel_be_buffer * -intel_be_buffer(struct pipe_buffer *buf) -{ - return (struct intel_be_buffer *)buf; -} - -static INLINE struct _DriBufferObject * -dri_bo(struct pipe_buffer *buf) -{ - return intel_be_buffer(buf)->driBO; -} - 
-#endif diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c b/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c deleted file mode 100644 index 517a97b3ee..0000000000 --- a/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c +++ /dev/null @@ -1,949 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * - **************************************************************************/ -/* - * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> - * Keith Whitwell <keithw-at-tungstengraphics-dot-com> - */ - -#include <xf86drm.h> -#include <stdlib.h> -#include <stdio.h> -#include "pipe/p_thread.h" -#include "errno.h" -#include "ws_dri_bufmgr.h" -#include "string.h" -#include "pipe/p_debug.h" -#include "ws_dri_bufpool.h" -#include "ws_dri_fencemgr.h" - - -/* - * This lock is here to protect drmBO structs changing underneath us during a - * validate list call, since validatelist cannot take individiual locks for - * each drmBO. Validatelist takes this lock in write mode. Any access to an - * individual drmBO should take this lock in read mode, since in that case, the - * driBufferObject mutex will protect the access. Locking order is - * driBufferObject mutex - > this rw lock. - */ - -pipe_static_mutex(bmMutex); -pipe_static_condvar(bmCond); - -static int kernelReaders = 0; -static int num_buffers = 0; -static int num_user_buffers = 0; - -static drmBO *drmBOListBuf(void *iterator) -{ - drmBONode *node; - drmMMListHead *l = (drmMMListHead *) iterator; - node = DRMLISTENTRY(drmBONode, l, head); - return node->buf; -} - -static void *drmBOListIterator(drmBOList *list) -{ - void *ret = list->list.next; - - if (ret == &list->list) - return NULL; - return ret; -} - -static void *drmBOListNext(drmBOList *list, void *iterator) -{ - void *ret; - - drmMMListHead *l = (drmMMListHead *) iterator; - ret = l->next; - if (ret == &list->list) - return NULL; - return ret; -} - -static drmBONode *drmAddListItem(drmBOList *list, drmBO *item, - uint64_t arg0, - uint64_t arg1) -{ - drmBONode *node; - drmMMListHead *l; - - l = list->free.next; - if (l == &list->free) { - node = (drmBONode *) malloc(sizeof(*node)); - if (!node) { - return NULL; - } - list->numCurrent++; - } - else { - DRMLISTDEL(l); - node = DRMLISTENTRY(drmBONode, l, head); - } - node->buf = item; - node->arg0 
= arg0; - node->arg1 = arg1; - DRMLISTADD(&node->head, &list->list); - list->numOnList++; - return node; -} - -static int drmAddValidateItem(drmBOList *list, drmBO *buf, uint64_t flags, - uint64_t mask, int *newItem) -{ - drmBONode *node, *cur; - drmMMListHead *l; - - *newItem = 0; - cur = NULL; - - for (l = list->list.next; l != &list->list; l = l->next) { - node = DRMLISTENTRY(drmBONode, l, head); - if (node->buf == buf) { - cur = node; - break; - } - } - if (!cur) { - cur = drmAddListItem(list, buf, flags, mask); - if (!cur) { - return -ENOMEM; - } - *newItem = 1; - cur->arg0 = flags; - cur->arg1 = mask; - } - else { - uint64_t memFlags = cur->arg0 & flags & DRM_BO_MASK_MEM; - uint64_t accFlags = (cur->arg0 | flags) & ~DRM_BO_MASK_MEM; - - if (mask & cur->arg1 & ~DRM_BO_MASK_MEM & (cur->arg0 ^ flags)) { - return -EINVAL; - } - - cur->arg1 |= mask; - cur->arg0 = (cur->arg0 & ~mask) | ((memFlags | accFlags) & mask); - - if (((cur->arg1 & DRM_BO_MASK_MEM) != 0) && - (cur->arg0 & DRM_BO_MASK_MEM) == 0) { - return -EINVAL; - } - } - return 0; -} - -static void drmBOFreeList(drmBOList *list) -{ - drmBONode *node; - drmMMListHead *l; - - l = list->list.next; - while(l != &list->list) { - DRMLISTDEL(l); - node = DRMLISTENTRY(drmBONode, l, head); - free(node); - l = list->list.next; - list->numCurrent--; - list->numOnList--; - } - - l = list->free.next; - while(l != &list->free) { - DRMLISTDEL(l); - node = DRMLISTENTRY(drmBONode, l, head); - free(node); - l = list->free.next; - list->numCurrent--; - } -} - -static int drmAdjustListNodes(drmBOList *list) -{ - drmBONode *node; - drmMMListHead *l; - int ret = 0; - - while(list->numCurrent < list->numTarget) { - node = (drmBONode *) malloc(sizeof(*node)); - if (!node) { - ret = -ENOMEM; - break; - } - list->numCurrent++; - DRMLISTADD(&node->head, &list->free); - } - - while(list->numCurrent > list->numTarget) { - l = list->free.next; - if (l == &list->free) - break; - DRMLISTDEL(l); - node = DRMLISTENTRY(drmBONode, l, head); 
- free(node); - list->numCurrent--; - } - return ret; -} - -static int drmBOCreateList(int numTarget, drmBOList *list) -{ - DRMINITLISTHEAD(&list->list); - DRMINITLISTHEAD(&list->free); - list->numTarget = numTarget; - list->numCurrent = 0; - list->numOnList = 0; - return drmAdjustListNodes(list); -} - -static int drmBOResetList(drmBOList *list) -{ - drmMMListHead *l; - int ret; - - ret = drmAdjustListNodes(list); - if (ret) - return ret; - - l = list->list.next; - while (l != &list->list) { - DRMLISTDEL(l); - DRMLISTADD(l, &list->free); - list->numOnList--; - l = list->list.next; - } - return drmAdjustListNodes(list); -} - -void driWriteLockKernelBO(void) -{ - pipe_mutex_lock(bmMutex); - while(kernelReaders != 0) - pipe_condvar_wait(bmCond, bmMutex); -} - -void driWriteUnlockKernelBO(void) -{ - pipe_mutex_unlock(bmMutex); -} - -void driReadLockKernelBO(void) -{ - pipe_mutex_lock(bmMutex); - kernelReaders++; - pipe_mutex_unlock(bmMutex); -} - -void driReadUnlockKernelBO(void) -{ - pipe_mutex_lock(bmMutex); - if (--kernelReaders == 0) - pipe_condvar_broadcast(bmCond); - pipe_mutex_unlock(bmMutex); -} - - - - -/* - * TODO: Introduce fence pools in the same way as - * buffer object pools. 
- */ - -typedef struct _DriBufferObject -{ - DriBufferPool *pool; - pipe_mutex mutex; - int refCount; - const char *name; - uint64_t flags; - unsigned hint; - unsigned alignment; - unsigned createdByReference; - void *private; - /* user-space buffer: */ - unsigned userBuffer; - void *userData; - unsigned userSize; -} DriBufferObject; - -typedef struct _DriBufferList { - drmBOList drmBuffers; /* List of kernel buffers needing validation */ - drmBOList driBuffers; /* List of user-space buffers needing validation */ -} DriBufferList; - - -void -bmError(int val, const char *file, const char *function, int line) -{ - printf("Fatal video memory manager error \"%s\".\n" - "Check kernel logs or set the LIBGL_DEBUG\n" - "environment variable to \"verbose\" for more info.\n" - "Detected in file %s, line %d, function %s.\n", - strerror(-val), file, line, function); -#ifndef NDEBUG - abort(); -#else - abort(); -#endif -} - -extern drmBO * -driBOKernel(struct _DriBufferObject *buf) -{ - drmBO *ret; - - driReadLockKernelBO(); - pipe_mutex_lock(buf->mutex); - assert(buf->private != NULL); - ret = buf->pool->kernel(buf->pool, buf->private); - if (!ret) - BM_CKFATAL(-EINVAL); - pipe_mutex_unlock(buf->mutex); - driReadUnlockKernelBO(); - - return ret; -} - -void -driBOWaitIdle(struct _DriBufferObject *buf, int lazy) -{ - - /* - * This function may block. Is it sane to keep the mutex held during - * that time?? - */ - - pipe_mutex_lock(buf->mutex); - BM_CKFATAL(buf->pool->waitIdle(buf->pool, buf->private, &buf->mutex, lazy)); - pipe_mutex_unlock(buf->mutex); -} - -void * -driBOMap(struct _DriBufferObject *buf, unsigned flags, unsigned hint) -{ - void *virtual; - int retval; - - if (buf->userBuffer) { - return buf->userData; - } - - pipe_mutex_lock(buf->mutex); - assert(buf->private != NULL); - retval = buf->pool->map(buf->pool, buf->private, flags, hint, - &buf->mutex, &virtual); - pipe_mutex_unlock(buf->mutex); - - return retval == 0 ? 
virtual : NULL; -} - -void -driBOUnmap(struct _DriBufferObject *buf) -{ - if (buf->userBuffer) - return; - - assert(buf->private != NULL); - pipe_mutex_lock(buf->mutex); - BM_CKFATAL(buf->pool->unmap(buf->pool, buf->private)); - pipe_mutex_unlock(buf->mutex); -} - -unsigned long -driBOOffset(struct _DriBufferObject *buf) -{ - unsigned long ret; - - assert(buf->private != NULL); - - pipe_mutex_lock(buf->mutex); - ret = buf->pool->offset(buf->pool, buf->private); - pipe_mutex_unlock(buf->mutex); - return ret; -} - -unsigned long -driBOPoolOffset(struct _DriBufferObject *buf) -{ - unsigned long ret; - - assert(buf->private != NULL); - - pipe_mutex_lock(buf->mutex); - ret = buf->pool->poolOffset(buf->pool, buf->private); - pipe_mutex_unlock(buf->mutex); - return ret; -} - -uint64_t -driBOFlags(struct _DriBufferObject *buf) -{ - uint64_t ret; - - assert(buf->private != NULL); - - driReadLockKernelBO(); - pipe_mutex_lock(buf->mutex); - ret = buf->pool->flags(buf->pool, buf->private); - pipe_mutex_unlock(buf->mutex); - driReadUnlockKernelBO(); - return ret; -} - -struct _DriBufferObject * -driBOReference(struct _DriBufferObject *buf) -{ - pipe_mutex_lock(buf->mutex); - if (++buf->refCount == 1) { - pipe_mutex_unlock(buf->mutex); - BM_CKFATAL(-EINVAL); - } - pipe_mutex_unlock(buf->mutex); - return buf; -} - -void -driBOUnReference(struct _DriBufferObject *buf) -{ - int tmp; - - if (!buf) - return; - - pipe_mutex_lock(buf->mutex); - tmp = --buf->refCount; - if (!tmp) { - pipe_mutex_unlock(buf->mutex); - if (buf->private) { - if (buf->createdByReference) - buf->pool->unreference(buf->pool, buf->private); - else - buf->pool->destroy(buf->pool, buf->private); - } - if (buf->userBuffer) - num_user_buffers--; - else - num_buffers--; - free(buf); - } else - pipe_mutex_unlock(buf->mutex); - -} - - -int -driBOData(struct _DriBufferObject *buf, - unsigned size, const void *data, - DriBufferPool *newPool, - uint64_t flags) -{ - void *virtual = NULL; - int newBuffer; - int retval = 0; 
- struct _DriBufferPool *pool; - - assert(!buf->userBuffer); /* XXX just do a memcpy? */ - - pipe_mutex_lock(buf->mutex); - pool = buf->pool; - - if (pool == NULL && newPool != NULL) { - buf->pool = newPool; - pool = newPool; - } - if (newPool == NULL) - newPool = pool; - - if (!pool->create) { - assert((size_t)"driBOData called on invalid buffer\n" & 0); - BM_CKFATAL(-EINVAL); - } - - newBuffer = (!buf->private || pool != newPool || - pool->size(pool, buf->private) < size); - - if (!flags) - flags = buf->flags; - - if (newBuffer) { - - if (buf->createdByReference) { - assert((size_t)"driBOData requiring resizing called on shared buffer.\n" & 0); - BM_CKFATAL(-EINVAL); - } - - if (buf->private) - buf->pool->destroy(buf->pool, buf->private); - - pool = newPool; - buf->pool = newPool; - buf->private = pool->create(pool, size, flags, DRM_BO_HINT_DONT_FENCE, - buf->alignment); - if (!buf->private) - retval = -ENOMEM; - - if (retval == 0) - retval = pool->map(pool, buf->private, - DRM_BO_FLAG_WRITE, - DRM_BO_HINT_DONT_BLOCK, &buf->mutex, &virtual); - } else if (pool->map(pool, buf->private, DRM_BO_FLAG_WRITE, - DRM_BO_HINT_DONT_BLOCK, &buf->mutex, &virtual)) { - /* - * Buffer is busy. need to create a new one. - */ - - void *newBuf; - - newBuf = pool->create(pool, size, flags, DRM_BO_HINT_DONT_FENCE, - buf->alignment); - if (newBuf) { - buf->pool->destroy(buf->pool, buf->private); - buf->private = newBuf; - } - - retval = pool->map(pool, buf->private, - DRM_BO_FLAG_WRITE, 0, &buf->mutex, &virtual); - } else { - uint64_t flag_diff = flags ^ buf->flags; - - /* - * We might need to change buffer flags. 
- */ - - if (flag_diff){ - assert(pool->setStatus != NULL); - BM_CKFATAL(pool->unmap(pool, buf->private)); - BM_CKFATAL(pool->setStatus(pool, buf->private, flag_diff, - buf->flags)); - if (!data) - goto out; - - retval = pool->map(pool, buf->private, - DRM_BO_FLAG_WRITE, 0, &buf->mutex, &virtual); - } - } - - if (retval == 0) { - if (data) - memcpy(virtual, data, size); - - BM_CKFATAL(pool->unmap(pool, buf->private)); - } - - out: - pipe_mutex_unlock(buf->mutex); - - return retval; -} - -void -driBOSubData(struct _DriBufferObject *buf, - unsigned long offset, unsigned long size, const void *data) -{ - void *virtual; - - assert(!buf->userBuffer); /* XXX just do a memcpy? */ - - pipe_mutex_lock(buf->mutex); - if (size && data) { - BM_CKFATAL(buf->pool->map(buf->pool, buf->private, - DRM_BO_FLAG_WRITE, 0, &buf->mutex, - &virtual)); - memcpy((unsigned char *) virtual + offset, data, size); - BM_CKFATAL(buf->pool->unmap(buf->pool, buf->private)); - } - pipe_mutex_unlock(buf->mutex); -} - -void -driBOGetSubData(struct _DriBufferObject *buf, - unsigned long offset, unsigned long size, void *data) -{ - void *virtual; - - assert(!buf->userBuffer); /* XXX just do a memcpy? 
*/ - - pipe_mutex_lock(buf->mutex); - if (size && data) { - BM_CKFATAL(buf->pool->map(buf->pool, buf->private, - DRM_BO_FLAG_READ, 0, &buf->mutex, &virtual)); - memcpy(data, (unsigned char *) virtual + offset, size); - BM_CKFATAL(buf->pool->unmap(buf->pool, buf->private)); - } - pipe_mutex_unlock(buf->mutex); -} - -void -driBOSetReferenced(struct _DriBufferObject *buf, - unsigned long handle) -{ - pipe_mutex_lock(buf->mutex); - if (buf->private != NULL) { - assert((size_t)"Invalid buffer for setReferenced\n" & 0); - BM_CKFATAL(-EINVAL); - - } - if (buf->pool->reference == NULL) { - assert((size_t)"Invalid buffer pool for setReferenced\n" & 0); - BM_CKFATAL(-EINVAL); - } - buf->private = buf->pool->reference(buf->pool, handle); - if (!buf->private) { - assert((size_t)"Invalid buffer pool for setStatic\n" & 0); - BM_CKFATAL(-ENOMEM); - } - buf->createdByReference = TRUE; - buf->flags = buf->pool->kernel(buf->pool, buf->private)->flags; - pipe_mutex_unlock(buf->mutex); -} - -int -driGenBuffers(struct _DriBufferPool *pool, - const char *name, - unsigned n, - struct _DriBufferObject *buffers[], - unsigned alignment, uint64_t flags, unsigned hint) -{ - struct _DriBufferObject *buf; - int i; - - flags = (flags) ? 
flags : DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_MEM_VRAM | - DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE; - - ++num_buffers; - - assert(pool); - - for (i = 0; i < n; ++i) { - buf = (struct _DriBufferObject *) calloc(1, sizeof(*buf)); - if (!buf) - return -ENOMEM; - - pipe_mutex_init(buf->mutex); - pipe_mutex_lock(buf->mutex); - buf->refCount = 1; - buf->flags = flags; - buf->hint = hint; - buf->name = name; - buf->alignment = alignment; - buf->pool = pool; - buf->createdByReference = 0; - pipe_mutex_unlock(buf->mutex); - buffers[i] = buf; - } - return 0; -} - -void -driGenUserBuffer(struct _DriBufferPool *pool, - const char *name, - struct _DriBufferObject **buffers, - void *ptr, unsigned bytes) -{ - const unsigned alignment = 1, flags = 0, hint = 0; - - --num_buffers; /* JB: is inced in GenBuffes */ - driGenBuffers(pool, name, 1, buffers, alignment, flags, hint); - ++num_user_buffers; - - (*buffers)->userBuffer = 1; - (*buffers)->userData = ptr; - (*buffers)->userSize = bytes; -} - -void -driDeleteBuffers(unsigned n, struct _DriBufferObject *buffers[]) -{ - int i; - - for (i = 0; i < n; ++i) { - driBOUnReference(buffers[i]); - } -} - - -void -driInitBufMgr(int fd) -{ - ; -} - -/* - * Note that lists are per-context and don't need mutex protection. - */ - -struct _DriBufferList * -driBOCreateList(int target) -{ - struct _DriBufferList *list = calloc(sizeof(*list), 1); - - BM_CKFATAL(drmBOCreateList(target, &list->drmBuffers)); - BM_CKFATAL(drmBOCreateList(target, &list->driBuffers)); - return list; -} - -int -driBOResetList(struct _DriBufferList * list) -{ - int ret; - ret = drmBOResetList(&list->drmBuffers); - if (ret) - return ret; - ret = drmBOResetList(&list->driBuffers); - return ret; -} - -void -driBOFreeList(struct _DriBufferList * list) -{ - drmBOFreeList(&list->drmBuffers); - drmBOFreeList(&list->driBuffers); - free(list); -} - - -/* - * Copied from libdrm, because it is needed by driAddValidateItem. 
- */ - -static drmBONode * -driAddListItem(drmBOList * list, drmBO * item, - uint64_t arg0, uint64_t arg1) -{ - drmBONode *node; - drmMMListHead *l; - - l = list->free.next; - if (l == &list->free) { - node = (drmBONode *) malloc(sizeof(*node)); - if (!node) { - return NULL; - } - list->numCurrent++; - } else { - DRMLISTDEL(l); - node = DRMLISTENTRY(drmBONode, l, head); - } - memset(&node->bo_arg, 0, sizeof(node->bo_arg)); - node->buf = item; - node->arg0 = arg0; - node->arg1 = arg1; - DRMLISTADDTAIL(&node->head, &list->list); - list->numOnList++; - return node; -} - -/* - * Slightly modified version compared to the libdrm version. - * This one returns the list index of the buffer put on the list. - */ - -static int -driAddValidateItem(drmBOList * list, drmBO * buf, uint64_t flags, - uint64_t mask, int *itemLoc, - struct _drmBONode **pnode) -{ - drmBONode *node, *cur; - drmMMListHead *l; - int count = 0; - - cur = NULL; - - for (l = list->list.next; l != &list->list; l = l->next) { - node = DRMLISTENTRY(drmBONode, l, head); - if (node->buf == buf) { - cur = node; - break; - } - count++; - } - if (!cur) { - cur = driAddListItem(list, buf, flags, mask); - if (!cur) - return -ENOMEM; - - cur->arg0 = flags; - cur->arg1 = mask; - } else { - uint64_t memFlags = cur->arg0 & flags & DRM_BO_MASK_MEM; - uint64_t accFlags = (cur->arg0 | flags) & ~DRM_BO_MASK_MEM; - - if (mask & cur->arg1 & ~DRM_BO_MASK_MEM & (cur->arg0 ^ flags)) { - return -EINVAL; - } - - cur->arg1 |= mask; - cur->arg0 = (cur->arg0 & ~mask) | ((memFlags | accFlags) & mask); - - if (((cur->arg1 & DRM_BO_MASK_MEM) != 0) && - (cur->arg0 & DRM_BO_MASK_MEM) == 0) { - return -EINVAL; - } - } - *itemLoc = count; - *pnode = cur; - return 0; -} - - -void -driBOAddListItem(struct _DriBufferList * list, struct _DriBufferObject *buf, - uint64_t flags, uint64_t mask, int *itemLoc, - struct _drmBONode **node) -{ - int newItem; - - pipe_mutex_lock(buf->mutex); - BM_CKFATAL(driAddValidateItem(&list->drmBuffers, - 
buf->pool->kernel(buf->pool, buf->private), - flags, mask, itemLoc, node)); - BM_CKFATAL(drmAddValidateItem(&list->driBuffers, (drmBO *) buf, - flags, mask, &newItem)); - if (newItem) - buf->refCount++; - - pipe_mutex_unlock(buf->mutex); -} - -drmBOList *driGetdrmBOList(struct _DriBufferList *list) -{ - driWriteLockKernelBO(); - return &list->drmBuffers; -} - -void driPutdrmBOList(struct _DriBufferList *list) -{ - driWriteUnlockKernelBO(); -} - - -void -driBOFence(struct _DriBufferObject *buf, struct _DriFenceObject *fence) -{ - pipe_mutex_lock(buf->mutex); - if (buf->pool->fence) - BM_CKFATAL(buf->pool->fence(buf->pool, buf->private, fence)); - pipe_mutex_unlock(buf->mutex); - -} - -void -driBOUnrefUserList(struct _DriBufferList *list) -{ - struct _DriBufferObject *buf; - void *curBuf; - - curBuf = drmBOListIterator(&list->driBuffers); - while (curBuf) { - buf = (struct _DriBufferObject *)drmBOListBuf(curBuf); - driBOUnReference(buf); - curBuf = drmBOListNext(&list->driBuffers, curBuf); - } -} - -struct _DriFenceObject * -driBOFenceUserList(struct _DriFenceMgr *mgr, - struct _DriBufferList *list, const char *name, - drmFence *kFence) -{ - struct _DriFenceObject *fence; - struct _DriBufferObject *buf; - void *curBuf; - - fence = driFenceCreate(mgr, kFence->fence_class, kFence->type, - kFence, sizeof(*kFence)); - curBuf = drmBOListIterator(&list->driBuffers); - - /* - * User-space fencing callbacks. - */ - - while (curBuf) { - buf = (struct _DriBufferObject *) drmBOListBuf(curBuf); - driBOFence(buf, fence); - driBOUnReference(buf); - curBuf = drmBOListNext(&list->driBuffers, curBuf); - } - - driBOResetList(list); - return fence; -} - -void -driBOValidateUserList(struct _DriBufferList * list) -{ - void *curBuf; - struct _DriBufferObject *buf; - - curBuf = drmBOListIterator(&list->driBuffers); - - /* - * User-space validation callbacks. 
- */ - - while (curBuf) { - buf = (struct _DriBufferObject *) drmBOListBuf(curBuf); - pipe_mutex_lock(buf->mutex); - if (buf->pool->validate) - BM_CKFATAL(buf->pool->validate(buf->pool, buf->private, &buf->mutex)); - pipe_mutex_unlock(buf->mutex); - curBuf = drmBOListNext(&list->driBuffers, curBuf); - } -} - - -void -driPoolTakeDown(struct _DriBufferPool *pool) -{ - pool->takeDown(pool); - -} - -unsigned long -driBOSize(struct _DriBufferObject *buf) -{ - unsigned long size; - - pipe_mutex_lock(buf->mutex); - size = buf->pool->size(buf->pool, buf->private); - pipe_mutex_unlock(buf->mutex); - - return size; - -} - -drmBOList *driBOGetDRMBuffers(struct _DriBufferList *list) -{ - return &list->drmBuffers; -} - -drmBOList *driBOGetDRIBuffers(struct _DriBufferList *list) -{ - return &list->driBuffers; -} - diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.h b/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.h deleted file mode 100644 index e6c0cff0a0..0000000000 --- a/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.h +++ /dev/null @@ -1,138 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ -/* - * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> - * Keith Whitwell <keithw-at-tungstengraphics-dot-com> - */ - -#ifndef _PSB_BUFMGR_H_ -#define _PSB_BUFMGR_H_ -#include <xf86mm.h> -#include "i915_drm.h" -#include "ws_dri_fencemgr.h" - -typedef struct _drmBONode -{ - drmMMListHead head; - drmBO *buf; - struct drm_i915_op_arg bo_arg; - uint64_t arg0; - uint64_t arg1; -} drmBONode; - -typedef struct _drmBOList { - unsigned numTarget; - unsigned numCurrent; - unsigned numOnList; - drmMMListHead list; - drmMMListHead free; -} drmBOList; - - -struct _DriFenceObject; -struct _DriBufferObject; -struct _DriBufferPool; -struct _DriBufferList; - -/* - * Return a pointer to the libdrm buffer object this DriBufferObject - * uses. 
- */ - -extern drmBO *driBOKernel(struct _DriBufferObject *buf); -extern void *driBOMap(struct _DriBufferObject *buf, unsigned flags, - unsigned hint); -extern void driBOUnmap(struct _DriBufferObject *buf); -extern unsigned long driBOOffset(struct _DriBufferObject *buf); -extern unsigned long driBOPoolOffset(struct _DriBufferObject *buf); - -extern uint64_t driBOFlags(struct _DriBufferObject *buf); -extern struct _DriBufferObject *driBOReference(struct _DriBufferObject *buf); -extern void driBOUnReference(struct _DriBufferObject *buf); - -extern int driBOData(struct _DriBufferObject *r_buf, - unsigned size, const void *data, - struct _DriBufferPool *pool, uint64_t flags); - -extern void driBOSubData(struct _DriBufferObject *buf, - unsigned long offset, unsigned long size, - const void *data); -extern void driBOGetSubData(struct _DriBufferObject *buf, - unsigned long offset, unsigned long size, - void *data); -extern int driGenBuffers(struct _DriBufferPool *pool, - const char *name, - unsigned n, - struct _DriBufferObject *buffers[], - unsigned alignment, uint64_t flags, unsigned hint); -extern void driGenUserBuffer(struct _DriBufferPool *pool, - const char *name, - struct _DriBufferObject *buffers[], - void *ptr, unsigned bytes); -extern void driDeleteBuffers(unsigned n, struct _DriBufferObject *buffers[]); -extern void driInitBufMgr(int fd); -extern struct _DriBufferList *driBOCreateList(int target); -extern int driBOResetList(struct _DriBufferList * list); -extern void driBOAddListItem(struct _DriBufferList * list, - struct _DriBufferObject *buf, - uint64_t flags, uint64_t mask, int *itemLoc, - struct _drmBONode **node); - -extern void driBOValidateList(int fd, struct _DriBufferList * list); -extern void driBOFreeList(struct _DriBufferList * list); -extern struct _DriFenceObject *driBOFenceUserList(struct _DriFenceMgr *mgr, - struct _DriBufferList *list, - const char *name, - drmFence *kFence); -extern void driBOUnrefUserList(struct _DriBufferList *list); -extern 
void driBOValidateUserList(struct _DriBufferList * list); -extern drmBOList *driGetdrmBOList(struct _DriBufferList *list); -extern void driPutdrmBOList(struct _DriBufferList *list); - -extern void driBOFence(struct _DriBufferObject *buf, - struct _DriFenceObject *fence); - -extern void driPoolTakeDown(struct _DriBufferPool *pool); -extern void driBOSetReferenced(struct _DriBufferObject *buf, - unsigned long handle); -unsigned long driBOSize(struct _DriBufferObject *buf); -extern void driBOWaitIdle(struct _DriBufferObject *buf, int lazy); -extern void driPoolTakeDown(struct _DriBufferPool *pool); - -extern void driReadLockKernelBO(void); -extern void driReadUnlockKernelBO(void); -extern void driWriteLockKernelBO(void); -extern void driWriteUnlockKernelBO(void); - -/* - * For debugging purposes. - */ - -extern drmBOList *driBOGetDRMBuffers(struct _DriBufferList *list); -extern drmBOList *driBOGetDRIBuffers(struct _DriBufferList *list); -#endif diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h b/src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h deleted file mode 100644 index ad3b6f3931..0000000000 --- a/src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h +++ /dev/null @@ -1,102 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * - **************************************************************************/ -/* - * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> - */ - -#ifndef _PSB_BUFPOOL_H_ -#define _PSB_BUFPOOL_H_ - -#include <xf86drm.h> -#include "pipe/p_thread.h" -struct _DriFenceObject; - -typedef struct _DriBufferPool -{ - int fd; - int (*map) (struct _DriBufferPool * pool, void *private, - unsigned flags, int hint, pipe_mutex *mutex, - void **virtual); - int (*unmap) (struct _DriBufferPool * pool, void *private); - int (*destroy) (struct _DriBufferPool * pool, void *private); - unsigned long (*offset) (struct _DriBufferPool * pool, void *private); - unsigned long (*poolOffset) (struct _DriBufferPool * pool, void *private); - uint64_t (*flags) (struct _DriBufferPool * pool, void *private); - unsigned long (*size) (struct _DriBufferPool * pool, void *private); - void *(*create) (struct _DriBufferPool * pool, unsigned long size, - uint64_t flags, unsigned hint, unsigned alignment); - void *(*reference) (struct _DriBufferPool * pool, unsigned handle); - int (*unreference) (struct _DriBufferPool * pool, void *private); - int (*fence) (struct _DriBufferPool * pool, void *private, - struct _DriFenceObject * fence); - drmBO *(*kernel) (struct _DriBufferPool * pool, void *private); - int (*validate) (struct _DriBufferPool * pool, void *private, pipe_mutex *mutex); - int (*waitIdle) (struct _DriBufferPool *pool, void *private, pipe_mutex *mutex, - int lazy); - int (*setStatus) (struct _DriBufferPool *pool, void *private, - uint64_t flag_diff, uint64_t old_flags); - void (*takeDown) (struct _DriBufferPool * pool); - void *data; -} DriBufferPool; - -extern void bmError(int val, const char *file, const char *function, - int line); -#define BM_CKFATAL(val) \ - do{ \ - int tstVal = (val); \ - if (tstVal) \ - bmError(tstVal, __FILE__, __FUNCTION__, __LINE__); \ - } while(0); - - -/* - * Builtin pools. - */ - -/* - * Kernel buffer objects. Size in multiples of page size. 
Page size aligned. - */ - -extern struct _DriBufferPool *driDRMPoolInit(int fd); -extern struct _DriBufferPool *driMallocPoolInit(void); - -struct _DriFreeSlabManager; -extern struct _DriBufferPool * driSlabPoolInit(int fd, uint64_t flags, - uint64_t validMask, - uint32_t smallestSize, - uint32_t numSizes, - uint32_t desiredNumBuffers, - uint32_t maxSlabSize, - uint32_t pageAlignment, - struct _DriFreeSlabManager *fMan); -extern void driFinishFreeSlabManager(struct _DriFreeSlabManager *fMan); -extern struct _DriFreeSlabManager * -driInitFreeSlabManager(uint32_t checkIntervalMsec, uint32_t slabTimeoutMsec); - - -#endif diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c b/src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c deleted file mode 100644 index 54618b1c82..0000000000 --- a/src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c +++ /dev/null @@ -1,268 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ -/* - * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> - */ - -#include <xf86drm.h> -#include <stdlib.h> -#include <unistd.h> -#include "ws_dri_bufpool.h" -#include "ws_dri_bufmgr.h" -#include "assert.h" - -/* - * Buffer pool implementation using DRM buffer objects as DRI buffer objects. - */ - -static void * -pool_create(struct _DriBufferPool *pool, - unsigned long size, uint64_t flags, unsigned hint, - unsigned alignment) -{ - drmBO *buf = (drmBO *) malloc(sizeof(*buf)); - int ret; - unsigned pageSize = getpagesize(); - - if (!buf) - return NULL; - - if ((alignment > pageSize) && (alignment % pageSize)) { - free(buf); - return NULL; - } - - ret = drmBOCreate(pool->fd, size, alignment / pageSize, - NULL, - flags, hint, buf); - if (ret) { - free(buf); - return NULL; - } - - return (void *) buf; -} - -static void * -pool_reference(struct _DriBufferPool *pool, unsigned handle) -{ - drmBO *buf = (drmBO *) malloc(sizeof(*buf)); - int ret; - - if (!buf) - return NULL; - - ret = drmBOReference(pool->fd, handle, buf); - - if (ret) { - free(buf); - return NULL; - } - - return (void *) buf; -} - -static int -pool_destroy(struct _DriBufferPool *pool, void *private) -{ - int ret; - drmBO *buf = (drmBO *) private; - driReadLockKernelBO(); - ret = drmBOUnreference(pool->fd, buf); - free(buf); - driReadUnlockKernelBO(); - return ret; -} - -static int -pool_unreference(struct _DriBufferPool *pool, void *private) -{ - int 
ret; - drmBO *buf = (drmBO *) private; - driReadLockKernelBO(); - ret = drmBOUnreference(pool->fd, buf); - free(buf); - driReadUnlockKernelBO(); - return ret; -} - -static int -pool_map(struct _DriBufferPool *pool, void *private, unsigned flags, - int hint, pipe_mutex *mutex, void **virtual) -{ - drmBO *buf = (drmBO *) private; - int ret; - - driReadLockKernelBO(); - ret = drmBOMap(pool->fd, buf, flags, hint, virtual); - driReadUnlockKernelBO(); - return ret; -} - -static int -pool_unmap(struct _DriBufferPool *pool, void *private) -{ - drmBO *buf = (drmBO *) private; - int ret; - - driReadLockKernelBO(); - ret = drmBOUnmap(pool->fd, buf); - driReadUnlockKernelBO(); - - return ret; -} - -static unsigned long -pool_offset(struct _DriBufferPool *pool, void *private) -{ - drmBO *buf = (drmBO *) private; - unsigned long offset; - - driReadLockKernelBO(); - assert(buf->flags & DRM_BO_FLAG_NO_MOVE); - offset = buf->offset; - driReadUnlockKernelBO(); - - return buf->offset; -} - -static unsigned long -pool_poolOffset(struct _DriBufferPool *pool, void *private) -{ - return 0; -} - -static uint64_t -pool_flags(struct _DriBufferPool *pool, void *private) -{ - drmBO *buf = (drmBO *) private; - uint64_t flags; - - driReadLockKernelBO(); - flags = buf->flags; - driReadUnlockKernelBO(); - - return flags; -} - - -static unsigned long -pool_size(struct _DriBufferPool *pool, void *private) -{ - drmBO *buf = (drmBO *) private; - unsigned long size; - - driReadLockKernelBO(); - size = buf->size; - driReadUnlockKernelBO(); - - return buf->size; -} - -static int -pool_fence(struct _DriBufferPool *pool, void *private, - struct _DriFenceObject *fence) -{ - /* - * Noop. The kernel handles all fencing. 
- */ - - return 0; -} - -static drmBO * -pool_kernel(struct _DriBufferPool *pool, void *private) -{ - return (drmBO *) private; -} - -static int -pool_waitIdle(struct _DriBufferPool *pool, void *private, pipe_mutex *mutex, - int lazy) -{ - drmBO *buf = (drmBO *) private; - int ret; - - driReadLockKernelBO(); - ret = drmBOWaitIdle(pool->fd, buf, (lazy) ? DRM_BO_HINT_WAIT_LAZY:0); - driReadUnlockKernelBO(); - - return ret; -} - - -static void -pool_takedown(struct _DriBufferPool *pool) -{ - free(pool); -} - -/*static int -pool_setStatus(struct _DriBufferPool *pool, void *private, - uint64_t flag_diff, uint64_t old_flags) -{ - drmBO *buf = (drmBO *) private; - uint64_t new_flags = old_flags ^ flag_diff; - int ret; - - driReadLockKernelBO(); - ret = drmBOSetStatus(pool->fd, buf, new_flags, flag_diff, - 0, 0, 0); - driReadUnlockKernelBO(); - return ret; -}*/ - -struct _DriBufferPool * -driDRMPoolInit(int fd) -{ - struct _DriBufferPool *pool; - - pool = (struct _DriBufferPool *) malloc(sizeof(*pool)); - - if (!pool) - return NULL; - - pool->fd = fd; - pool->map = &pool_map; - pool->unmap = &pool_unmap; - pool->destroy = &pool_destroy; - pool->offset = &pool_offset; - pool->poolOffset = &pool_poolOffset; - pool->flags = &pool_flags; - pool->size = &pool_size; - pool->create = &pool_create; - pool->fence = &pool_fence; - pool->kernel = &pool_kernel; - pool->validate = NULL; - pool->waitIdle = &pool_waitIdle; - pool->takeDown = &pool_takedown; - pool->reference = &pool_reference; - pool->unreference = &pool_unreference; - pool->data = NULL; - return pool; -} diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c b/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c deleted file mode 100644 index 831c75d30c..0000000000 --- a/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c +++ /dev/null @@ -1,377 +0,0 @@ -#include "ws_dri_fencemgr.h" -#include "pipe/p_thread.h" -#include <xf86mm.h> -#include <string.h> -#include <unistd.h> - -/* - * Note: Locking order is 
- * _DriFenceObject::mutex - * _DriFenceMgr::mutex - */ - -struct _DriFenceMgr { - /* - * Constant members. Need no mutex protection. - */ - struct _DriFenceMgrCreateInfo info; - void *private; - - /* - * These members are protected by this->mutex - */ - pipe_mutex mutex; - int refCount; - drmMMListHead *heads; - int num_fences; -}; - -struct _DriFenceObject { - - /* - * These members are constant and need no mutex protection. - */ - struct _DriFenceMgr *mgr; - uint32_t fence_class; - uint32_t fence_type; - - /* - * These members are protected by mgr->mutex. - */ - drmMMListHead head; - int refCount; - - /* - * These members are protected by this->mutex. - */ - pipe_mutex mutex; - uint32_t signaled_type; - void *private; -}; - -uint32_t -driFenceType(struct _DriFenceObject *fence) -{ - return fence->fence_type; -} - -struct _DriFenceMgr * -driFenceMgrCreate(const struct _DriFenceMgrCreateInfo *info) -{ - struct _DriFenceMgr *tmp; - uint32_t i; - - tmp = calloc(1, sizeof(*tmp)); - if (!tmp) - return NULL; - - pipe_mutex_init(tmp->mutex); - pipe_mutex_lock(tmp->mutex); - tmp->refCount = 1; - tmp->info = *info; - tmp->num_fences = 0; - tmp->heads = calloc(tmp->info.num_classes, sizeof(*tmp->heads)); - if (!tmp->heads) - goto out_err; - - for (i=0; i<tmp->info.num_classes; ++i) { - DRMINITLISTHEAD(&tmp->heads[i]); - } - pipe_mutex_unlock(tmp->mutex); - return tmp; - - out_err: - if (tmp) - free(tmp); - return NULL; -} - -static void -driFenceMgrUnrefUnlock(struct _DriFenceMgr **pMgr) -{ - struct _DriFenceMgr *mgr = *pMgr; - - *pMgr = NULL; - if (--mgr->refCount == 0) - free(mgr); - else - pipe_mutex_unlock(mgr->mutex); -} - -void -driFenceMgrUnReference(struct _DriFenceMgr **pMgr) -{ - pipe_mutex_lock((*pMgr)->mutex); - driFenceMgrUnrefUnlock(pMgr); -} - -static void -driFenceUnReferenceLocked(struct _DriFenceObject **pFence) -{ - struct _DriFenceObject *fence = *pFence; - struct _DriFenceMgr *mgr = fence->mgr; - - *pFence = NULL; - if (--fence->refCount == 0) { - 
DRMLISTDELINIT(&fence->head); - if (fence->private) - mgr->info.unreference(mgr, &fence->private); - --mgr->num_fences; - fence->mgr = NULL; - --mgr->refCount; - free(fence); - - } -} - - -static void -driSignalPreviousFencesLocked(struct _DriFenceMgr *mgr, - drmMMListHead *list, - uint32_t fence_class, - uint32_t fence_type) -{ - struct _DriFenceObject *entry; - drmMMListHead *prev; - - while(list != &mgr->heads[fence_class]) { - entry = DRMLISTENTRY(struct _DriFenceObject, list, head); - - /* - * Up refcount so that entry doesn't disappear from under us - * when we unlock-relock mgr to get the correct locking order. - */ - - ++entry->refCount; - pipe_mutex_unlock(mgr->mutex); - pipe_mutex_lock(entry->mutex); - pipe_mutex_lock(mgr->mutex); - - prev = list->prev; - - - - if (list->prev == list) { - - /* - * Somebody else removed the entry from the list. - */ - - pipe_mutex_unlock(entry->mutex); - driFenceUnReferenceLocked(&entry); - return; - } - - entry->signaled_type |= (fence_type & entry->fence_type); - if (entry->signaled_type == entry->fence_type) { - DRMLISTDELINIT(list); - mgr->info.unreference(mgr, &entry->private); - } - pipe_mutex_unlock(entry->mutex); - driFenceUnReferenceLocked(&entry); - list = prev; - } -} - - -int -driFenceFinish(struct _DriFenceObject *fence, uint32_t fence_type, - int lazy_hint) -{ - struct _DriFenceMgr *mgr = fence->mgr; - int ret = 0; - - pipe_mutex_lock(fence->mutex); - - if ((fence->signaled_type & fence_type) == fence_type) - goto out0; - - ret = mgr->info.finish(mgr, fence->private, fence_type, lazy_hint); - if (ret) - goto out0; - - pipe_mutex_lock(mgr->mutex); - pipe_mutex_unlock(fence->mutex); - - driSignalPreviousFencesLocked(mgr, &fence->head, fence->fence_class, - fence_type); - pipe_mutex_unlock(mgr->mutex); - return 0; - - out0: - pipe_mutex_unlock(fence->mutex); - return ret; -} - -uint32_t driFenceSignaledTypeCached(struct _DriFenceObject *fence) -{ - uint32_t ret; - - pipe_mutex_lock(fence->mutex); - ret = 
fence->signaled_type; - pipe_mutex_unlock(fence->mutex); - - return ret; -} - -int -driFenceSignaledType(struct _DriFenceObject *fence, uint32_t flush_type, - uint32_t *signaled) -{ - int ret = 0; - struct _DriFenceMgr *mgr; - - pipe_mutex_lock(fence->mutex); - mgr = fence->mgr; - *signaled = fence->signaled_type; - if ((fence->signaled_type & flush_type) == flush_type) - goto out0; - - ret = mgr->info.signaled(mgr, fence->private, flush_type, signaled); - if (ret) { - *signaled = fence->signaled_type; - goto out0; - } - - if ((fence->signaled_type | *signaled) == fence->signaled_type) - goto out0; - - pipe_mutex_lock(mgr->mutex); - pipe_mutex_unlock(fence->mutex); - - driSignalPreviousFencesLocked(mgr, &fence->head, fence->fence_class, - *signaled); - - pipe_mutex_unlock(mgr->mutex); - return 0; - out0: - pipe_mutex_unlock(fence->mutex); - return ret; -} - -struct _DriFenceObject * -driFenceReference(struct _DriFenceObject *fence) -{ - pipe_mutex_lock(fence->mgr->mutex); - ++fence->refCount; - pipe_mutex_unlock(fence->mgr->mutex); - return fence; -} - -void -driFenceUnReference(struct _DriFenceObject **pFence) -{ - struct _DriFenceMgr *mgr; - - if (*pFence == NULL) - return; - - mgr = (*pFence)->mgr; - pipe_mutex_lock(mgr->mutex); - ++mgr->refCount; - driFenceUnReferenceLocked(pFence); - driFenceMgrUnrefUnlock(&mgr); -} - -struct _DriFenceObject -*driFenceCreate(struct _DriFenceMgr *mgr, uint32_t fence_class, - uint32_t fence_type, void *private, size_t private_size) -{ - struct _DriFenceObject *fence; - size_t fence_size = sizeof(*fence); - - if (private_size) - fence_size = ((fence_size + 15) & ~15); - - fence = calloc(1, fence_size + private_size); - - if (!fence) { - int ret = mgr->info.finish(mgr, private, fence_type, 0); - - if (ret) - usleep(10000000); - - return NULL; - } - - pipe_mutex_init(fence->mutex); - pipe_mutex_lock(fence->mutex); - pipe_mutex_lock(mgr->mutex); - fence->refCount = 1; - DRMLISTADDTAIL(&fence->head, &mgr->heads[fence_class]); - 
fence->mgr = mgr; - ++mgr->refCount; - ++mgr->num_fences; - pipe_mutex_unlock(mgr->mutex); - fence->fence_class = fence_class; - fence->fence_type = fence_type; - fence->signaled_type = 0; - fence->private = private; - if (private_size) { - fence->private = (void *)(((uint8_t *) fence) + fence_size); - memcpy(fence->private, private, private_size); - } - - pipe_mutex_unlock(fence->mutex); - return fence; -} - - -static int -tSignaled(struct _DriFenceMgr *mgr, void *private, uint32_t flush_type, - uint32_t *signaled_type) -{ - long fd = (long) mgr->private; - int dummy; - drmFence *fence = (drmFence *) private; - int ret; - - *signaled_type = 0; - ret = drmFenceSignaled((int) fd, fence, flush_type, &dummy); - if (ret) - return ret; - - *signaled_type = fence->signaled; - - return 0; -} - -static int -tFinish(struct _DriFenceMgr *mgr, void *private, uint32_t fence_type, - int lazy_hint) -{ - long fd = (long) mgr->private; - unsigned flags = lazy_hint ? DRM_FENCE_FLAG_WAIT_LAZY : 0; - - return drmFenceWait((int)fd, flags, (drmFence *) private, fence_type); -} - -static int -tUnref(struct _DriFenceMgr *mgr, void **private) -{ - long fd = (long) mgr->private; - drmFence *fence = (drmFence *) *private; - *private = NULL; - - return drmFenceUnreference(fd, fence); -} - -struct _DriFenceMgr *driFenceMgrTTMInit(int fd) -{ - struct _DriFenceMgrCreateInfo info; - struct _DriFenceMgr *mgr; - - info.flags = DRI_FENCE_CLASS_ORDERED; - info.num_classes = 4; - info.signaled = tSignaled; - info.finish = tFinish; - info.unreference = tUnref; - - mgr = driFenceMgrCreate(&info); - if (mgr == NULL) - return NULL; - - mgr->private = (void *) (long) fd; - return mgr; -} - diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.h b/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.h deleted file mode 100644 index 4ea58dfe18..0000000000 --- a/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.h +++ /dev/null @@ -1,115 +0,0 @@ -#ifndef DRI_FENCEMGR_H -#define DRI_FENCEMGR_H - 
-#include <stdint.h> -#include <stdlib.h> - -struct _DriFenceObject; -struct _DriFenceMgr; - -/* - * Do a quick check to see if the fence manager has registered the fence - * object as signaled. Note that this function may return a false negative - * answer. - */ -extern uint32_t driFenceSignaledTypeCached(struct _DriFenceObject *fence); - -/* - * Check if the fence object is signaled. This function can be substantially - * more expensive to call than the above function, but will not return a false - * negative answer. The argument "flush_type" sets the types that the - * underlying mechanism must make sure will eventually signal. - */ -extern int driFenceSignaledType(struct _DriFenceObject *fence, - uint32_t flush_type, uint32_t *signaled); - -/* - * Convenience functions. - */ - -static inline int driFenceSignaled(struct _DriFenceObject *fence, - uint32_t flush_type) -{ - uint32_t signaled_types; - int ret = driFenceSignaledType(fence, flush_type, &signaled_types); - if (ret) - return 0; - return ((signaled_types & flush_type) == flush_type); -} - -static inline int driFenceSignaledCached(struct _DriFenceObject *fence, - uint32_t flush_type) -{ - uint32_t signaled_types = - driFenceSignaledTypeCached(fence); - - return ((signaled_types & flush_type) == flush_type); -} - -/* - * Reference a fence object. - */ -extern struct _DriFenceObject *driFenceReference(struct _DriFenceObject *fence); - -/* - * Unreference a fence object. The fence object pointer will be reset to NULL. - */ - -extern void driFenceUnReference(struct _DriFenceObject **pFence); - - -/* - * Wait for a fence to signal the indicated fence_type. - * If "lazy_hint" is true, it indicates that the wait may sleep to avoid - * busy-wait polling. - */ -extern int driFenceFinish(struct _DriFenceObject *fence, uint32_t fence_type, - int lazy_hint); - -/* - * Create a DriFenceObject for manager "mgr". - * - * "private" is a pointer that should be used for the callbacks in - * struct _DriFenceMgrCreateInfo. 
- * - * if private_size is nonzero, then the info stored at *private, with size - * private size will be copied and the fence manager will instead use a - * pointer to the copied data for the callbacks in - * struct _DriFenceMgrCreateInfo. In that case, the object pointed to by - * "private" may be destroyed after the call to driFenceCreate. - */ -extern struct _DriFenceObject *driFenceCreate(struct _DriFenceMgr *mgr, - uint32_t fence_class, - uint32_t fence_type, - void *private, - size_t private_size); - -extern uint32_t driFenceType(struct _DriFenceObject *fence); - -/* - * Fence creations are ordered. If a fence signals a fence_type, - * it is safe to assume that all fences of the same class that was - * created before that fence has signaled the same type. - */ - -#define DRI_FENCE_CLASS_ORDERED (1 << 0) - -struct _DriFenceMgrCreateInfo { - uint32_t flags; - uint32_t num_classes; - int (*signaled) (struct _DriFenceMgr *mgr, void *private, uint32_t flush_type, - uint32_t *signaled_type); - int (*finish) (struct _DriFenceMgr *mgr, void *private, uint32_t fence_type, int lazy_hint); - int (*unreference) (struct _DriFenceMgr *mgr, void **private); -}; - -extern struct _DriFenceMgr * -driFenceMgrCreate(const struct _DriFenceMgrCreateInfo *info); - -void -driFenceMgrUnReference(struct _DriFenceMgr **pMgr); - -extern struct _DriFenceMgr * -driFenceMgrTTMInit(int fd); - -#endif diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c b/src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c deleted file mode 100644 index 60924eac9e..0000000000 --- a/src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c +++ /dev/null @@ -1,161 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, TX., USA - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * - **************************************************************************/ -/* - * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> - */ - -#include <xf86drm.h> -#include <stdlib.h> -#include <errno.h> -#include "pipe/p_debug.h" -#include "pipe/p_thread.h" -#include "ws_dri_bufpool.h" -#include "ws_dri_bufmgr.h" - -static void * -pool_create(struct _DriBufferPool *pool, - unsigned long size, uint64_t flags, unsigned hint, - unsigned alignment) -{ - unsigned long *private = malloc(size + 2*sizeof(unsigned long)); - if ((flags & DRM_BO_MASK_MEM) != DRM_BO_FLAG_MEM_LOCAL) - abort(); - - *private = size; - return (void *)private; -} - - -static int -pool_destroy(struct _DriBufferPool *pool, void *private) -{ - free(private); - return 0; -} - -static int -pool_waitIdle(struct _DriBufferPool *pool, void *private, - pipe_mutex *mutex, int lazy) -{ - return 0; -} - -static int -pool_map(struct _DriBufferPool *pool, void *private, unsigned flags, - int hint, pipe_mutex *mutex, void **virtual) -{ - *virtual = (void *)((unsigned long *)private + 2); - return 0; -} - -static int -pool_unmap(struct _DriBufferPool *pool, void *private) -{ - return 0; -} - -static unsigned long -pool_offset(struct _DriBufferPool *pool, void *private) -{ - /* - * BUG - */ - abort(); - return 0UL; -} - -static unsigned long -pool_poolOffset(struct _DriBufferPool *pool, void *private) -{ - /* - * BUG - */ - abort(); -} - -static uint64_t -pool_flags(struct _DriBufferPool *pool, void *private) -{ - return DRM_BO_FLAG_MEM_LOCAL | DRM_BO_FLAG_CACHED; -} - -static unsigned long -pool_size(struct _DriBufferPool *pool, void *private) -{ - return *(unsigned long *) private; -} - - -static int -pool_fence(struct _DriBufferPool *pool, void *private, - struct _DriFenceObject *fence) -{ - abort(); - return 0UL; -} - -static drmBO * -pool_kernel(struct _DriBufferPool *pool, void *private) -{ - abort(); - return NULL; -} - -static void -pool_takedown(struct _DriBufferPool *pool) -{ - 
free(pool); -} - - -struct _DriBufferPool * -driMallocPoolInit(void) -{ - struct _DriBufferPool *pool; - - pool = (struct _DriBufferPool *) malloc(sizeof(*pool)); - if (!pool) - return NULL; - - pool->data = NULL; - pool->fd = -1; - pool->map = &pool_map; - pool->unmap = &pool_unmap; - pool->destroy = &pool_destroy; - pool->offset = &pool_offset; - pool->poolOffset = &pool_poolOffset; - pool->flags = &pool_flags; - pool->size = &pool_size; - pool->create = &pool_create; - pool->fence = &pool_fence; - pool->kernel = &pool_kernel; - pool->validate = NULL; - pool->waitIdle = &pool_waitIdle; - pool->takeDown = &pool_takedown; - return pool; -} diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c b/src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c deleted file mode 100644 index 391cea50a7..0000000000 --- a/src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c +++ /dev/null @@ -1,968 +0,0 @@ -/************************************************************************** - * - * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * - **************************************************************************/ -/* - * Authors: Thomas Hellstrom <thomas-at-tungstengraphics-dot-com> - */ - -#include <stdint.h> -#include <sys/time.h> -#include <errno.h> -#include <unistd.h> -#include <assert.h> -#include "ws_dri_bufpool.h" -#include "ws_dri_fencemgr.h" -#include "ws_dri_bufmgr.h" -#include "pipe/p_thread.h" - -#define DRI_SLABPOOL_ALLOC_RETRIES 100 - -struct _DriSlab; - -struct _DriSlabBuffer { - int isSlabBuffer; - drmBO *bo; - struct _DriFenceObject *fence; - struct _DriSlab *parent; - drmMMListHead head; - uint32_t mapCount; - uint32_t start; - uint32_t fenceType; - int unFenced; - pipe_condvar event; -}; - -struct _DriKernelBO { - int fd; - drmBO bo; - drmMMListHead timeoutHead; - drmMMListHead head; - struct timeval timeFreed; - uint32_t pageAlignment; - void *virtual; -}; - -struct _DriSlab{ - drmMMListHead head; - drmMMListHead freeBuffers; - uint32_t numBuffers; - uint32_t numFree; - struct _DriSlabBuffer *buffers; - struct _DriSlabSizeHeader *header; - struct _DriKernelBO *kbo; -}; - - -struct _DriSlabSizeHeader { - drmMMListHead slabs; - drmMMListHead freeSlabs; - drmMMListHead delayedBuffers; - uint32_t numDelayed; - struct _DriSlabPool *slabPool; - uint32_t bufSize; - pipe_mutex mutex; -}; - -struct _DriFreeSlabManager { - struct timeval slabTimeout; - struct timeval checkInterval; - struct timeval nextCheck; - drmMMListHead timeoutList; - drmMMListHead unCached; - drmMMListHead cached; - pipe_mutex mutex; -}; - - 
-struct _DriSlabPool { - - /* - * The data of this structure remains constant after - * initialization and thus needs no mutex protection. - */ - - struct _DriFreeSlabManager *fMan; - uint64_t proposedFlags; - uint64_t validMask; - uint32_t *bucketSizes; - uint32_t numBuckets; - uint32_t pageSize; - int fd; - int pageAlignment; - int maxSlabSize; - int desiredNumBuffers; - struct _DriSlabSizeHeader *headers; -}; - -/* - * FIXME: Perhaps arrange timeout slabs in size buckets for fast - * retreival?? - */ - - -static inline int -driTimeAfterEq(struct timeval *arg1, struct timeval *arg2) -{ - return ((arg1->tv_sec > arg2->tv_sec) || - ((arg1->tv_sec == arg2->tv_sec) && - (arg1->tv_usec > arg2->tv_usec))); -} - -static inline void -driTimeAdd(struct timeval *arg, struct timeval *add) -{ - unsigned int sec; - - arg->tv_sec += add->tv_sec; - arg->tv_usec += add->tv_usec; - sec = arg->tv_usec / 1000000; - arg->tv_sec += sec; - arg->tv_usec -= sec*1000000; -} - -static void -driFreeKernelBO(struct _DriKernelBO *kbo) -{ - if (!kbo) - return; - - (void) drmBOUnreference(kbo->fd, &kbo->bo); - free(kbo); -} - - -static void -driFreeTimeoutKBOsLocked(struct _DriFreeSlabManager *fMan, - struct timeval *time) -{ - drmMMListHead *list, *next; - struct _DriKernelBO *kbo; - - if (!driTimeAfterEq(time, &fMan->nextCheck)) - return; - - for (list = fMan->timeoutList.next, next = list->next; - list != &fMan->timeoutList; - list = next, next = list->next) { - - kbo = DRMLISTENTRY(struct _DriKernelBO, list, timeoutHead); - - if (!driTimeAfterEq(time, &kbo->timeFreed)) - break; - - DRMLISTDELINIT(&kbo->timeoutHead); - DRMLISTDELINIT(&kbo->head); - driFreeKernelBO(kbo); - } - - fMan->nextCheck = *time; - driTimeAdd(&fMan->nextCheck, &fMan->checkInterval); -} - - -/* - * Add a _DriKernelBO to the free slab manager. - * This means that it is available for reuse, but if it's not - * reused in a while, it will be freed. 
- */ - -static void -driSetKernelBOFree(struct _DriFreeSlabManager *fMan, - struct _DriKernelBO *kbo) -{ - struct timeval time; - - pipe_mutex_lock(fMan->mutex); - gettimeofday(&time, NULL); - driTimeAdd(&time, &fMan->slabTimeout); - - kbo->timeFreed = time; - - if (kbo->bo.flags & DRM_BO_FLAG_CACHED) - DRMLISTADD(&kbo->head, &fMan->cached); - else - DRMLISTADD(&kbo->head, &fMan->unCached); - - DRMLISTADDTAIL(&kbo->timeoutHead, &fMan->timeoutList); - driFreeTimeoutKBOsLocked(fMan, &time); - - pipe_mutex_unlock(fMan->mutex); -} - -/* - * Get a _DriKernelBO for us to use as storage for a slab. - * - */ - -static struct _DriKernelBO * -driAllocKernelBO(struct _DriSlabSizeHeader *header) - -{ - struct _DriSlabPool *slabPool = header->slabPool; - struct _DriFreeSlabManager *fMan = slabPool->fMan; - drmMMListHead *list, *next, *head; - uint32_t size = header->bufSize * slabPool->desiredNumBuffers; - struct _DriKernelBO *kbo; - struct _DriKernelBO *kboTmp; - int ret; - - /* - * FIXME: We should perhaps allow some variation in slabsize in order - * to efficiently reuse slabs. - */ - - size = (size <= slabPool->maxSlabSize) ? size : slabPool->maxSlabSize; - size = (size + slabPool->pageSize - 1) & ~(slabPool->pageSize - 1); - pipe_mutex_lock(fMan->mutex); - - kbo = NULL; - - retry: - head = (slabPool->proposedFlags & DRM_BO_FLAG_CACHED) ? 
- &fMan->cached : &fMan->unCached; - - for (list = head->next, next = list->next; - list != head; - list = next, next = list->next) { - - kboTmp = DRMLISTENTRY(struct _DriKernelBO, list, head); - - if ((kboTmp->bo.size == size) && - (slabPool->pageAlignment == 0 || - (kboTmp->pageAlignment % slabPool->pageAlignment) == 0)) { - - if (!kbo) - kbo = kboTmp; - - if ((kbo->bo.proposedFlags ^ slabPool->proposedFlags) == 0) - break; - - } - } - - if (kbo) { - DRMLISTDELINIT(&kbo->head); - DRMLISTDELINIT(&kbo->timeoutHead); - } - - pipe_mutex_unlock(fMan->mutex); - - if (kbo) { - uint64_t new_mask = kbo->bo.proposedFlags ^ slabPool->proposedFlags; - - ret = 0; - if (new_mask) { - ret = drmBOSetStatus(kbo->fd, &kbo->bo, slabPool->proposedFlags, - new_mask, DRM_BO_HINT_DONT_FENCE, 0, 0); - } - if (ret == 0) - return kbo; - - driFreeKernelBO(kbo); - kbo = NULL; - goto retry; - } - - kbo = calloc(1, sizeof(struct _DriKernelBO)); - if (!kbo) - return NULL; - - kbo->fd = slabPool->fd; - DRMINITLISTHEAD(&kbo->head); - DRMINITLISTHEAD(&kbo->timeoutHead); - ret = drmBOCreate(kbo->fd, size, slabPool->pageAlignment, NULL, - slabPool->proposedFlags, - DRM_BO_HINT_DONT_FENCE, &kbo->bo); - if (ret) - goto out_err0; - - ret = drmBOMap(kbo->fd, &kbo->bo, - DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, - 0, &kbo->virtual); - - if (ret) - goto out_err1; - - ret = drmBOUnmap(kbo->fd, &kbo->bo); - if (ret) - goto out_err1; - - return kbo; - - out_err1: - drmBOUnreference(kbo->fd, &kbo->bo); - out_err0: - free(kbo); - return NULL; -} - - -static int -driAllocSlab(struct _DriSlabSizeHeader *header) -{ - struct _DriSlab *slab; - struct _DriSlabBuffer *buf; - uint32_t numBuffers; - int ret; - int i; - - slab = calloc(1, sizeof(*slab)); - if (!slab) - return -ENOMEM; - - slab->kbo = driAllocKernelBO(header); - if (!slab->kbo) { - ret = -ENOMEM; - goto out_err0; - } - - numBuffers = slab->kbo->bo.size / header->bufSize; - - slab->buffers = calloc(numBuffers, sizeof(*slab->buffers)); - if (!slab->buffers) { 
- ret = -ENOMEM; - goto out_err1; - } - - DRMINITLISTHEAD(&slab->head); - DRMINITLISTHEAD(&slab->freeBuffers); - slab->numBuffers = numBuffers; - slab->numFree = 0; - slab->header = header; - - buf = slab->buffers; - for (i=0; i < numBuffers; ++i) { - buf->parent = slab; - buf->start = i* header->bufSize; - buf->mapCount = 0; - buf->isSlabBuffer = 1; - pipe_condvar_init(buf->event); - DRMLISTADDTAIL(&buf->head, &slab->freeBuffers); - slab->numFree++; - buf++; - } - - DRMLISTADDTAIL(&slab->head, &header->slabs); - - return 0; - - out_err1: - driSetKernelBOFree(header->slabPool->fMan, slab->kbo); - free(slab->buffers); - out_err0: - free(slab); - return ret; -} - -/* - * Delete a buffer from the slab header delayed list and put - * it on the slab free list. - */ - -static void -driSlabFreeBufferLocked(struct _DriSlabBuffer *buf) -{ - struct _DriSlab *slab = buf->parent; - struct _DriSlabSizeHeader *header = slab->header; - drmMMListHead *list = &buf->head; - - DRMLISTDEL(list); - DRMLISTADDTAIL(list, &slab->freeBuffers); - slab->numFree++; - - if (slab->head.next == &slab->head) - DRMLISTADDTAIL(&slab->head, &header->slabs); - - if (slab->numFree == slab->numBuffers) { - list = &slab->head; - DRMLISTDEL(list); - DRMLISTADDTAIL(list, &header->freeSlabs); - } - - if (header->slabs.next == &header->slabs || - slab->numFree != slab->numBuffers) { - - drmMMListHead *next; - struct _DriFreeSlabManager *fMan = header->slabPool->fMan; - - for (list = header->freeSlabs.next, next = list->next; - list != &header->freeSlabs; - list = next, next = list->next) { - - slab = DRMLISTENTRY(struct _DriSlab, list, head); - - DRMLISTDELINIT(list); - driSetKernelBOFree(fMan, slab->kbo); - free(slab->buffers); - free(slab); - } - } -} - -static void -driSlabCheckFreeLocked(struct _DriSlabSizeHeader *header, int wait) -{ - drmMMListHead *list, *prev, *first; - struct _DriSlabBuffer *buf; - struct _DriSlab *slab; - int firstWasSignaled = 1; - int signaled; - int i; - int ret; - - /* - * 
Rerun the freeing test if the youngest tested buffer - * was signaled, since there might be more idle buffers - * in the delay list. - */ - - while (firstWasSignaled) { - firstWasSignaled = 0; - signaled = 0; - first = header->delayedBuffers.next; - - /* Only examine the oldest 1/3 of delayed buffers: - */ - if (header->numDelayed > 3) { - for (i = 0; i < header->numDelayed; i += 3) { - first = first->next; - } - } - - for (list = first, prev = list->prev; - list != &header->delayedBuffers; - list = prev, prev = list->prev) { - buf = DRMLISTENTRY(struct _DriSlabBuffer, list, head); - slab = buf->parent; - - if (!signaled) { - if (wait) { - ret = driFenceFinish(buf->fence, buf->fenceType, 0); - if (ret) - break; - signaled = 1; - wait = 0; - } else { - signaled = driFenceSignaled(buf->fence, buf->fenceType); - } - if (signaled) { - if (list == first) - firstWasSignaled = 1; - driFenceUnReference(&buf->fence); - header->numDelayed--; - driSlabFreeBufferLocked(buf); - } - } else if (driFenceSignaledCached(buf->fence, buf->fenceType)) { - driFenceUnReference(&buf->fence); - header->numDelayed--; - driSlabFreeBufferLocked(buf); - } - } - } -} - - -static struct _DriSlabBuffer * -driSlabAllocBuffer(struct _DriSlabSizeHeader *header) -{ - static struct _DriSlabBuffer *buf; - struct _DriSlab *slab; - drmMMListHead *list; - int count = DRI_SLABPOOL_ALLOC_RETRIES; - - pipe_mutex_lock(header->mutex); - while(header->slabs.next == &header->slabs && count > 0) { - driSlabCheckFreeLocked(header, 0); - if (header->slabs.next != &header->slabs) - break; - - pipe_mutex_unlock(header->mutex); - if (count != DRI_SLABPOOL_ALLOC_RETRIES) - usleep(1); - pipe_mutex_lock(header->mutex); - (void) driAllocSlab(header); - count--; - } - - list = header->slabs.next; - if (list == &header->slabs) { - pipe_mutex_unlock(header->mutex); - return NULL; - } - slab = DRMLISTENTRY(struct _DriSlab, list, head); - if (--slab->numFree == 0) - DRMLISTDELINIT(list); - - list = slab->freeBuffers.next; - 
DRMLISTDELINIT(list); - - pipe_mutex_unlock(header->mutex); - buf = DRMLISTENTRY(struct _DriSlabBuffer, list, head); - return buf; -} - -static void * -pool_create(struct _DriBufferPool *driPool, unsigned long size, - uint64_t flags, unsigned hint, unsigned alignment) -{ - struct _DriSlabPool *pool = (struct _DriSlabPool *) driPool->data; - struct _DriSlabSizeHeader *header; - struct _DriSlabBuffer *buf; - void *dummy; - int i; - int ret; - - /* - * FIXME: Check for compatibility. - */ - - header = pool->headers; - for (i=0; i<pool->numBuckets; ++i) { - if (header->bufSize >= size) - break; - header++; - } - - if (i < pool->numBuckets) - return driSlabAllocBuffer(header); - - - /* - * Fall back to allocate a buffer object directly from DRM. - * and wrap it in a driBO structure. - */ - - - buf = calloc(1, sizeof(*buf)); - - if (!buf) - return NULL; - - buf->bo = calloc(1, sizeof(*buf->bo)); - if (!buf->bo) - goto out_err0; - - if (alignment) { - if ((alignment < pool->pageSize) && (pool->pageSize % alignment)) - goto out_err1; - if ((alignment > pool->pageSize) && (alignment % pool->pageSize)) - goto out_err1; - } - - ret = drmBOCreate(pool->fd, size, alignment / pool->pageSize, NULL, - flags, hint, buf->bo); - if (ret) - goto out_err1; - - ret = drmBOMap(pool->fd, buf->bo, DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, - 0, &dummy); - if (ret) - goto out_err2; - - ret = drmBOUnmap(pool->fd, buf->bo); - if (ret) - goto out_err2; - - return buf; - out_err2: - drmBOUnreference(pool->fd, buf->bo); - out_err1: - free(buf->bo); - out_err0: - free(buf); - return NULL; -} - -static int -pool_destroy(struct _DriBufferPool *driPool, void *private) -{ - struct _DriSlabBuffer *buf = - (struct _DriSlabBuffer *) private; - struct _DriSlab *slab; - struct _DriSlabSizeHeader *header; - - if (!buf->isSlabBuffer) { - struct _DriSlabPool *pool = (struct _DriSlabPool *) driPool->data; - int ret; - - ret = drmBOUnreference(pool->fd, buf->bo); - free(buf->bo); - free(buf); - return ret; - } - - 
slab = buf->parent; - header = slab->header; - - pipe_mutex_lock(header->mutex); - buf->unFenced = 0; - buf->mapCount = 0; - - if (buf->fence && !driFenceSignaledCached(buf->fence, buf->fenceType)) { - DRMLISTADDTAIL(&buf->head, &header->delayedBuffers); - header->numDelayed++; - } else { - if (buf->fence) - driFenceUnReference(&buf->fence); - driSlabFreeBufferLocked(buf); - } - - pipe_mutex_unlock(header->mutex); - return 0; -} - -static int -pool_waitIdle(struct _DriBufferPool *driPool, void *private, - pipe_mutex *mutex, int lazy) -{ - struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; - - while(buf->unFenced) - pipe_condvar_wait(buf->event, *mutex); - - if (!buf->fence) - return 0; - - driFenceFinish(buf->fence, buf->fenceType, lazy); - driFenceUnReference(&buf->fence); - - return 0; -} - -static int -pool_map(struct _DriBufferPool *pool, void *private, unsigned flags, - int hint, pipe_mutex *mutex, void **virtual) -{ - struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; - int busy; - - if (buf->isSlabBuffer) - busy = buf->unFenced || (buf->fence && !driFenceSignaledCached(buf->fence, buf->fenceType)); - else - busy = buf->fence && !driFenceSignaled(buf->fence, buf->fenceType); - - - if (busy) { - if (hint & DRM_BO_HINT_DONT_BLOCK) - return -EBUSY; - else { - (void) pool_waitIdle(pool, private, mutex, 0); - } - } - - ++buf->mapCount; - *virtual = (buf->isSlabBuffer) ? 
- (void *) ((uint8_t *) buf->parent->kbo->virtual + buf->start) : - (void *) buf->bo->virtual; - - return 0; -} - -static int -pool_unmap(struct _DriBufferPool *pool, void *private) -{ - struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; - - --buf->mapCount; - if (buf->mapCount == 0 && buf->isSlabBuffer) - pipe_condvar_broadcast(buf->event); - - return 0; -} - -static unsigned long -pool_offset(struct _DriBufferPool *pool, void *private) -{ - struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; - struct _DriSlab *slab; - struct _DriSlabSizeHeader *header; - - if (!buf->isSlabBuffer) { - assert(buf->bo->proposedFlags & DRM_BO_FLAG_NO_MOVE); - return buf->bo->offset; - } - - slab = buf->parent; - header = slab->header; - - (void) header; - assert(header->slabPool->proposedFlags & DRM_BO_FLAG_NO_MOVE); - return slab->kbo->bo.offset + buf->start; -} - -static unsigned long -pool_poolOffset(struct _DriBufferPool *pool, void *private) -{ - struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; - - return buf->start; -} - -static uint64_t -pool_flags(struct _DriBufferPool *pool, void *private) -{ - struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; - - if (!buf->isSlabBuffer) - return buf->bo->flags; - - return buf->parent->kbo->bo.flags; -} - -static unsigned long -pool_size(struct _DriBufferPool *pool, void *private) -{ - struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; - if (!buf->isSlabBuffer) - return buf->bo->size; - - return buf->parent->header->bufSize; -} - -static int -pool_fence(struct _DriBufferPool *pool, void *private, - struct _DriFenceObject *fence) -{ - struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; - drmBO *bo; - - if (buf->fence) - driFenceUnReference(&buf->fence); - - buf->fence = driFenceReference(fence); - bo = (buf->isSlabBuffer) ? 
- &buf->parent->kbo->bo: - buf->bo; - buf->fenceType = bo->fenceFlags; - - buf->unFenced = 0; - pipe_condvar_broadcast(buf->event); - - return 0; -} - -static drmBO * -pool_kernel(struct _DriBufferPool *pool, void *private) -{ - struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; - - return (buf->isSlabBuffer) ? &buf->parent->kbo->bo : buf->bo; -} - -static int -pool_validate(struct _DriBufferPool *pool, void *private, - pipe_mutex *mutex) -{ - struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; - - if (!buf->isSlabBuffer) - return 0; - - while(buf->mapCount != 0) - pipe_condvar_wait(buf->event, *mutex); - - buf->unFenced = 1; - return 0; -} - - -struct _DriFreeSlabManager * -driInitFreeSlabManager(uint32_t checkIntervalMsec, uint32_t slabTimeoutMsec) -{ - struct _DriFreeSlabManager *tmp; - - tmp = calloc(1, sizeof(*tmp)); - if (!tmp) - return NULL; - - pipe_mutex_init(tmp->mutex); - pipe_mutex_lock(tmp->mutex); - tmp->slabTimeout.tv_usec = slabTimeoutMsec*1000; - tmp->slabTimeout.tv_sec = tmp->slabTimeout.tv_usec / 1000000; - tmp->slabTimeout.tv_usec -= tmp->slabTimeout.tv_sec*1000000; - - tmp->checkInterval.tv_usec = checkIntervalMsec*1000; - tmp->checkInterval.tv_sec = tmp->checkInterval.tv_usec / 1000000; - tmp->checkInterval.tv_usec -= tmp->checkInterval.tv_sec*1000000; - - gettimeofday(&tmp->nextCheck, NULL); - driTimeAdd(&tmp->nextCheck, &tmp->checkInterval); - DRMINITLISTHEAD(&tmp->timeoutList); - DRMINITLISTHEAD(&tmp->unCached); - DRMINITLISTHEAD(&tmp->cached); - pipe_mutex_unlock(tmp->mutex); - - return tmp; -} - -void -driFinishFreeSlabManager(struct _DriFreeSlabManager *fMan) -{ - struct timeval time; - - time = fMan->nextCheck; - driTimeAdd(&time, &fMan->checkInterval); - - pipe_mutex_lock(fMan->mutex); - driFreeTimeoutKBOsLocked(fMan, &time); - pipe_mutex_unlock(fMan->mutex); - - assert(fMan->timeoutList.next == &fMan->timeoutList); - assert(fMan->unCached.next == &fMan->unCached); - assert(fMan->cached.next == &fMan->cached); 
- - free(fMan); -} - -static void -driInitSizeHeader(struct _DriSlabPool *pool, uint32_t size, - struct _DriSlabSizeHeader *header) -{ - pipe_mutex_init(header->mutex); - pipe_mutex_lock(header->mutex); - - DRMINITLISTHEAD(&header->slabs); - DRMINITLISTHEAD(&header->freeSlabs); - DRMINITLISTHEAD(&header->delayedBuffers); - - header->numDelayed = 0; - header->slabPool = pool; - header->bufSize = size; - - pipe_mutex_unlock(header->mutex); -} - -static void -driFinishSizeHeader(struct _DriSlabSizeHeader *header) -{ - drmMMListHead *list, *next; - struct _DriSlabBuffer *buf; - - pipe_mutex_lock(header->mutex); - for (list = header->delayedBuffers.next, next = list->next; - list != &header->delayedBuffers; - list = next, next = list->next) { - - buf = DRMLISTENTRY(struct _DriSlabBuffer, list , head); - if (buf->fence) { - (void) driFenceFinish(buf->fence, buf->fenceType, 0); - driFenceUnReference(&buf->fence); - } - header->numDelayed--; - driSlabFreeBufferLocked(buf); - } - pipe_mutex_unlock(header->mutex); -} - -static void -pool_takedown(struct _DriBufferPool *driPool) -{ - struct _DriSlabPool *pool = driPool->data; - int i; - - for (i=0; i<pool->numBuckets; ++i) { - driFinishSizeHeader(&pool->headers[i]); - } - - free(pool->headers); - free(pool->bucketSizes); - free(pool); - free(driPool); -} - -struct _DriBufferPool * -driSlabPoolInit(int fd, uint64_t flags, - uint64_t validMask, - uint32_t smallestSize, - uint32_t numSizes, - uint32_t desiredNumBuffers, - uint32_t maxSlabSize, - uint32_t pageAlignment, - struct _DriFreeSlabManager *fMan) -{ - struct _DriBufferPool *driPool; - struct _DriSlabPool *pool; - uint32_t i; - - driPool = calloc(1, sizeof(*driPool)); - if (!driPool) - return NULL; - - pool = calloc(1, sizeof(*pool)); - if (!pool) - goto out_err0; - - pool->bucketSizes = calloc(numSizes, sizeof(*pool->bucketSizes)); - if (!pool->bucketSizes) - goto out_err1; - - pool->headers = calloc(numSizes, sizeof(*pool->headers)); - if (!pool->headers) - goto 
out_err2; - - pool->fMan = fMan; - pool->proposedFlags = flags; - pool->validMask = validMask; - pool->numBuckets = numSizes; - pool->pageSize = getpagesize(); - pool->fd = fd; - pool->pageAlignment = pageAlignment; - pool->maxSlabSize = maxSlabSize; - pool->desiredNumBuffers = desiredNumBuffers; - - for (i=0; i<pool->numBuckets; ++i) { - pool->bucketSizes[i] = (smallestSize << i); - driInitSizeHeader(pool, pool->bucketSizes[i], - &pool->headers[i]); - } - - driPool->data = (void *) pool; - driPool->map = &pool_map; - driPool->unmap = &pool_unmap; - driPool->destroy = &pool_destroy; - driPool->offset = &pool_offset; - driPool->poolOffset = &pool_poolOffset; - driPool->flags = &pool_flags; - driPool->size = &pool_size; - driPool->create = &pool_create; - driPool->fence = &pool_fence; - driPool->kernel = &pool_kernel; - driPool->validate = &pool_validate; - driPool->waitIdle = &pool_waitIdle; - driPool->takeDown = &pool_takedown; - - return driPool; - - out_err2: - free(pool->bucketSizes); - out_err1: - free(pool); - out_err0: - free(driPool); - - return NULL; -} diff --git a/src/gallium/winsys/drm/intel/dri/Makefile b/src/gallium/winsys/drm/intel/dri/Makefile deleted file mode 100644 index 2046441a22..0000000000 --- a/src/gallium/winsys/drm/intel/dri/Makefile +++ /dev/null @@ -1,33 +0,0 @@ -TOP = ../../../../../.. 
-include $(TOP)/configs/current - -LIBNAME = i915_dri.so -LIBNAME_EGL = egl_i915_dri.so - -PIPE_DRIVERS = \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - ../common/libinteldrm.a \ - $(TOP)/src/gallium/drivers/i915simple/libi915simple.a - - -DRIVER_SOURCES = \ - intel_winsys_softpipe.c \ - intel_swapbuffers.c \ - intel_context.c \ - intel_lock.c \ - intel_screen.c - -C_SOURCES = \ - $(COMMON_GALLIUM_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - -DRIVER_DEFINES = -I../common $(shell pkg-config libdrm --atleast-version=2.3.1 \ - && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") - -include ../../Makefile.template - -#intel_tex_layout.o: $(TOP)/src/mesa/drivers/dri/intel/intel_tex_layout.c - -symlinks: diff --git a/src/gallium/winsys/drm/intel/dri/SConscript b/src/gallium/winsys/drm/intel/dri/SConscript deleted file mode 100644 index 6a4f50afcc..0000000000 --- a/src/gallium/winsys/drm/intel/dri/SConscript +++ /dev/null @@ -1,41 +0,0 @@ -Import('*') - -if 'mesa' in env['statetrackers']: - - env = drienv.Clone() - - env.Append(CPPPATH = [ - '../intel', - 'server' - ]) - - #MINIGLX_SOURCES = server/intel_dri.c - - DRIVER_SOURCES = [ - 'intel_winsys_pipe.c', - 'intel_winsys_softpipe.c', - 'intel_winsys_i915.c', - 'intel_batchbuffer.c', - 'intel_swapbuffers.c', - 'intel_context.c', - 'intel_lock.c', - 'intel_screen.c', - 'intel_batchpool.c', - ] - - sources = \ - COMMON_GALLIUM_SOURCES + \ - COMMON_BM_SOURCES + \ - DRIVER_SOURCES - - drivers = [ - softpipe, - i915simple - ] - - # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions - env.SharedLibrary( - target ='i915tex_dri.so', - source = sources, - LIBS = drivers + mesa + auxiliaries + env['LIBS'], - ) diff --git a/src/gallium/winsys/drm/intel/dri/intel_batchbuffer.h b/src/gallium/winsys/drm/intel/dri/intel_batchbuffer.h deleted file mode 100644 index 3e95326168..0000000000 --- a/src/gallium/winsys/drm/intel/dri/intel_batchbuffer.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef INTEL_BATCHBUFFER_H 
-#define INTEL_BATCHBUFFER_H - -#include "intel_be_batchbuffer.h" - -/* - * Need to redefine the BATCH defines - */ - -#undef BEGIN_BATCH -#define BEGIN_BATCH(dwords, relocs) \ - (i915_batchbuffer_check(&intel->base.batch->base, dwords, relocs)) - -#undef OUT_BATCH -#define OUT_BATCH(d) \ - i915_batchbuffer_dword(&intel->base.batch->base, d) - -#undef OUT_RELOC -#define OUT_RELOC(buf,flags,mask,delta) do { \ - assert((delta) >= 0); \ - intel_be_offset_relocation(intel->base.batch, delta, buf, flags, mask); \ -} while (0) - -#endif diff --git a/src/gallium/winsys/drm/intel/dri/intel_context.c b/src/gallium/winsys/drm/intel/dri/intel_context.c deleted file mode 100644 index 97ef731aaa..0000000000 --- a/src/gallium/winsys/drm/intel/dri/intel_context.c +++ /dev/null @@ -1,337 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -#include "i830_dri.h" - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_swapbuffers.h" -#include "intel_batchbuffer.h" -#include "intel_winsys_softpipe.h" - -#include "i915simple/i915_screen.h" - -#include "state_tracker/st_public.h" -#include "state_tracker/st_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_context.h" - -#include "utils.h" - - -#ifdef DEBUG -int __intel_debug = 0; -#endif - - -#define need_GL_ARB_multisample -#define need_GL_ARB_point_parameters -#define need_GL_ARB_texture_compression -#define need_GL_ARB_vertex_buffer_object -#define need_GL_ARB_vertex_program -#define need_GL_ARB_window_pos -#define need_GL_EXT_blend_color -#define need_GL_EXT_blend_equation_separate -#define need_GL_EXT_blend_func_separate -#define need_GL_EXT_blend_minmax -#define need_GL_EXT_cull_vertex -#define need_GL_EXT_fog_coord -#define need_GL_EXT_framebuffer_object -#define need_GL_EXT_multi_draw_arrays -#define need_GL_EXT_secondary_color -#define need_GL_NV_vertex_program -#include "extension_helper.h" - - -/** - * Extension strings exported by the intel driver. - * - * \note - * It appears that ARB_texture_env_crossbar has "disappeared" compared to the - * old i830-specific driver. 
- */ -const struct dri_extension card_extensions[] = { - {"GL_ARB_multisample", GL_ARB_multisample_functions}, - {"GL_ARB_multitexture", NULL}, - {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, - {"GL_ARB_texture_border_clamp", NULL}, - {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, - {"GL_ARB_texture_cube_map", NULL}, - {"GL_ARB_texture_env_add", NULL}, - {"GL_ARB_texture_env_combine", NULL}, - {"GL_ARB_texture_env_dot3", NULL}, - {"GL_ARB_texture_mirrored_repeat", NULL}, - {"GL_ARB_texture_rectangle", NULL}, - {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions}, - {"GL_ARB_pixel_buffer_object", NULL}, - {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, - {"GL_ARB_window_pos", GL_ARB_window_pos_functions}, - {"GL_EXT_blend_color", GL_EXT_blend_color_functions}, - {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, - {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, - {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, - {"GL_EXT_blend_subtract", NULL}, - {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions}, - {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions}, - {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions}, - {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, - {"GL_EXT_packed_depth_stencil", NULL}, - {"GL_EXT_pixel_buffer_object", NULL}, - {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, - {"GL_EXT_stencil_wrap", NULL}, - {"GL_EXT_texture_edge_clamp", NULL}, - {"GL_EXT_texture_env_combine", NULL}, - {"GL_EXT_texture_env_dot3", NULL}, - {"GL_EXT_texture_filter_anisotropic", NULL}, - {"GL_EXT_texture_lod_bias", NULL}, - {"GL_3DFX_texture_compression_FXT1", NULL}, - {"GL_APPLE_client_storage", NULL}, - {"GL_MESA_pack_invert", NULL}, - {"GL_MESA_ycbcr_texture", NULL}, - {"GL_NV_blend_square", NULL}, - {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, - {"GL_NV_vertex_program1_1", NULL}, - 
{"GL_SGIS_generate_mipmap", NULL }, - {NULL, NULL} -}; - - - -#ifdef DEBUG -static const struct dri_debug_control debug_control[] = { - {"ioctl", DEBUG_IOCTL}, - {"bat", DEBUG_BATCH}, - {"lock", DEBUG_LOCK}, - {"swap", DEBUG_SWAP}, - {NULL, 0} -}; -#endif - - - -static void -intel_lock_hardware(struct intel_be_context *context) -{ - struct intel_context *intel = (struct intel_context *)context; - LOCK_HARDWARE(intel); -} - -static void -intel_unlock_hardware(struct intel_be_context *context) -{ - struct intel_context *intel = (struct intel_context *)context; - UNLOCK_HARDWARE(intel); -} - -static boolean -intel_locked_hardware(struct intel_be_context *context) -{ - struct intel_context *intel = (struct intel_context *)context; - return intel->locked ? TRUE : FALSE; -} - -GLboolean -intelCreateContext(const __GLcontextModes * visual, - __DRIcontextPrivate * driContextPriv, - void *sharedContextPrivate) -{ - struct intel_context *intel = CALLOC_STRUCT(intel_context); - __DRIscreenPrivate *sPriv = driContextPriv->driScreenPriv; - struct intel_screen *intelScreen = intel_screen(sPriv); - drmI830Sarea *saPriv = intelScreen->sarea; - int fthrottle_mode; - GLboolean havePools; - struct pipe_context *pipe; - struct st_context *st_share = NULL; - - if (sharedContextPrivate) { - st_share = ((struct intel_context *) sharedContextPrivate)->st; - } - - driContextPriv->driverPrivate = intel; - intel->intelScreen = intelScreen; - intel->driScreen = sPriv; - intel->sarea = saPriv; - - driParseConfigFiles(&intel->optionCache, &intelScreen->optionCache, - intel->driScreen->myNum, "i915"); - - - /* - * memory pools - */ - DRM_LIGHT_LOCK(sPriv->fd, &sPriv->pSAREA->lock, driContextPriv->hHWContext); - // ZZZ JB should be per screen and not be done per context - havePools = intelCreatePools(sPriv); - DRM_UNLOCK(sPriv->fd, &sPriv->pSAREA->lock, driContextPriv->hHWContext); - if (!havePools) - return GL_FALSE; - - - /* Dri stuff */ - intel->hHWContext = driContextPriv->hHWContext; - 
intel->driFd = sPriv->fd; - intel->driHwLock = (drmLock *) & sPriv->pSAREA->lock; - - fthrottle_mode = driQueryOptioni(&intel->optionCache, "fthrottle_mode"); - intel->iw.irq_seq = -1; - intel->irqsEmitted = 0; - - intel->last_swap_fence = NULL; - intel->first_swap_fence = NULL; - -#ifdef DEBUG - __intel_debug = driParseDebugString(getenv("INTEL_DEBUG"), debug_control); -#endif - intel->base.hardware_lock = intel_lock_hardware; - intel->base.hardware_unlock = intel_unlock_hardware; - intel->base.hardware_locked = intel_locked_hardware; - - intel_be_init_context(&intel->base, &intelScreen->base); - - /* - * Pipe-related setup - */ - if (getenv("INTEL_SP")) { - /* use softpipe driver instead of hw */ - pipe = intel_create_softpipe( intel, &intelScreen->base.base ); - } - else { - switch (intel->intelScreen->deviceID) { - case PCI_CHIP_I945_G: - case PCI_CHIP_I945_GM: - case PCI_CHIP_I945_GME: - case PCI_CHIP_G33_G: - case PCI_CHIP_Q33_G: - case PCI_CHIP_Q35_G: - case PCI_CHIP_I915_G: - case PCI_CHIP_I915_GM: - pipe = i915_create_context(intelScreen->base.screen, - &intelScreen->base.base, - &intel->base.base); - break; - default: - fprintf(stderr, "Unknown PCIID %x in %s, using software driver\n", - intel->intelScreen->deviceID, __FUNCTION__); - - pipe = intel_create_softpipe( intel, &intelScreen->base.base ); - break; - } - } - - pipe->priv = intel; - - intel->st = st_create_context(pipe, visual, st_share); - - driInitExtensions( intel->st->ctx, card_extensions, GL_TRUE ); - - return GL_TRUE; -} - - -void -intelDestroyContext(__DRIcontextPrivate * driContextPriv) -{ - struct intel_context *intel = intel_context(driContextPriv); - - assert(intel); /* should never be null */ - if (intel) { - st_finish(intel->st); - - if (intel->last_swap_fence) { - driFenceFinish(intel->last_swap_fence, DRM_FENCE_TYPE_EXE, GL_TRUE); - driFenceUnReference(&intel->last_swap_fence); - intel->last_swap_fence = NULL; - } - if (intel->first_swap_fence) { - 
driFenceFinish(intel->first_swap_fence, DRM_FENCE_TYPE_EXE, GL_TRUE); - driFenceUnReference(&intel->first_swap_fence); - intel->first_swap_fence = NULL; - } - - if (intel->intelScreen->dummyContext == intel) - intel->intelScreen->dummyContext = NULL; - - st_destroy_context(intel->st); - intel_be_destroy_context(&intel->base); - free(intel); - } -} - - -GLboolean -intelUnbindContext(__DRIcontextPrivate * driContextPriv) -{ - struct intel_context *intel = intel_context(driContextPriv); - st_flush(intel->st, PIPE_FLUSH_RENDER_CACHE, NULL); - /* XXX make_current(NULL)? */ - return GL_TRUE; -} - - -GLboolean -intelMakeCurrent(__DRIcontextPrivate * driContextPriv, - __DRIdrawablePrivate * driDrawPriv, - __DRIdrawablePrivate * driReadPriv) -{ - if (driContextPriv) { - struct intel_context *intel = intel_context(driContextPriv); - struct intel_framebuffer *draw_fb = intel_framebuffer(driDrawPriv); - struct intel_framebuffer *read_fb = intel_framebuffer(driReadPriv); - - assert(draw_fb->stfb); - assert(read_fb->stfb); - - /* This is for situations in which we need a rendering context but - * there may not be any currently bound. - */ - intel->intelScreen->dummyContext = intel; - - st_make_current(intel->st, draw_fb->stfb, read_fb->stfb); - - if ((intel->driDrawable != driDrawPriv) || - (intel->lastStamp != driDrawPriv->lastStamp)) { - intel->driDrawable = driDrawPriv; - intelUpdateWindowSize(driDrawPriv); - intel->lastStamp = driDrawPriv->lastStamp; - } - - /* The size of the draw buffer will have been updated above. - * If the readbuffer is a different window, check/update its size now. 
- */ - if (driReadPriv != driDrawPriv) { - intelUpdateWindowSize(driReadPriv); - } - - } - else { - st_make_current(NULL, NULL, NULL); - } - - return GL_TRUE; -} diff --git a/src/gallium/winsys/drm/intel/dri/intel_context.h b/src/gallium/winsys/drm/intel/dri/intel_context.h deleted file mode 100644 index 5d22a422af..0000000000 --- a/src/gallium/winsys/drm/intel/dri/intel_context.h +++ /dev/null @@ -1,164 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#ifndef INTEL_CONTEXT_H -#define INTEL_CONTEXT_H - -#include <stdint.h> -#include "drm.h" - -#include "pipe/p_debug.h" - -#include "intel_screen.h" -#include "i915_drm.h" - -#include "intel_be_context.h" - - -struct pipe_context; -struct intel_context; -struct _DriBufferObject; -struct st_context; - - -#define INTEL_MAX_FIXUP 64 - -/** - * Intel rendering context, contains a state tracker and intel-specific info. - */ -struct intel_context -{ - struct intel_be_context base; - struct st_context *st; - - struct _DriFenceObject *last_swap_fence; - struct _DriFenceObject *first_swap_fence; - -// struct intel_batchbuffer *batch; - - boolean locked; - char *prevLockFile; - int prevLockLine; - - uint irqsEmitted; - drm_i915_irq_wait_t iw; - - drm_context_t hHWContext; - drmLock *driHwLock; - int driFd; - - __DRIdrawablePrivate *driDrawable; - __DRIscreenPrivate *driScreen; - struct intel_screen *intelScreen; - drmI830Sarea *sarea; - - uint lastStamp; - - /** - * Configuration cache - */ - driOptionCache optionCache; -}; - - - -/** - * Intel framebuffer. - */ -struct intel_framebuffer -{ - struct st_framebuffer *stfb; - - /* other fields TBD */ - int other; -}; - - - - -/* These are functions now: - */ -void LOCK_HARDWARE( struct intel_context *intel ); -void UNLOCK_HARDWARE( struct intel_context *intel ); - -extern char *__progname; - - - -/* ================================================================ - * Debugging: - */ -#ifdef DEBUG -extern int __intel_debug; - -#define DEBUG_SWAP 0x1 -#define DEBUG_LOCK 0x2 -#define DEBUG_IOCTL 0x4 -#define DEBUG_BATCH 0x8 - -#define DBG(flag, ...) do { \ - if (__intel_debug & (DEBUG_##flag)) \ - printf(__VA_ARGS__); \ -} while(0) - -#else -#define DBG(flag, ...) 
-#endif - - - -#define PCI_CHIP_845_G 0x2562 -#define PCI_CHIP_I830_M 0x3577 -#define PCI_CHIP_I855_GM 0x3582 -#define PCI_CHIP_I865_G 0x2572 -#define PCI_CHIP_I915_G 0x2582 -#define PCI_CHIP_I915_GM 0x2592 -#define PCI_CHIP_I945_G 0x2772 -#define PCI_CHIP_I945_GM 0x27A2 -#define PCI_CHIP_I945_GME 0x27AE -#define PCI_CHIP_G33_G 0x29C2 -#define PCI_CHIP_Q35_G 0x29B2 -#define PCI_CHIP_Q33_G 0x29D2 - - -/** Cast wrapper */ -static INLINE struct intel_context * -intel_context(__DRIcontextPrivate *driContextPriv) -{ - return (struct intel_context *) driContextPriv->driverPrivate; -} - - -/** Cast wrapper */ -static INLINE struct intel_framebuffer * -intel_framebuffer(__DRIdrawablePrivate * driDrawPriv) -{ - return (struct intel_framebuffer *) driDrawPriv->driverPrivate; -} - - -#endif diff --git a/src/gallium/winsys/drm/intel/dri/intel_screen.c b/src/gallium/winsys/drm/intel/dri/intel_screen.c deleted file mode 100644 index 3a486481f5..0000000000 --- a/src/gallium/winsys/drm/intel/dri/intel_screen.c +++ /dev/null @@ -1,607 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "utils.h" -#include "vblank.h" -#include "xmlpool.h" - -#include "intel_context.h" -#include "intel_screen.h" -#include "intel_batchbuffer.h" -#include "intel_swapbuffers.h" - -#include "i830_dri.h" -#include "ws_dri_bufpool.h" - -#include "pipe/p_context.h" -#include "pipe/p_screen.h" -#include "pipe/p_inlines.h" -#include "state_tracker/st_public.h" -#include "state_tracker/st_cb_fbo.h" - -static void -intelCreateSurface(struct intel_screen *intelScreen, struct pipe_winsys *winsys, unsigned handle); - -static void -intelCreateSurface(struct intel_screen *intelScreen, struct pipe_winsys *winsys, unsigned handle) -{ - struct pipe_screen *screen = intelScreen->base.screen; - struct pipe_texture *texture; - struct pipe_texture templat; - struct pipe_surface *surface; - struct pipe_buffer *buffer; - unsigned pitch; - - assert(intelScreen->front.cpp == 4); - - buffer = intel_be_buffer_from_handle(&intelScreen->base, - "front", handle); - - if (!buffer) - return; - - intelScreen->front.buffer = dri_bo(buffer); - - memset(&templat, 0, sizeof(templat)); - templat.tex_usage |= PIPE_TEXTURE_USAGE_DISPLAY_TARGET; - templat.target = PIPE_TEXTURE_2D; - templat.last_level = 0; - templat.depth[0] = 1; - templat.format = PIPE_FORMAT_A8R8G8B8_UNORM; - templat.width[0] = intelScreen->front.width; - templat.height[0] = intelScreen->front.height; - pf_get_block(templat.format, &templat.block); - pitch 
= intelScreen->front.pitch; - - texture = screen->texture_blanket(screen, - &templat, - &pitch, - buffer); - - /* Unref the buffer we don't need it anyways */ - pipe_buffer_reference(screen, &buffer, NULL); - - surface = screen->get_tex_surface(screen, - texture, - 0, - 0, - 0, - PIPE_BUFFER_USAGE_GPU_WRITE); - - intelScreen->front.texture = texture; - intelScreen->front.surface = surface; -} - -PUBLIC const char __driConfigOptions[] = - DRI_CONF_BEGIN DRI_CONF_SECTION_PERFORMANCE - DRI_CONF_FTHROTTLE_MODE(DRI_CONF_FTHROTTLE_IRQS) - DRI_CONF_VBLANK_MODE(DRI_CONF_VBLANK_DEF_INTERVAL_0) - DRI_CONF_SECTION_END DRI_CONF_SECTION_QUALITY -// DRI_CONF_FORCE_S3TC_ENABLE(false) - DRI_CONF_ALLOW_LARGE_TEXTURES(1) - DRI_CONF_SECTION_END DRI_CONF_END; - -const uint __driNConfigOptions = 3; - -#ifdef USE_NEW_INTERFACE -static PFNGLXCREATECONTEXTMODES create_context_modes = NULL; -#endif /*USE_NEW_INTERFACE */ - -extern const struct dri_extension card_extensions[]; - - - - -static void -intelPrintDRIInfo(struct intel_screen * intelScreen, - __DRIscreenPrivate * sPriv, I830DRIPtr gDRIPriv) -{ - fprintf(stderr, "*** Front size: 0x%x offset: 0x%x pitch: %d\n", - intelScreen->front.size, intelScreen->front.offset, - intelScreen->front.pitch); - fprintf(stderr, "*** Memory : 0x%x\n", gDRIPriv->mem); -} - - -#if 0 -static void -intelPrintSAREA(const drmI830Sarea * sarea) -{ - fprintf(stderr, "SAREA: sarea width %d height %d\n", sarea->width, - sarea->height); - fprintf(stderr, "SAREA: pitch: %d\n", sarea->pitch); - fprintf(stderr, - "SAREA: front offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->front_offset, sarea->front_size, - (unsigned) sarea->front_handle); - fprintf(stderr, - "SAREA: back offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->back_offset, sarea->back_size, - (unsigned) sarea->back_handle); - fprintf(stderr, "SAREA: depth offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->depth_offset, sarea->depth_size, - (unsigned) sarea->depth_handle); - fprintf(stderr, 
"SAREA: tex offset: 0x%08x size: 0x%x handle: 0x%x\n", - sarea->tex_offset, sarea->tex_size, (unsigned) sarea->tex_handle); - fprintf(stderr, "SAREA: rotation: %d\n", sarea->rotation); - fprintf(stderr, - "SAREA: rotated offset: 0x%08x size: 0x%x\n", - sarea->rotated_offset, sarea->rotated_size); - fprintf(stderr, "SAREA: rotated pitch: %d\n", sarea->rotated_pitch); -} -#endif - - -/** - * Use the information in the sarea to update the screen parameters - * related to screen rotation. Needs to be called locked. - */ -void -intelUpdateScreenRotation(__DRIscreenPrivate * sPriv, drmI830Sarea * sarea) -{ - struct intel_screen *intelScreen = intel_screen(sPriv); - - if (intelScreen->front.map) { - drmUnmap(intelScreen->front.map, intelScreen->front.size); - intelScreen->front.map = NULL; - } - - if (intelScreen->front.buffer) - driDeleteBuffers(1, &intelScreen->front.buffer); - - intelScreen->front.width = sarea->width; - intelScreen->front.height = sarea->height; - intelScreen->front.offset = sarea->front_offset; - intelScreen->front.pitch = sarea->pitch * intelScreen->front.cpp; - intelScreen->front.size = sarea->front_size; - intelScreen->front.handle = sarea->front_handle; - - assert( sarea->front_size >= - intelScreen->front.pitch * intelScreen->front.height ); - -#if 0 /* JB not important */ - if (!sarea->front_handle) - return; - - if (drmMap(sPriv->fd, - sarea->front_handle, - intelScreen->front.size, - (drmAddress *) & intelScreen->front.map) != 0) { - fprintf(stderr, "drmMap(frontbuffer) failed!\n"); - return; - } -#endif - -#if 0 /* JB */ - if (intelScreen->staticPool) { - driGenBuffers(intelScreen->staticPool, "static region", 1, - &intelScreen->front.buffer, 64, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_NO_MOVE | - DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0); - - driBOSetStatic(intelScreen->front.buffer, - intelScreen->front.offset, - intelScreen->front.pitch * intelScreen->front.height, - intelScreen->front.map, 0); - } -#else - if (intelScreen->base.staticPool) { - 
if (intelScreen->front.buffer) { - driBOUnReference(intelScreen->front.buffer); - pipe_surface_reference(&intelScreen->front.surface, NULL); - pipe_texture_reference(&intelScreen->front.texture, NULL); - } - intelCreateSurface(intelScreen, &intelScreen->base.base, sarea->front_bo_handle); - } -#endif -} - - -boolean -intelCreatePools(__DRIscreenPrivate * sPriv) -{ - //unsigned batchPoolSize = 1024*1024; - struct intel_screen *intelScreen = intel_screen(sPriv); - - if (intelScreen->havePools) - return GL_TRUE; - - intelScreen->havePools = GL_TRUE; - - intelUpdateScreenRotation(sPriv, intelScreen->sarea); - - return GL_TRUE; -} - -static const char * -intel_get_name( struct pipe_winsys *winsys ) -{ - return "Intel/DRI/ttm"; -} - -/* - * The state tracker (should!) keep track of whether the fake - * frontbuffer has been touched by any rendering since the last time - * we copied its contents to the real frontbuffer. Our task is easy: - */ -static void -intel_flush_frontbuffer( struct pipe_winsys *winsys, - struct pipe_surface *surf, - void *context_private) -{ - struct intel_context *intel = (struct intel_context *) context_private; - __DRIdrawablePrivate *dPriv = intel->driDrawable; - - intelDisplaySurface(dPriv, surf, NULL); -} - -static boolean -intelInitDriver(__DRIscreenPrivate * sPriv) -{ - struct intel_screen *intelScreen; - I830DRIPtr gDRIPriv = (I830DRIPtr) sPriv->pDevPriv; - - PFNGLXSCRENABLEEXTENSIONPROC glx_enable_extension = - (PFNGLXSCRENABLEEXTENSIONPROC) (*dri_interface-> - getProcAddress("glxEnableExtension")); - void *const psc = sPriv->psc->screenConfigs; - - if (sPriv->devPrivSize != sizeof(I830DRIRec)) { - fprintf(stderr, - "\nERROR! 
sizeof(I830DRIRec) does not match passed size from device driver\n"); - return GL_FALSE; - } - - /* Allocate the private area */ - intelScreen = CALLOC_STRUCT(intel_screen); - if (!intelScreen) - return GL_FALSE; - - /* parse information in __driConfigOptions */ - driParseOptionInfo(&intelScreen->optionCache, - __driConfigOptions, __driNConfigOptions); - - sPriv->private = (void *) intelScreen; - - intelScreen->sarea = (drmI830Sarea *) (((GLubyte *) sPriv->pSAREA) + - gDRIPriv->sarea_priv_offset); - intelScreen->deviceID = gDRIPriv->deviceID; - intelScreen->front.cpp = gDRIPriv->cpp; - intelScreen->drmMinor = sPriv->drmMinor; - - assert(gDRIPriv->bitsPerPixel == 16 || - gDRIPriv->bitsPerPixel == 32); - - intelUpdateScreenRotation(sPriv, intelScreen->sarea); - - if (0) - intelPrintDRIInfo(intelScreen, sPriv, gDRIPriv); - - if (glx_enable_extension != NULL) { - (*glx_enable_extension) (psc, "GLX_SGI_swap_control"); - (*glx_enable_extension) (psc, "GLX_SGI_video_sync"); - (*glx_enable_extension) (psc, "GLX_MESA_swap_control"); - (*glx_enable_extension) (psc, "GLX_MESA_swap_frame_usage"); - (*glx_enable_extension) (psc, "GLX_SGI_make_current_read"); - } - - intelScreen->base.base.flush_frontbuffer = intel_flush_frontbuffer; - intelScreen->base.base.get_name = intel_get_name; - intel_be_init_device(&intelScreen->base, sPriv->fd, intelScreen->deviceID); - - return GL_TRUE; -} - - -static void -intelDestroyScreen(__DRIscreenPrivate * sPriv) -{ - struct intel_screen *intelScreen = intel_screen(sPriv); - - intel_be_destroy_device(&intelScreen->base); - /* intelUnmapScreenRegions(intelScreen); */ - - FREE(intelScreen); - sPriv->private = NULL; -} - - -/** - * This is called when we need to set up GL rendering to a new X window. 
- */ -static boolean -intelCreateBuffer(__DRIscreenPrivate * driScrnPriv, - __DRIdrawablePrivate * driDrawPriv, - const __GLcontextModes * visual, boolean isPixmap) -{ - if (isPixmap) { - return GL_FALSE; /* not implemented */ - } - else { - enum pipe_format colorFormat, depthFormat, stencilFormat; - struct intel_framebuffer *intelfb = CALLOC_STRUCT(intel_framebuffer); - - if (!intelfb) - return GL_FALSE; - - if (visual->redBits == 5) - colorFormat = PIPE_FORMAT_R5G6B5_UNORM; - else - colorFormat = PIPE_FORMAT_A8R8G8B8_UNORM; - - if (visual->depthBits == 16) - depthFormat = PIPE_FORMAT_Z16_UNORM; - else if (visual->depthBits == 24) - depthFormat = PIPE_FORMAT_S8Z24_UNORM; - else - depthFormat = PIPE_FORMAT_NONE; - - if (visual->stencilBits == 8) - stencilFormat = PIPE_FORMAT_S8Z24_UNORM; - else - stencilFormat = PIPE_FORMAT_NONE; - - intelfb->stfb = st_create_framebuffer(visual, - colorFormat, - depthFormat, - stencilFormat, - driDrawPriv->w, - driDrawPriv->h, - (void*) intelfb); - if (!intelfb->stfb) { - free(intelfb); - return GL_FALSE; - } - - driDrawPriv->driverPrivate = (void *) intelfb; - return GL_TRUE; - } -} - -static void -intelDestroyBuffer(__DRIdrawablePrivate * driDrawPriv) -{ - struct intel_framebuffer *intelfb = intel_framebuffer(driDrawPriv); - assert(intelfb->stfb); - st_unreference_framebuffer(&intelfb->stfb); - free(intelfb); -} - - -/** - * Get information about previous buffer swaps. 
- */ -static int -intelGetSwapInfo(__DRIdrawablePrivate * dPriv, __DRIswapInfo * sInfo) -{ - if ((dPriv == NULL) || (dPriv->driverPrivate == NULL) - || (sInfo == NULL)) { - return -1; - } - - return 0; -} - - -static void -intelSetTexOffset(__DRIcontext *pDRICtx, int texname, - unsigned long long offset, int depth, uint pitch) -{ - abort(); -#if 0 - struct intel_context *intel = (struct intel_context*) - ((__DRIcontextPrivate*)pDRICtx->private)->driverPrivate; - struct gl_texture_object *tObj = _mesa_lookup_texture(&intel->ctx, texname); - struct st_texture_object *stObj = st_texture_object(tObj); - - if (!stObj) - return; - - if (stObj->pt) - st->pipe->texture_release(intel->st->pipe, &stObj->pt); - - stObj->imageOverride = GL_TRUE; - stObj->depthOverride = depth; - stObj->pitchOverride = pitch; - - if (offset) - stObj->textureOffset = offset; -#endif -} - - -static const struct __DriverAPIRec intelAPI = { - .InitDriver = intelInitDriver, - .DestroyScreen = intelDestroyScreen, - .CreateContext = intelCreateContext, - .DestroyContext = intelDestroyContext, - .CreateBuffer = intelCreateBuffer, - .DestroyBuffer = intelDestroyBuffer, - .SwapBuffers = intelSwapBuffers, - .MakeCurrent = intelMakeCurrent, - .UnbindContext = intelUnbindContext, - .GetSwapInfo = intelGetSwapInfo, - .GetMSC = driGetMSC32, - .WaitForMSC = driWaitForMSC32, - .WaitForSBC = NULL, - .SwapBuffersMSC = NULL, - .CopySubBuffer = intelCopySubBuffer, - .setTexOffset = intelSetTexOffset, -}; - - -static __GLcontextModes * -intelFillInModes(unsigned pixel_bits, unsigned depth_bits, - unsigned stencil_bits, boolean have_back_buffer) -{ - __GLcontextModes *modes; - __GLcontextModes *m; - unsigned num_modes; - unsigned depth_buffer_factor; - unsigned back_buffer_factor; - GLenum fb_format; - GLenum fb_type; - - /* GLX_SWAP_COPY_OML is only supported because the Intel driver doesn't - * support pageflipping at all. 
- */ - static const GLenum back_buffer_modes[] = { - GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML - }; - - uint8_t depth_bits_array[3]; - uint8_t stencil_bits_array[3]; - uint8_t msaa_samples_array[1]; - - - depth_bits_array[0] = 0; - depth_bits_array[1] = depth_bits; - depth_bits_array[2] = depth_bits; - msaa_samples_array[0] = 0; - - /* Just like with the accumulation buffer, always provide some modes - * with a stencil buffer. It will be a sw fallback, but some apps won't - * care about that. - */ - stencil_bits_array[0] = 0; - stencil_bits_array[1] = 0; - if (depth_bits == 24) - stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits; - - stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits; - - depth_buffer_factor = ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1; - back_buffer_factor = (have_back_buffer) ? 3 : 1; - - num_modes = depth_buffer_factor * back_buffer_factor * 4; - - if (pixel_bits == 16) { - fb_format = GL_RGB; - fb_type = GL_UNSIGNED_SHORT_5_6_5; - } - else { - fb_format = GL_BGRA; - fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; - } - - modes = - (*dri_interface->createContextModes) (num_modes, - sizeof(__GLcontextModes)); - m = modes; - if (!driFillInModes(&m, fb_format, fb_type, - depth_bits_array, stencil_bits_array, - depth_buffer_factor, back_buffer_modes, - back_buffer_factor, msaa_samples_array, 1, GLX_TRUE_COLOR)) { - fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, - __LINE__); - return NULL; - } - if (!driFillInModes(&m, fb_format, fb_type, - depth_bits_array, stencil_bits_array, - depth_buffer_factor, back_buffer_modes, - back_buffer_factor, msaa_samples_array, 1, GLX_DIRECT_COLOR)) { - fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__, - __LINE__); - return NULL; - } - - /* Mark the visual as slow if there are "fake" stencil bits. 
- */ - for (m = modes; m != NULL; m = m->next) { - if ((m->stencilBits != 0) && (m->stencilBits != stencil_bits)) { - m->visualRating = GLX_SLOW_CONFIG; - } - } - - return modes; -} - - -/** - * This is the bootstrap function for the driver. libGL supplies all of the - * requisite information about the system, and the driver initializes itself. - * This routine also fills in the linked list pointed to by \c driver_modes - * with the \c __GLcontextModes that the driver can support for windows or - * pbuffers. - * - * \return A pointer to a \c __DRIscreenPrivate on success, or \c NULL on - * failure. - */ -PUBLIC void * -__driCreateNewScreen_20050727(__DRInativeDisplay * dpy, int scrn, - __DRIscreen * psc, - const __GLcontextModes * modes, - const __DRIversion * ddx_version, - const __DRIversion * dri_version, - const __DRIversion * drm_version, - const __DRIframebuffer * frame_buffer, - drmAddress pSAREA, int fd, - int internal_api_version, - const __DRIinterfaceMethods * interface, - __GLcontextModes ** driver_modes) -{ - __DRIscreenPrivate *psp; - static const __DRIversion ddx_expected = { 1, 7, 0 }; - static const __DRIversion dri_expected = { 4, 0, 0 }; - static const __DRIversion drm_expected = { 1, 7, 0 }; - - dri_interface = interface; - - if (!driCheckDriDdxDrmVersions2("i915", - dri_version, &dri_expected, - ddx_version, &ddx_expected, - drm_version, &drm_expected)) { - return NULL; - } - - psp = __driUtilCreateNewScreen(dpy, scrn, psc, NULL, - ddx_version, dri_version, drm_version, - frame_buffer, pSAREA, fd, - internal_api_version, &intelAPI); - - if (psp != NULL) { - I830DRIPtr dri_priv = (I830DRIPtr) psp->pDevPriv; - *driver_modes = intelFillInModes(dri_priv->cpp * 8, - (dri_priv->cpp == 2) ? 16 : 24, - (dri_priv->cpp == 2) ? 0 : 8, 1); - - /* Calling driInitExtensions here, with a NULL context pointer, - * does not actually enable the extensions. 
It just makes sure - * that all the dispatch offsets for all the extensions that - * *might* be enables are known. This is needed because the - * dispatch offsets need to be known when _mesa_context_create - * is called, but we can't enable the extensions until we have a - * context pointer. - * - * Hello chicken. Hello egg. How are you two today? - */ - driInitExtensions(NULL, card_extensions, GL_FALSE); - } - - return (void *) psp; -} - diff --git a/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.c b/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.c deleted file mode 100644 index 34ad7eebe1..0000000000 --- a/src/gallium/winsys/drm/intel/dri/intel_swapbuffers.c +++ /dev/null @@ -1,260 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "intel_screen.h" -#include "intel_context.h" -#include "intel_swapbuffers.h" - -#include "intel_reg.h" - -#include "pipe/p_context.h" -#include "state_tracker/st_public.h" -#include "state_tracker/st_context.h" -#include "state_tracker/st_cb_fbo.h" - -#include "ws_dri_bufmgr.h" -#include "intel_batchbuffer.h" - -/** - * Display a colorbuffer surface in an X window. - * Used for SwapBuffers and flushing front buffer rendering. - * - * \param dPriv the window/drawable to display into - * \param surf the surface to display - * \param rect optional subrect of surface to display (may be NULL). - */ -void -intelDisplaySurface(__DRIdrawablePrivate *dPriv, - struct pipe_surface *surf, - const drm_clip_rect_t *rect) -{ - struct intel_screen *intelScreen = intel_screen(dPriv->driScreenPriv); - struct intel_context *intel = intelScreen->dummyContext; - - DBG(SWAP, "%s\n", __FUNCTION__); - - if (!intel) { - /* XXX this is where some kind of extra/meta context could be useful */ - return; - } - - if (intel->last_swap_fence) { - driFenceFinish(intel->last_swap_fence, DRM_FENCE_TYPE_EXE, TRUE); - driFenceUnReference(&intel->last_swap_fence); - intel->last_swap_fence = NULL; - } - intel->last_swap_fence = intel->first_swap_fence; - intel->first_swap_fence = NULL; - - /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets - * should work regardless. 
- */ - LOCK_HARDWARE(intel); - /* if this drawable isn't currently bound the LOCK_HARDWARE done on the - * current context (which is what intelScreenContext should return) might - * not get a contended lock and thus cliprects not updated (tests/manywin) - */ - if (intel_context(dPriv->driContextPriv) != intel) - DRI_VALIDATE_DRAWABLE_INFO(intel->driScreen, dPriv); - - - if (dPriv && dPriv->numClipRects) { - const int srcWidth = surf->width; - const int srcHeight = surf->height; - const int nbox = dPriv->numClipRects; - const drm_clip_rect_t *pbox = dPriv->pClipRects; - const int pitch = intelScreen->front.pitch / intelScreen->front.cpp; - const int cpp = intelScreen->front.cpp; - const int srcpitch = surf->stride / cpp; - int BR13, CMD; - int i; - - ASSERT(surf->buffer); - - DBG(SWAP, "screen pitch %d src surface pitch %d\n", - pitch, surf->stride); - - if (cpp == 2) { - BR13 = (pitch * cpp) | (0xCC << 16) | (1 << 24); - CMD = XY_SRC_COPY_BLT_CMD; - } - else { - BR13 = (pitch * cpp) | (0xCC << 16) | (1 << 24) | (1 << 25); - CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - } - - for (i = 0; i < nbox; i++, pbox++) { - drm_clip_rect_t box; - drm_clip_rect_t sbox; - - if (pbox->x1 > pbox->x2 || - pbox->y1 > pbox->y2 || - pbox->x2 > intelScreen->front.width || - pbox->y2 > intelScreen->front.height) { - /* invalid cliprect, skip it */ - continue; - } - - box = *pbox; - - if (rect) { - /* intersect cliprect with user-provided src rect */ - drm_clip_rect_t rrect; - - rrect.x1 = dPriv->x + rect->x1; - rrect.y1 = (dPriv->h - rect->y1 - rect->y2) + dPriv->y; - rrect.x2 = rect->x2 + rrect.x1; - rrect.y2 = rect->y2 + rrect.y1; - if (rrect.x1 > box.x1) - box.x1 = rrect.x1; - if (rrect.y1 > box.y1) - box.y1 = rrect.y1; - if (rrect.x2 < box.x2) - box.x2 = rrect.x2; - if (rrect.y2 < box.y2) - box.y2 = rrect.y2; - - if (box.x1 > box.x2 || box.y1 > box.y2) - continue; - } - - /* restrict blit to size of actually rendered area */ - if (box.x2 
- box.x1 > srcWidth) - box.x2 = srcWidth + box.x1; - if (box.y2 - box.y1 > srcHeight) - box.y2 = srcHeight + box.y1; - - DBG(SWAP, "box x1 x2 y1 y2 %d %d %d %d\n", - box.x1, box.x2, box.y1, box.y2); - - sbox.x1 = box.x1 - dPriv->x; - sbox.y1 = box.y1 - dPriv->y; - - assert(box.x1 < box.x2); - assert(box.y1 < box.y2); - - /* XXX this could be done with pipe->surface_copy() */ - /* XXX should have its own batch buffer */ - if (!BEGIN_BATCH(8, 2)) { - /* - * Since we share this batch buffer with a context - * we can't flush it since that risks a GPU lockup - */ - assert(0); - continue; - } - - OUT_BATCH(CMD); - OUT_BATCH(BR13); - OUT_BATCH((box.y1 << 16) | box.x1); - OUT_BATCH((box.y2 << 16) | box.x2); - - OUT_RELOC(intelScreen->front.buffer, - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, - DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE, 0); - OUT_BATCH((sbox.y1 << 16) | sbox.x1); - OUT_BATCH((srcpitch * cpp) & 0xffff); - OUT_RELOC(dri_bo(surf->buffer), - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - DRM_BO_MASK_MEM | DRM_BO_FLAG_READ, 0); - - } - - if (intel->first_swap_fence) - driFenceUnReference(&intel->first_swap_fence); - intel->first_swap_fence = intel_be_batchbuffer_flush(intel->base.batch); - } - - UNLOCK_HARDWARE(intel); - - if (intel->lastStamp != dPriv->lastStamp) { - intelUpdateWindowSize(dPriv); - intel->lastStamp = dPriv->lastStamp; - } -} - - - -/** - * This will be called whenever the currently bound window is moved/resized. 
- */ -void -intelUpdateWindowSize(__DRIdrawablePrivate *dPriv) -{ - struct intel_framebuffer *intelfb = intel_framebuffer(dPriv); - assert(intelfb->stfb); - st_resize_framebuffer(intelfb->stfb, dPriv->w, dPriv->h); -} - - - -void -intelSwapBuffers(__DRIdrawablePrivate * dPriv) -{ - struct intel_framebuffer *intel_fb = intel_framebuffer(dPriv); - struct pipe_surface *back_surf; - - assert(intel_fb); - assert(intel_fb->stfb); - - back_surf = st_get_framebuffer_surface(intel_fb->stfb, - ST_SURFACE_BACK_LEFT); - if (back_surf) { - st_notify_swapbuffers(intel_fb->stfb); - intelDisplaySurface(dPriv, back_surf, NULL); - st_notify_swapbuffers_complete(intel_fb->stfb); - } -} - - -/** - * Called via glXCopySubBufferMESA() to copy a subrect of the back - * buffer to the front buffer/screen. - */ -void -intelCopySubBuffer(__DRIdrawablePrivate * dPriv, int x, int y, int w, int h) -{ - struct intel_framebuffer *intel_fb = intel_framebuffer(dPriv); - struct pipe_surface *back_surf; - - assert(intel_fb); - assert(intel_fb->stfb); - - back_surf = st_get_framebuffer_surface(intel_fb->stfb, - ST_SURFACE_BACK_LEFT); - if (back_surf) { - drm_clip_rect_t rect; - rect.x1 = x; - rect.y1 = y; - rect.x2 = w; - rect.y2 = h; - - st_notify_swapbuffers(intel_fb->stfb); - intelDisplaySurface(dPriv, back_surf, &rect); - } -} diff --git a/src/gallium/winsys/drm/intel/dri/server/i830_common.h b/src/gallium/winsys/drm/intel/dri/server/i830_common.h deleted file mode 100644 index 3452ddb3c9..0000000000 --- a/src/gallium/winsys/drm/intel/dri/server/i830_common.h +++ /dev/null @@ -1,255 +0,0 @@ -/************************************************************************** - -Copyright 2001 VA Linux Systems Inc., Fremont, California. -Copyright 2002 Tungsten Graphics Inc., Cedar Park, Texas. - -All Rights Reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -on the rights to use, copy, modify, merge, publish, distribute, sub -license, and/or sell copies of the Software, and to permit persons to whom -the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice (including the next -paragraph) shall be included in all copies or substantial portions of the -Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL -ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, -DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR -OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE -USE OR OTHER DEALINGS IN THE SOFTWARE. 
- -**************************************************************************/ - - -#ifndef _I830_COMMON_H_ -#define _I830_COMMON_H_ - - -#define I830_NR_TEX_REGIONS 255 /* maximum due to use of chars for next/prev */ -#define I830_LOG_MIN_TEX_REGION_SIZE 14 - - -/* Driver specific DRM command indices - * NOTE: these are not OS specific, but they are driver specific - */ -#define DRM_I830_INIT 0x00 -#define DRM_I830_FLUSH 0x01 -#define DRM_I830_FLIP 0x02 -#define DRM_I830_BATCHBUFFER 0x03 -#define DRM_I830_IRQ_EMIT 0x04 -#define DRM_I830_IRQ_WAIT 0x05 -#define DRM_I830_GETPARAM 0x06 -#define DRM_I830_SETPARAM 0x07 -#define DRM_I830_ALLOC 0x08 -#define DRM_I830_FREE 0x09 -#define DRM_I830_INIT_HEAP 0x0a -#define DRM_I830_CMDBUFFER 0x0b -#define DRM_I830_DESTROY_HEAP 0x0c -#define DRM_I830_SET_VBLANK_PIPE 0x0d -#define DRM_I830_GET_VBLANK_PIPE 0x0e -#define DRM_I830_MMIO 0x10 - -typedef struct { - enum { - I830_INIT_DMA = 0x01, - I830_CLEANUP_DMA = 0x02, - I830_RESUME_DMA = 0x03 - } func; - unsigned int mmio_offset; - int sarea_priv_offset; - unsigned int ring_start; - unsigned int ring_end; - unsigned int ring_size; - unsigned int front_offset; - unsigned int back_offset; - unsigned int depth_offset; - unsigned int w; - unsigned int h; - unsigned int pitch; - unsigned int pitch_bits; - unsigned int back_pitch; - unsigned int depth_pitch; - unsigned int cpp; - unsigned int chipset; -} drmI830Init; - -typedef struct { - drmTextureRegion texList[I830_NR_TEX_REGIONS+1]; - int last_upload; /* last time texture was uploaded */ - int last_enqueue; /* last time a buffer was enqueued */ - int last_dispatch; /* age of the most recently dispatched buffer */ - int ctxOwner; /* last context to upload state */ - /** Last context that used the buffer manager. */ - int texAge; - int pf_enabled; /* is pageflipping allowed? */ - int pf_active; - int pf_current_page; /* which buffer is being displayed? 
*/ - int perf_boxes; /* performance boxes to be displayed */ - int width, height; /* screen size in pixels */ - - drm_handle_t front_handle; - int front_offset; - int front_size; - - drm_handle_t back_handle; - int back_offset; - int back_size; - - drm_handle_t depth_handle; - int depth_offset; - int depth_size; - - drm_handle_t tex_handle; - int tex_offset; - int tex_size; - int log_tex_granularity; - int pitch; - int rotation; /* 0, 90, 180 or 270 */ - int rotated_offset; - int rotated_size; - int rotated_pitch; - int virtualX, virtualY; - - unsigned int front_tiled; - unsigned int back_tiled; - unsigned int depth_tiled; - unsigned int rotated_tiled; - unsigned int rotated2_tiled; - - int planeA_x; - int planeA_y; - int planeA_w; - int planeA_h; - int planeB_x; - int planeB_y; - int planeB_w; - int planeB_h; - - /* Triple buffering */ - drm_handle_t third_handle; - int third_offset; - int third_size; - unsigned int third_tiled; - - /* buffer object handles for the static buffers. May change - * over the lifetime of the client, though it doesn't in our current - * implementation. - */ - unsigned int front_bo_handle; - unsigned int back_bo_handle; - unsigned int third_bo_handle; - unsigned int depth_bo_handle; -} drmI830Sarea; - -/* Flags for perf_boxes - */ -#define I830_BOX_RING_EMPTY 0x1 /* populated by kernel */ -#define I830_BOX_FLIP 0x2 /* populated by kernel */ -#define I830_BOX_WAIT 0x4 /* populated by kernel & client */ -#define I830_BOX_TEXTURE_LOAD 0x8 /* populated by kernel */ -#define I830_BOX_LOST_CONTEXT 0x10 /* populated by client */ - - -typedef struct { - int start; /* agp offset */ - int used; /* nr bytes in use */ - int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ - int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/ - int num_cliprects; /* mulitpass with multiple cliprects? 
*/ - drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */ -} drmI830BatchBuffer; - -typedef struct { - char *buf; /* agp offset */ - int sz; /* nr bytes in use */ - int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */ - int DR4; /* window origin for GFX_OP_DRAWRECT_INFO*/ - int num_cliprects; /* mulitpass with multiple cliprects? */ - drm_clip_rect_t *cliprects; /* pointer to userspace cliprects */ -} drmI830CmdBuffer; - -typedef struct { - int *irq_seq; -} drmI830IrqEmit; - -typedef struct { - int irq_seq; -} drmI830IrqWait; - -typedef struct { - int param; - int *value; -} drmI830GetParam; - -#define I830_PARAM_IRQ_ACTIVE 1 -#define I830_PARAM_ALLOW_BATCHBUFFER 2 - -typedef struct { - int param; - int value; -} drmI830SetParam; - -#define I830_SETPARAM_USE_MI_BATCHBUFFER_START 1 -#define I830_SETPARAM_TEX_LRU_LOG_GRANULARITY 2 -#define I830_SETPARAM_ALLOW_BATCHBUFFER 3 - - -/* A memory manager for regions of shared memory: - */ -#define I830_MEM_REGION_AGP 1 - -typedef struct { - int region; - int alignment; - int size; - int *region_offset; /* offset from start of fb or agp */ -} drmI830MemAlloc; - -typedef struct { - int region; - int region_offset; -} drmI830MemFree; - -typedef struct { - int region; - int size; - int start; -} drmI830MemInitHeap; - -typedef struct { - int region; -} drmI830MemDestroyHeap; - -#define DRM_I830_VBLANK_PIPE_A 1 -#define DRM_I830_VBLANK_PIPE_B 2 - -typedef struct { - int pipe; -} drmI830VBlankPipe; - -#define MMIO_READ 0 -#define MMIO_WRITE 1 - -#define MMIO_REGS_IA_PRIMATIVES_COUNT 0 -#define MMIO_REGS_IA_VERTICES_COUNT 1 -#define MMIO_REGS_VS_INVOCATION_COUNT 2 -#define MMIO_REGS_GS_PRIMITIVES_COUNT 3 -#define MMIO_REGS_GS_INVOCATION_COUNT 4 -#define MMIO_REGS_CL_PRIMITIVES_COUNT 5 -#define MMIO_REGS_CL_INVOCATION_COUNT 6 -#define MMIO_REGS_PS_INVOCATION_COUNT 7 -#define MMIO_REGS_PS_DEPTH_COUNT 8 - -typedef struct { - unsigned int read_write:1; - unsigned int reg:31; - void __user *data; -} drmI830MMIO; - -#endif /* 
_I830_DRM_H_ */ diff --git a/src/gallium/winsys/drm/intel/dri/server/i830_dri.h b/src/gallium/winsys/drm/intel/dri/server/i830_dri.h deleted file mode 100644 index 0d514b6c38..0000000000 --- a/src/gallium/winsys/drm/intel/dri/server/i830_dri.h +++ /dev/null @@ -1,62 +0,0 @@ - -#ifndef _I830_DRI_H -#define _I830_DRI_H - -#include "xf86drm.h" -#include "i830_common.h" - -#define I830_MAX_DRAWABLES 256 - -#define I830_MAJOR_VERSION 1 -#define I830_MINOR_VERSION 7 -#define I830_PATCHLEVEL 2 - -#define I830_REG_SIZE 0x80000 - -typedef struct _I830DRIRec { - drm_handle_t regs; - drmSize regsSize; - - drmSize unused1; /* backbufferSize */ - drm_handle_t unused2; /* backbuffer */ - - drmSize unused3; /* depthbufferSize */ - drm_handle_t unused4; /* depthbuffer */ - - drmSize unused5; /* rotatedSize */ - drm_handle_t unused6; /* rotatedbuffer */ - - drm_handle_t unused7; /* textures */ - int unused8; /* textureSize */ - - drm_handle_t unused9; /* agp_buffers */ - drmSize unused10; /* agp_buf_size */ - - int deviceID; - int width; - int height; - int mem; - int cpp; - int bitsPerPixel; - - int unused11[8]; /* was front/back/depth/rotated offset/pitch */ - - int unused12; /* logTextureGranularity */ - int unused13; /* textureOffset */ - - int irq; - int sarea_priv_offset; -} I830DRIRec, *I830DRIPtr; - -typedef struct { - /* Nothing here yet */ - int dummy; -} I830ConfigPrivRec, *I830ConfigPrivPtr; - -typedef struct { - /* Nothing here yet */ - int dummy; -} I830DRIContextRec, *I830DRIContextPtr; - - -#endif diff --git a/src/gallium/winsys/drm/intel/egl/Makefile b/src/gallium/winsys/drm/intel/egl/Makefile index f0b5a44389..c5217ad2d6 100644 --- a/src/gallium/winsys/drm/intel/egl/Makefile +++ b/src/gallium/winsys/drm/intel/egl/Makefile @@ -1,26 +1,26 @@ TOP = ../../../../../.. +GALLIUMDIR = ../../../.. 
include $(TOP)/configs/current LIBNAME = EGL_i915.so PIPE_DRIVERS = \ + $(TOP)/src/gallium/state_trackers/egl/libegldrm.a \ + $(GALLIUMDIR)/winsys/drm/intel/gem/libinteldrm.a \ $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(TOP)/src/gallium/drivers/i915simple/libi915simple.a \ - ../common/libinteldrm.a + $(TOP)/src/gallium/drivers/i915simple/libi915simple.a -DRIVER_SOURCES = \ - intel_swapbuffers.c \ - intel_context.c \ - intel_device.c \ - intel_egl.c +DRIVER_SOURCES = C_SOURCES = \ $(COMMON_GALLIUM_SOURCES) \ $(DRIVER_SOURCES) +DRIVER_EXTRAS = -ldrm_intel + ASM_SOURCES = -DRIVER_DEFINES = -I../common $(shell pkg-config libdrm --atleast-version=2.3.1 \ +DRIVER_DEFINES = -I../gem $(shell pkg-config libdrm --atleast-version=2.3.1 \ && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") include ../../Makefile.template diff --git a/src/gallium/winsys/drm/intel/egl/SConscript b/src/gallium/winsys/drm/intel/egl/SConscript deleted file mode 100644 index 0ad19d42a8..0000000000 --- a/src/gallium/winsys/drm/intel/egl/SConscript +++ /dev/null @@ -1,39 +0,0 @@ -Import('*') - -env = drienv.Clone() - -env.Append(CPPPATH = [ - '../intel', - 'server' -]) - -#MINIGLX_SOURCES = server/intel_dri.c - -DRIVER_SOURCES = [ - 'intel_winsys_pipe.c', - 'intel_winsys_softpipe.c', - 'intel_winsys_i915.c', - 'intel_batchbuffer.c', - 'intel_swapbuffers.c', - 'intel_context.c', - 'intel_lock.c', - 'intel_screen.c', - 'intel_batchpool.c', -] - -sources = \ - COMMON_GALLIUM_SOURCES + \ - COMMON_BM_SOURCES + \ - DRIVER_SOURCES - -drivers = [ - softpipe, - i915simple -] - -# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions -env.SharedLibrary( - target ='i915tex_dri.so', - source = sources, - LIBS = drivers + mesa + auxiliaries + env['LIBS'], -)
\ No newline at end of file diff --git a/src/gallium/winsys/drm/intel/egl/intel_batchbuffer.h b/src/gallium/winsys/drm/intel/egl/intel_batchbuffer.h deleted file mode 100644 index 3e95326168..0000000000 --- a/src/gallium/winsys/drm/intel/egl/intel_batchbuffer.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef INTEL_BATCHBUFFER_H -#define INTEL_BATCHBUFFER_H - -#include "intel_be_batchbuffer.h" - -/* - * Need to redefine the BATCH defines - */ - -#undef BEGIN_BATCH -#define BEGIN_BATCH(dwords, relocs) \ - (i915_batchbuffer_check(&intel->base.batch->base, dwords, relocs)) - -#undef OUT_BATCH -#define OUT_BATCH(d) \ - i915_batchbuffer_dword(&intel->base.batch->base, d) - -#undef OUT_RELOC -#define OUT_RELOC(buf,flags,mask,delta) do { \ - assert((delta) >= 0); \ - intel_be_offset_relocation(intel->base.batch, delta, buf, flags, mask); \ -} while (0) - -#endif diff --git a/src/gallium/winsys/drm/intel/egl/intel_context.c b/src/gallium/winsys/drm/intel/egl/intel_context.c deleted file mode 100644 index 927addb834..0000000000 --- a/src/gallium/winsys/drm/intel/egl/intel_context.c +++ /dev/null @@ -1,242 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "i915simple/i915_screen.h" - -#include "intel_device.h" -#include "intel_context.h" -#include "intel_batchbuffer.h" - -#include "state_tracker/st_public.h" -#include "pipe/p_defines.h" -#include "pipe/p_context.h" -#include "intel_egl.h" -#include "utils.h" - -#ifdef DEBUG -int __intel_debug = 0; -#endif - - -#define need_GL_ARB_multisample -#define need_GL_ARB_point_parameters -#define need_GL_ARB_texture_compression -#define need_GL_ARB_vertex_buffer_object -#define need_GL_ARB_vertex_program -#define need_GL_ARB_window_pos -#define need_GL_EXT_blend_color -#define need_GL_EXT_blend_equation_separate -#define need_GL_EXT_blend_func_separate -#define need_GL_EXT_blend_minmax -#define need_GL_EXT_cull_vertex -#define need_GL_EXT_fog_coord -#define need_GL_EXT_framebuffer_object -#define need_GL_EXT_multi_draw_arrays -#define need_GL_EXT_secondary_color -#define need_GL_NV_vertex_program -#include "extension_helper.h" - - -/** - * Extension strings exported by the intel driver. - * - * \note - * It appears that ARB_texture_env_crossbar has "disappeared" compared to the - * old i830-specific driver. 
- */ -const struct dri_extension card_extensions[] = { - {"GL_ARB_multisample", GL_ARB_multisample_functions}, - {"GL_ARB_multitexture", NULL}, - {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions}, - {"GL_ARB_texture_border_clamp", NULL}, - {"GL_ARB_texture_compression", GL_ARB_texture_compression_functions}, - {"GL_ARB_texture_cube_map", NULL}, - {"GL_ARB_texture_env_add", NULL}, - {"GL_ARB_texture_env_combine", NULL}, - {"GL_ARB_texture_env_dot3", NULL}, - {"GL_ARB_texture_mirrored_repeat", NULL}, - {"GL_ARB_texture_rectangle", NULL}, - {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions}, - {"GL_ARB_pixel_buffer_object", NULL}, - {"GL_ARB_vertex_program", GL_ARB_vertex_program_functions}, - {"GL_ARB_window_pos", GL_ARB_window_pos_functions}, - {"GL_EXT_blend_color", GL_EXT_blend_color_functions}, - {"GL_EXT_blend_equation_separate", GL_EXT_blend_equation_separate_functions}, - {"GL_EXT_blend_func_separate", GL_EXT_blend_func_separate_functions}, - {"GL_EXT_blend_minmax", GL_EXT_blend_minmax_functions}, - {"GL_EXT_blend_subtract", NULL}, - {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions}, - {"GL_EXT_fog_coord", GL_EXT_fog_coord_functions}, - {"GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions}, - {"GL_EXT_multi_draw_arrays", GL_EXT_multi_draw_arrays_functions}, - {"GL_EXT_packed_depth_stencil", NULL}, - {"GL_EXT_pixel_buffer_object", NULL}, - {"GL_EXT_secondary_color", GL_EXT_secondary_color_functions}, - {"GL_EXT_stencil_wrap", NULL}, - {"GL_EXT_texture_edge_clamp", NULL}, - {"GL_EXT_texture_env_combine", NULL}, - {"GL_EXT_texture_env_dot3", NULL}, - {"GL_EXT_texture_filter_anisotropic", NULL}, - {"GL_EXT_texture_lod_bias", NULL}, - {"GL_3DFX_texture_compression_FXT1", NULL}, - {"GL_APPLE_client_storage", NULL}, - {"GL_MESA_pack_invert", NULL}, - {"GL_MESA_ycbcr_texture", NULL}, - {"GL_NV_blend_square", NULL}, - {"GL_NV_vertex_program", GL_NV_vertex_program_functions}, - {"GL_NV_vertex_program1_1", NULL}, - 
{"GL_SGIS_generate_mipmap", NULL }, - {NULL, NULL} -}; - - -/* - * Hardware lock functions. - * Doesn't do anything in EGL - */ - -static void -intel_lock_hardware(struct intel_be_context *context) -{ - (void)context; -} - -static void -intel_unlock_hardware(struct intel_be_context *context) -{ - (void)context; -} - -static boolean -intel_locked_hardware(struct intel_be_context *context) -{ - (void)context; - return FALSE; -} - - -/* - * Misc functions. - */ - -int -intel_create_context(struct egl_drm_context *egl_context, const __GLcontextModes *visual, void *sharedContextPrivate) -{ - struct intel_context *intel = CALLOC_STRUCT(intel_context); - struct intel_device *device = (struct intel_device *)egl_context->device->priv; - struct pipe_context *pipe; - struct st_context *st_share = NULL; - - egl_context->priv = intel; - - intel->intel_device = device; - intel->egl_context = egl_context; - intel->egl_device = egl_context->device; - - intel->base.hardware_lock = intel_lock_hardware; - intel->base.hardware_unlock = intel_unlock_hardware; - intel->base.hardware_locked = intel_locked_hardware; - - intel_be_init_context(&intel->base, &device->base); - -#if 0 - pipe = intel_create_softpipe(intel, screen->winsys); -#else - pipe = i915_create_context(device->pipe, &device->base.base, &intel->base.base); -#endif - - pipe->priv = intel; - - intel->st = st_create_context(pipe, visual, st_share); - - device->dummy = intel; - - return TRUE; -} - -int -intel_destroy_context(struct egl_drm_context *egl_context) -{ - struct intel_context *intel = egl_context->priv; - - if (intel->intel_device->dummy == intel) - intel->intel_device->dummy = NULL; - - st_destroy_context(intel->st); - intel_be_destroy_context(&intel->base); - free(intel); - return TRUE; -} - -void -intel_make_current(struct egl_drm_context *context, struct egl_drm_drawable *draw, struct egl_drm_drawable *read) -{ - if (context) { - struct intel_context *intel = (struct intel_context *)context->priv; - struct 
intel_framebuffer *draw_fb = (struct intel_framebuffer *)draw->priv; - struct intel_framebuffer *read_fb = (struct intel_framebuffer *)read->priv; - - assert(draw_fb->stfb); - assert(read_fb->stfb); - - st_make_current(intel->st, draw_fb->stfb, read_fb->stfb); - - intel->egl_drawable = draw; - - st_resize_framebuffer(draw_fb->stfb, draw->w, draw->h); - - if (draw != read) - st_resize_framebuffer(read_fb->stfb, read->w, read->h); - - } else { - st_make_current(NULL, NULL, NULL); - } -} - -void -intel_bind_frontbuffer(struct egl_drm_drawable *draw, struct egl_drm_frontbuffer *front) -{ - struct intel_device *device = (struct intel_device *)draw->device->priv; - struct intel_framebuffer *draw_fb = (struct intel_framebuffer *)draw->priv; - - if (draw_fb->front_buffer) - driBOUnReference(draw_fb->front_buffer); - - draw_fb->front_buffer = NULL; - draw_fb->front = NULL; - - /* to unbind just call this function with front == NULL */ - if (!front) - return; - - draw_fb->front = front; - - driGenBuffers(device->base.staticPool, "front", 1, &draw_fb->front_buffer, 0, 0, 0); - driBOSetReferenced(draw_fb->front_buffer, front->handle); - - st_resize_framebuffer(draw_fb->stfb, draw->w, draw->h); -} diff --git a/src/gallium/winsys/drm/intel/egl/intel_context.h b/src/gallium/winsys/drm/intel/egl/intel_context.h deleted file mode 100644 index 477fdec7f7..0000000000 --- a/src/gallium/winsys/drm/intel/egl/intel_context.h +++ /dev/null @@ -1,118 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INTEL_CONTEXT_H -#define INTEL_CONTEXT_H - -#include "pipe/p_debug.h" -#include "intel_be_context.h" - - -struct st_context; -struct egl_drm_device; -struct egl_drm_context; -struct egl_drm_frontbuffer; - - -/** - * Intel rendering context, contains a state tracker and intel-specific info. - */ -struct intel_context -{ - struct intel_be_context base; - - struct st_context *st; - - struct intel_device *intel_device; - - /* new egl stuff */ - struct egl_drm_device *egl_device; - struct egl_drm_context *egl_context; - struct egl_drm_drawable *egl_drawable; -}; - - - -/** - * Intel framebuffer. 
- */ -struct intel_framebuffer -{ - struct st_framebuffer *stfb; - - struct intel_device *device; - struct _DriBufferObject *front_buffer; - struct egl_drm_frontbuffer *front; -}; - - - - -/* These are functions now: - */ -void LOCK_HARDWARE( struct intel_context *intel ); -void UNLOCK_HARDWARE( struct intel_context *intel ); - -extern char *__progname; - - - -/* ================================================================ - * Debugging: - */ -#ifdef DEBUG -extern int __intel_debug; - -#define DEBUG_SWAP 0x1 -#define DEBUG_LOCK 0x2 -#define DEBUG_IOCTL 0x4 -#define DEBUG_BATCH 0x8 - -#define DBG(flag, ...) do { \ - if (__intel_debug & (DEBUG_##flag)) \ - printf(__VA_ARGS__); \ -} while(0) - -#else -#define DBG(flag, ...) -#endif - - -#define PCI_CHIP_845_G 0x2562 -#define PCI_CHIP_I830_M 0x3577 -#define PCI_CHIP_I855_GM 0x3582 -#define PCI_CHIP_I865_G 0x2572 -#define PCI_CHIP_I915_G 0x2582 -#define PCI_CHIP_I915_GM 0x2592 -#define PCI_CHIP_I945_G 0x2772 -#define PCI_CHIP_I945_GM 0x27A2 -#define PCI_CHIP_I945_GME 0x27AE -#define PCI_CHIP_G33_G 0x29C2 -#define PCI_CHIP_Q35_G 0x29B2 -#define PCI_CHIP_Q33_G 0x29D2 - -#endif diff --git a/src/gallium/winsys/drm/intel/egl/intel_device.c b/src/gallium/winsys/drm/intel/egl/intel_device.c deleted file mode 100644 index b9649cbec7..0000000000 --- a/src/gallium/winsys/drm/intel/egl/intel_device.c +++ /dev/null @@ -1,137 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - - -#include "utils.h" - -#include "state_tracker/st_public.h" -#include "i915simple/i915_screen.h" - -#include "intel_context.h" -#include "intel_device.h" -#include "intel_batchbuffer.h" -#include "intel_egl.h" - - -extern const struct dri_extension card_extensions[]; - - -int -intel_create_device(struct egl_drm_device *device) -{ - struct intel_device *intel_device; - - /* Allocate the private area */ - intel_device = CALLOC_STRUCT(intel_device); - if (!intel_device) - return FALSE; - - device->priv = (void *)intel_device; - intel_device->device = device; - - intel_device->deviceID = device->deviceID; - - intel_be_init_device(&intel_device->base, device->drmFD, intel_device->deviceID); - - intel_device->pipe = i915_create_screen(&intel_device->base.base, intel_device->deviceID); - - /* hack */ - driInitExtensions(NULL, card_extensions, GL_FALSE); - - return TRUE; -} - -int -intel_destroy_device(struct egl_drm_device *device) -{ - struct intel_device *intel_device = (struct intel_device *)device->priv; - - intel_be_destroy_device(&intel_device->base); - - free(intel_device); - device->priv = NULL; - - return TRUE; -} - -int -intel_create_drawable(struct egl_drm_drawable *drawable, - const __GLcontextModes * visual) -{ - enum pipe_format colorFormat, depthFormat, stencilFormat; - struct intel_framebuffer *intelfb = CALLOC_STRUCT(intel_framebuffer); - - if (!intelfb) - return GL_FALSE; - - intelfb->device = drawable->device->priv; - - if (visual->redBits == 5) - colorFormat = PIPE_FORMAT_R5G6B5_UNORM; - else - colorFormat = PIPE_FORMAT_A8R8G8B8_UNORM; - - if (visual->depthBits == 16) - depthFormat = PIPE_FORMAT_Z16_UNORM; - else if (visual->depthBits == 24) - depthFormat = PIPE_FORMAT_S8Z24_UNORM; - else - depthFormat = PIPE_FORMAT_NONE; - - if (visual->stencilBits == 8) - stencilFormat = PIPE_FORMAT_S8Z24_UNORM; - else - stencilFormat = PIPE_FORMAT_NONE; - - intelfb->stfb = 
st_create_framebuffer(visual, - colorFormat, - depthFormat, - stencilFormat, - drawable->w, - drawable->h, - (void*) intelfb); - - if (!intelfb->stfb) { - free(intelfb); - return GL_FALSE; - } - - drawable->priv = (void *) intelfb; - return GL_TRUE; -} - -int -intel_destroy_drawable(struct egl_drm_drawable *drawable) -{ - struct intel_framebuffer *intelfb = (struct intel_framebuffer *)drawable->priv; - drawable->priv = NULL; - - assert(intelfb->stfb); - st_unreference_framebuffer(&intelfb->stfb); - free(intelfb); - return TRUE; -} diff --git a/src/gallium/winsys/drm/intel/egl/intel_egl.c b/src/gallium/winsys/drm/intel/egl/intel_egl.c deleted file mode 100644 index 3204ed3131..0000000000 --- a/src/gallium/winsys/drm/intel/egl/intel_egl.c +++ /dev/null @@ -1,796 +0,0 @@ - -#include <assert.h> -#include <stdlib.h> -#include <string.h> -#include <stdio.h> -#include <stdint.h> - -#include "eglconfig.h" -#include "eglcontext.h" -#include "egldisplay.h" -#include "egldriver.h" -#include "eglglobals.h" -#include "eglmode.h" -#include "eglscreen.h" -#include "eglsurface.h" -#include "egllog.h" - -#include "intel_egl.h" - -#include "xf86drm.h" -#include "xf86drmMode.h" - -#include "intel_context.h" - -#include "state_tracker/st_public.h" - -#define MAX_SCREENS 16 - -static void -drm_get_device_id(struct egl_drm_device *device) -{ - char path[512]; - FILE *file; - - /* TODO get the real minor */ - int minor = 0; - - snprintf(path, sizeof(path), "/sys/class/drm/card%d/device/device", minor); - file = fopen(path, "r"); - if (!file) { - _eglLog(_EGL_WARNING, "Could not retrive device ID\n"); - return; - } - - fgets(path, sizeof( path ), file); - sscanf(path, "%x", &device->deviceID); - fclose(file); -} - -static struct egl_drm_device* -egl_drm_create_device(int drmFD) -{ - struct egl_drm_device *device = malloc(sizeof(*device)); - memset(device, 0, sizeof(*device)); - device->drmFD = drmFD; - - device->version = drmGetVersion(device->drmFD); - - drm_get_device_id(device); - - if 
(!intel_create_device(device)) { - free(device); - return NULL; - } - - return device; -} - -static void -_egl_context_modes_destroy(__GLcontextModes *modes) -{ - _eglLog(_EGL_DEBUG, "%s", __FUNCTION__); - - while (modes) { - __GLcontextModes * const next = modes->next; - free(modes); - modes = next; - } -} -/** - * Create a linked list of 'count' GLcontextModes. - * These are used during the client/server visual negotiation phase, - * then discarded. - */ -static __GLcontextModes * -_egl_context_modes_create(unsigned count, size_t minimum_size) -{ - /* This code copied from libGLX, and modified */ - const size_t size = (minimum_size > sizeof(__GLcontextModes)) - ? minimum_size : sizeof(__GLcontextModes); - __GLcontextModes * head = NULL; - __GLcontextModes ** next; - unsigned i; - - _eglLog(_EGL_DEBUG, "%s %d %d", __FUNCTION__, count, minimum_size); - - next = & head; - for (i = 0 ; i < count ; i++) { - *next = (__GLcontextModes *) calloc(1, size); - if (*next == NULL) { - _egl_context_modes_destroy(head); - head = NULL; - break; - } - - (*next)->doubleBufferMode = 1; - (*next)->visualID = GLX_DONT_CARE; - (*next)->visualType = GLX_DONT_CARE; - (*next)->visualRating = GLX_NONE; - (*next)->transparentPixel = GLX_NONE; - (*next)->transparentRed = GLX_DONT_CARE; - (*next)->transparentGreen = GLX_DONT_CARE; - (*next)->transparentBlue = GLX_DONT_CARE; - (*next)->transparentAlpha = GLX_DONT_CARE; - (*next)->transparentIndex = GLX_DONT_CARE; - (*next)->xRenderable = GLX_DONT_CARE; - (*next)->fbconfigID = GLX_DONT_CARE; - (*next)->swapMethod = GLX_SWAP_UNDEFINED_OML; - (*next)->bindToTextureRgb = GLX_DONT_CARE; - (*next)->bindToTextureRgba = GLX_DONT_CARE; - (*next)->bindToMipmapTexture = GLX_DONT_CARE; - (*next)->bindToTextureTargets = 0; - (*next)->yInverted = GLX_DONT_CARE; - - next = & ((*next)->next); - } - - return head; -} - -struct drm_screen; - -struct drm_driver -{ - _EGLDriver base; /* base class/object */ - - drmModeResPtr res; - - struct drm_screen 
*screens[MAX_SCREENS]; - size_t count_screens; - - struct egl_drm_device *device; -}; - -struct drm_surface -{ - _EGLSurface base; /* base class/object */ - - struct egl_drm_drawable *drawable; -}; - -struct drm_context -{ - _EGLContext base; /* base class/object */ - - struct egl_drm_context *context; -}; - -struct drm_screen -{ - _EGLScreen base; - - /* currently only support one connector */ - drmModeConnectorPtr connector; - - /* Has this screen been shown */ - int shown; - - /* Surface that is currently attached to this screen */ - struct drm_surface *surf; - - /* backing buffer */ - drmBO buffer; - - /* framebuffer */ - drmModeFBPtr fb; - uint32_t fbID; - - /* crtc and mode used */ - drmModeCrtcPtr crtc; - uint32_t crtcID; - - struct drm_mode_modeinfo *mode; - - /* geometry of the screen */ - struct egl_drm_frontbuffer front; -}; - -static void -drm_update_res(struct drm_driver *drm_drv) -{ - drmModeFreeResources(drm_drv->res); - drm_drv->res = drmModeGetResources(drm_drv->device->drmFD); -} - -static void -drm_add_modes_from_connector(_EGLScreen *screen, drmModeConnectorPtr connector) -{ - struct drm_mode_modeinfo *m; - int i; - - for (i = 0; i < connector->count_modes; i++) { - m = &connector->modes[i]; - _eglAddNewMode(screen, m->hdisplay, m->vdisplay, m->vrefresh, m->name); - } -} - - -static EGLBoolean -drm_initialize(_EGLDriver *drv, EGLDisplay dpy, EGLint *major, EGLint *minor) -{ - _EGLDisplay *disp = _eglLookupDisplay(dpy); - struct drm_driver *drm_drv = (struct drm_driver *)drv; - struct drm_screen *screen = NULL; - drmModeConnectorPtr connector = NULL; - drmModeResPtr res = NULL; - unsigned count_connectors = 0; - int num_screens = 0; - - EGLint i; - int fd; - - fd = drmOpen("i915", NULL); - if (fd < 0) { - return EGL_FALSE; - } - - drm_drv->device = egl_drm_create_device(fd); - if (!drm_drv->device) { - drmClose(fd); - return EGL_FALSE; - } - - drm_update_res(drm_drv); - res = drm_drv->res; - if (res) - count_connectors = res->count_connectors; - 
- for(i = 0; i < count_connectors && i < MAX_SCREENS; i++) { - connector = drmModeGetConnector(fd, res->connectors[i]); - - if (!connector) - continue; - - if (connector->connection != DRM_MODE_CONNECTED) { - drmModeFreeConnector(connector); - continue; - } - - screen = malloc(sizeof(struct drm_screen)); - memset(screen, 0, sizeof(*screen)); - screen->connector = connector; - _eglInitScreen(&screen->base); - _eglAddScreen(disp, &screen->base); - drm_add_modes_from_connector(&screen->base, connector); - drm_drv->screens[num_screens++] = screen; - } - drm_drv->count_screens = num_screens; - - /* for now we only have one config */ - _EGLConfig *config = calloc(1, sizeof(*config)); - memset(config, 1, sizeof(*config)); - _eglInitConfig(config, 1); - _eglSetConfigAttrib(config, EGL_RED_SIZE, 8); - _eglSetConfigAttrib(config, EGL_GREEN_SIZE, 8); - _eglSetConfigAttrib(config, EGL_BLUE_SIZE, 8); - _eglSetConfigAttrib(config, EGL_ALPHA_SIZE, 8); - _eglSetConfigAttrib(config, EGL_BUFFER_SIZE, 32); - _eglSetConfigAttrib(config, EGL_DEPTH_SIZE, 24); - _eglSetConfigAttrib(config, EGL_STENCIL_SIZE, 8); - _eglSetConfigAttrib(config, EGL_SURFACE_TYPE, EGL_PBUFFER_BIT); - _eglAddConfig(disp, config); - - drv->Initialized = EGL_TRUE; - - *major = 1; - *minor = 4; - - return EGL_TRUE; -} - -static void -drm_takedown_shown_screen(_EGLDriver *drv, struct drm_screen *screen) -{ - struct drm_driver *drm_drv = (struct drm_driver *)drv; - unsigned int i; - - intel_bind_frontbuffer(screen->surf->drawable, NULL); - screen->surf = NULL; - - for (i = 0; i < drm_drv->res->count_crtcs; i++) { - drmModeSetCrtc( - drm_drv->device->drmFD, - drm_drv->res->crtcs[i], - 0, // FD - 0, 0, - NULL, 0, // List of output ids - NULL); - } - - drmModeRmFB(drm_drv->device->drmFD, screen->fbID); - drmModeFreeFB(screen->fb); - screen->fb = NULL; - - drmBOUnreference(drm_drv->device->drmFD, &screen->buffer); - - screen->shown = 0; -} - -static EGLBoolean -drm_terminate(_EGLDriver *drv, EGLDisplay dpy) -{ - struct 
drm_driver *drm_drv = (struct drm_driver *)drv; - struct drm_screen *screen; - int i = 0; - - intel_destroy_device(drm_drv->device); - drmFreeVersion(drm_drv->device->version); - - for (i = 0; i < drm_drv->count_screens; i++) { - screen = drm_drv->screens[i]; - - if (screen->shown) - drm_takedown_shown_screen(drv, screen); - - drmModeFreeConnector(screen->connector); - _eglDestroyScreen(&screen->base); - drm_drv->screens[i] = NULL; - } - - drmClose(drm_drv->device->drmFD); - - free(drm_drv->device); - - _eglCleanupDisplay(_eglLookupDisplay(dpy)); - free(drm_drv); - - return EGL_TRUE; -} - - -static struct drm_context * -lookup_drm_context(EGLContext context) -{ - _EGLContext *c = _eglLookupContext(context); - return (struct drm_context *) c; -} - - -static struct drm_surface * -lookup_drm_surface(EGLSurface surface) -{ - _EGLSurface *s = _eglLookupSurface(surface); - return (struct drm_surface *) s; -} - -static struct drm_screen * -lookup_drm_screen(EGLDisplay dpy, EGLScreenMESA screen) -{ - _EGLScreen *s = _eglLookupScreen(dpy, screen); - return (struct drm_screen *) s; -} - -static __GLcontextModes* -visual_from_config(_EGLConfig *conf) -{ - __GLcontextModes *visual; - (void)conf; - - visual = _egl_context_modes_create(1, sizeof(*visual)); - visual->redBits = 8; - visual->greenBits = 8; - visual->blueBits = 8; - visual->alphaBits = 8; - - visual->rgbBits = 32; - visual->doubleBufferMode = 1; - - visual->depthBits = 24; - visual->haveDepthBuffer = visual->depthBits > 0; - visual->stencilBits = 8; - visual->haveStencilBuffer = visual->stencilBits > 0; - - return visual; -} - - - -static EGLContext -drm_create_context(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, EGLContext share_list, const EGLint *attrib_list) -{ - struct drm_driver *drm_drv = (struct drm_driver *)drv; - struct drm_context *c; - struct drm_egl_context *share = NULL; - _EGLConfig *conf; - int i; - int ret; - __GLcontextModes *visual; - struct egl_drm_context *context; - - conf = 
_eglLookupConfig(drv, dpy, config); - if (!conf) { - _eglError(EGL_BAD_CONFIG, "eglCreateContext"); - return EGL_NO_CONTEXT; - } - - for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) { - switch (attrib_list[i]) { - /* no attribs defined for now */ - default: - _eglError(EGL_BAD_ATTRIBUTE, "eglCreateContext"); - return EGL_NO_CONTEXT; - } - } - - c = (struct drm_context *) calloc(1, sizeof(struct drm_context)); - if (!c) - return EGL_NO_CONTEXT; - - _eglInitContext(drv, dpy, &c->base, config, attrib_list); - - context = malloc(sizeof(*context)); - memset(context, 0, sizeof(*context)); - - if (!context) - goto err_c; - - context->device = drm_drv->device; - visual = visual_from_config(conf); - - ret = intel_create_context(context, visual, share); - free(visual); - - if (!ret) - goto err_gl; - - c->context = context; - - /* generate handle and insert into hash table */ - _eglSaveContext(&c->base); - assert(_eglGetContextHandle(&c->base)); - - return _eglGetContextHandle(&c->base); -err_gl: - free(context); -err_c: - free(c); - return EGL_NO_CONTEXT; -} - -static EGLBoolean -drm_destroy_context(_EGLDriver *drv, EGLDisplay dpy, EGLContext context) -{ - struct drm_context *fc = lookup_drm_context(context); - _eglRemoveContext(&fc->base); - if (fc->base.IsBound) { - fc->base.DeletePending = EGL_TRUE; - } else { - intel_destroy_context(fc->context); - free(fc->context); - free(fc); - } - return EGL_TRUE; -} - - -static EGLSurface -drm_create_window_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, NativeWindowType window, const EGLint *attrib_list) -{ - return EGL_NO_SURFACE; -} - - -static EGLSurface -drm_create_pixmap_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, NativePixmapType pixmap, const EGLint *attrib_list) -{ - return EGL_NO_SURFACE; -} - - -static EGLSurface -drm_create_pbuffer_surface(_EGLDriver *drv, EGLDisplay dpy, EGLConfig config, - const EGLint *attrib_list) -{ - struct drm_driver *drm_drv = (struct drm_driver *)drv; - int 
i; - int ret; - int width = -1; - int height = -1; - struct drm_surface *surf = NULL; - struct egl_drm_drawable *drawable = NULL; - __GLcontextModes *visual; - _EGLConfig *conf; - - conf = _eglLookupConfig(drv, dpy, config); - if (!conf) { - _eglError(EGL_BAD_CONFIG, "eglCreatePbufferSurface"); - return EGL_NO_CONTEXT; - } - - for (i = 0; attrib_list && attrib_list[i] != EGL_NONE; i++) { - switch (attrib_list[i]) { - case EGL_WIDTH: - width = attrib_list[++i]; - break; - case EGL_HEIGHT: - height = attrib_list[++i]; - break; - default: - _eglError(EGL_BAD_ATTRIBUTE, "eglCreatePbufferSurface"); - return EGL_NO_SURFACE; - } - } - - if (width < 1 || height < 1) { - _eglError(EGL_BAD_ATTRIBUTE, "eglCreatePbufferSurface"); - return EGL_NO_SURFACE; - } - - surf = (struct drm_surface *) calloc(1, sizeof(struct drm_surface)); - if (!surf) - goto err; - - if (!_eglInitSurface(drv, dpy, &surf->base, EGL_PBUFFER_BIT, config, attrib_list)) - goto err_surf; - - drawable = malloc(sizeof(*drawable)); - memset(drawable, 0, sizeof(*drawable)); - - drawable->w = width; - drawable->h = height; - - visual = visual_from_config(conf); - - drawable->device = drm_drv->device; - ret = intel_create_drawable(drawable, visual); - free(visual); - - if (!ret) - goto err_draw; - - surf->drawable = drawable; - - _eglSaveSurface(&surf->base); - return surf->base.Handle; - -err_draw: - free(drawable); -err_surf: - free(surf); -err: - return EGL_NO_SURFACE; -} - -static EGLSurface -drm_create_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy, EGLConfig cfg, - const EGLint *attrib_list) -{ - EGLSurface surf = drm_create_pbuffer_surface(drv, dpy, cfg, attrib_list); - - return surf; -} - -static struct drm_mode_modeinfo * -drm_find_mode(drmModeConnectorPtr connector, _EGLMode *mode) -{ - int i; - struct drm_mode_modeinfo *m; - - for (i = 0; i < connector->count_modes; i++) { - m = &connector->modes[i]; - if (m->hdisplay == mode->Width && m->vdisplay == mode->Height && m->vrefresh == 
mode->RefreshRate) - break; - m = &connector->modes[0]; /* if we can't find one, return first */ - } - - return m; -} -static void -draw(size_t x, size_t y, size_t w, size_t h, size_t pitch, size_t v, unsigned int *ptr) -{ - int i, j; - - for (i = x; i < x + w; i++) - for(j = y; j < y + h; j++) - ptr[(i * pitch / 4) + j] = v; - -} - -static void -prettyColors(int fd, unsigned int handle, size_t pitch) -{ - drmBO bo; - unsigned int *ptr; - void *p; - int i; - - drmBOReference(fd, handle, &bo); - drmBOMap(fd, &bo, DRM_BO_FLAG_READ | DRM_BO_FLAG_WRITE, 0, &p); - ptr = (unsigned int*)p; - - for (i = 0; i < (bo.size / 4); i++) - ptr[i] = 0xFFFFFFFF; - - for (i = 0; i < 4; i++) - draw(i * 40, i * 40, 40, 40, pitch, 0, ptr); - - - draw(200, 100, 40, 40, pitch, 0xff00ff, ptr); - draw(100, 200, 40, 40, pitch, 0xff00ff, ptr); - - drmBOUnmap(fd, &bo); -} - -static EGLBoolean -drm_show_screen_surface_mesa(_EGLDriver *drv, EGLDisplay dpy, - EGLScreenMESA screen, - EGLSurface surface, EGLModeMESA m) -{ - struct drm_driver *drm_drv = (struct drm_driver *)drv; - struct drm_surface *surf = lookup_drm_surface(surface); - struct drm_screen *scrn = lookup_drm_screen(dpy, screen); - _EGLMode *mode = _eglLookupMode(dpy, m); - size_t pitch = mode->Width * 4; - size_t size = mode->Height * pitch; - int ret; - unsigned int i,j,k; - - if (scrn->shown) - drm_takedown_shown_screen(drv, scrn); - - ret = drmBOCreate(drm_drv->device->drmFD, size, 0, 0, - DRM_BO_FLAG_READ | - DRM_BO_FLAG_WRITE | - DRM_BO_FLAG_MEM_TT | - DRM_BO_FLAG_MEM_VRAM | - DRM_BO_FLAG_NO_EVICT, - DRM_BO_HINT_DONT_FENCE, &scrn->buffer); - - if (ret) - return EGL_FALSE; - - prettyColors(drm_drv->device->drmFD, scrn->buffer.handle, pitch); - - ret = drmModeAddFB(drm_drv->device->drmFD, mode->Width, mode->Height, - 32, 32, pitch, - scrn->buffer.handle, - &scrn->fbID); - - if (ret) - goto err_bo; - - scrn->fb = drmModeGetFB(drm_drv->device->drmFD, scrn->fbID); - if (!scrn->fb) - goto err_bo; - - for (j = 0; j < 
drm_drv->res->count_connectors; j++) { - drmModeConnector *con = drmModeGetConnector(drm_drv->device->drmFD, drm_drv->res->connectors[j]); - scrn->mode = drm_find_mode(con, mode); - if (!scrn->mode) - goto err_fb; - - for (k = 0; k < con->count_encoders; k++) { - drmModeEncoder *enc = drmModeGetEncoder(drm_drv->device->drmFD, con->encoders[k]); - for (i = 0; i < drm_drv->res->count_crtcs; i++) { - if (enc->possible_crtcs & (1<<i)) { - ret = drmModeSetCrtc( - drm_drv->device->drmFD, - drm_drv->res->crtcs[i], - scrn->fbID, - 0, 0, - &drm_drv->res->connectors[j], 1, - scrn->mode); - /* skip the other crtcs now */ - i = drm_drv->res->count_crtcs; - } - } - } - } - - scrn->front.handle = scrn->buffer.handle; - scrn->front.pitch = pitch; - scrn->front.width = mode->Width; - scrn->front.height = mode->Height; - - scrn->surf = surf; - intel_bind_frontbuffer(surf->drawable, &scrn->front); - - scrn->shown = 1; - - return EGL_TRUE; - -err_fb: - /* TODO remove fb */ - -err_bo: - drmBOUnreference(drm_drv->device->drmFD, &scrn->buffer); - return EGL_FALSE; -} - -static EGLBoolean -drm_destroy_surface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface) -{ - struct drm_surface *fs = lookup_drm_surface(surface); - _eglRemoveSurface(&fs->base); - if (fs->base.IsBound) { - fs->base.DeletePending = EGL_TRUE; - } else { - intel_bind_frontbuffer(fs->drawable, NULL); - intel_destroy_drawable(fs->drawable); - free(fs->drawable); - free(fs); - } - return EGL_TRUE; -} - - -static EGLBoolean -drm_make_current(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext context) -{ - struct drm_surface *readSurf = lookup_drm_surface(read); - struct drm_surface *drawSurf = lookup_drm_surface(draw); - struct drm_context *ctx = lookup_drm_context(context); - EGLBoolean b; - - b = _eglMakeCurrent(drv, dpy, draw, read, context); - if (!b) - return EGL_FALSE; - - if (ctx) { - if (!drawSurf || !readSurf) - return EGL_FALSE; - - intel_make_current(ctx->context, drawSurf->drawable, 
readSurf->drawable); - } else { - intel_make_current(NULL, NULL, NULL); - } - - return EGL_TRUE; -} - -static EGLBoolean -drm_swap_buffers(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw) -{ - struct drm_surface *surf = lookup_drm_surface(draw); - if (!surf) - return EGL_FALSE; - - /* error checking */ - if (!_eglSwapBuffers(drv, dpy, draw)) - return EGL_FALSE; - - intel_swap_buffers(surf->drawable); - return EGL_TRUE; -} - - -/** - * The bootstrap function. Return a new drm_driver object and - * plug in API functions. - */ -_EGLDriver * -_eglMain(_EGLDisplay *dpy, const char *args) -{ - struct drm_driver *drm; - - drm = (struct drm_driver *) calloc(1, sizeof(struct drm_driver)); - if (!drm) { - return NULL; - } - - /* First fill in the dispatch table with defaults */ - _eglInitDriverFallbacks(&drm->base); - /* then plug in our Drm-specific functions */ - drm->base.API.Initialize = drm_initialize; - drm->base.API.Terminate = drm_terminate; - drm->base.API.CreateContext = drm_create_context; - drm->base.API.MakeCurrent = drm_make_current; - drm->base.API.CreateWindowSurface = drm_create_window_surface; - drm->base.API.CreatePixmapSurface = drm_create_pixmap_surface; - drm->base.API.CreatePbufferSurface = drm_create_pbuffer_surface; - drm->base.API.DestroySurface = drm_destroy_surface; - drm->base.API.DestroyContext = drm_destroy_context; - drm->base.API.CreateScreenSurfaceMESA = drm_create_screen_surface_mesa; - drm->base.API.ShowScreenSurfaceMESA = drm_show_screen_surface_mesa; - drm->base.API.SwapBuffers = drm_swap_buffers; - - drm->base.ClientAPIsMask = EGL_OPENGL_BIT /*| EGL_OPENGL_ES_BIT*/; - drm->base.Name = "DRM/Gallium"; - - /* enable supported extensions */ - drm->base.Extensions.MESA_screen_surface = EGL_TRUE; - drm->base.Extensions.MESA_copy_context = EGL_TRUE; - - return &drm->base; -} diff --git a/src/gallium/winsys/drm/intel/egl/intel_egl.h b/src/gallium/winsys/drm/intel/egl/intel_egl.h deleted file mode 100644 index 1ee27e0847..0000000000 --- 
a/src/gallium/winsys/drm/intel/egl/intel_egl.h +++ /dev/null @@ -1,53 +0,0 @@ - -#ifndef _INTEL_EGL_H_ -#define _INTEL_EGL_H_ - -#include <xf86drm.h> - -struct egl_drm_device -{ - void *priv; - int drmFD; - - drmVersionPtr version; - int deviceID; -}; - -struct egl_drm_context -{ - void *priv; - struct egl_drm_device *device; -}; - -struct egl_drm_drawable -{ - void *priv; - struct egl_drm_device *device; - size_t h; - size_t w; -}; - -struct egl_drm_frontbuffer -{ - uint32_t handle; - uint32_t pitch; - uint32_t width; - uint32_t height; -}; - -#include "GL/internal/glcore.h" - -int intel_create_device(struct egl_drm_device *device); -int intel_destroy_device(struct egl_drm_device *device); - -int intel_create_context(struct egl_drm_context *context, const __GLcontextModes *visual, void *sharedContextPrivate); -int intel_destroy_context(struct egl_drm_context *context); - -int intel_create_drawable(struct egl_drm_drawable *drawable, const __GLcontextModes * visual); -int intel_destroy_drawable(struct egl_drm_drawable *drawable); - -void intel_make_current(struct egl_drm_context *context, struct egl_drm_drawable *draw, struct egl_drm_drawable *read); -void intel_swap_buffers(struct egl_drm_drawable *draw); -void intel_bind_frontbuffer(struct egl_drm_drawable *draw, struct egl_drm_frontbuffer *front); - -#endif diff --git a/src/gallium/winsys/drm/intel/egl/intel_swapbuffers.c b/src/gallium/winsys/drm/intel/egl/intel_swapbuffers.c deleted file mode 100644 index 2edcbc79ff..0000000000 --- a/src/gallium/winsys/drm/intel/egl/intel_swapbuffers.c +++ /dev/null @@ -1,111 +0,0 @@ -/************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- * - **************************************************************************/ - -#include "intel_device.h" -#include "intel_context.h" -#include "intel_batchbuffer.h" -#include "intel_reg.h" - -#include "pipe/p_context.h" -#include "state_tracker/st_public.h" -#include "state_tracker/st_context.h" -#include "state_tracker/st_cb_fbo.h" -#include "intel_egl.h" - - -static void -intel_display_surface(struct egl_drm_drawable *draw, - struct pipe_surface *surf); - -void intel_swap_buffers(struct egl_drm_drawable *draw) -{ - struct intel_framebuffer *intel_fb = (struct intel_framebuffer *)draw->priv; - struct pipe_surface *back_surf; - - assert(intel_fb); - assert(intel_fb->stfb); - - back_surf = st_get_framebuffer_surface(intel_fb->stfb, ST_SURFACE_BACK_LEFT); - if (back_surf) { - st_notify_swapbuffers(intel_fb->stfb); - if (intel_fb->front) - intel_display_surface(draw, back_surf); - st_notify_swapbuffers_complete(intel_fb->stfb); - } -} - -static void -intel_display_surface(struct egl_drm_drawable *draw, - struct pipe_surface *surf) -{ - struct intel_context *intel = NULL; - struct intel_framebuffer *intel_fb = (struct intel_framebuffer *)draw->priv; - struct _DriFenceObject *fence; - - //const int srcWidth = surf->width; - //const int srcHeight = surf->height; - - intel = intel_fb->device->dummy; - if (!intel) { - printf("No dummy context\n"); - return; - } - - const int dstWidth = intel_fb->front->width; - const int dstHeight = intel_fb->front->height; - const int dstPitch = intel_fb->front->pitch / 4;//draw->front.cpp; - - const int cpp = 4;//intel_fb->front->cpp; - const int srcPitch = surf->stride / cpp; - - int BR13, CMD; - //int i; - - BR13 = (dstPitch * cpp) | (0xCC << 16) | (1 << 24) | (1 << 25); - CMD = (XY_SRC_COPY_BLT_CMD | XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB); - - BEGIN_BATCH(8, 2); - OUT_BATCH(CMD); - OUT_BATCH(BR13); - OUT_BATCH((0 << 16) | 0); - OUT_BATCH((dstHeight << 16) | dstWidth); - - OUT_RELOC(intel_fb->front_buffer, - 
DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_WRITE, - DRM_BO_MASK_MEM | DRM_BO_FLAG_WRITE, 0); - - OUT_BATCH((0 << 16) | 0); - OUT_BATCH((srcPitch * cpp) & 0xffff); - OUT_RELOC(dri_bo(surf->buffer), - DRM_BO_FLAG_MEM_TT | DRM_BO_FLAG_READ, - DRM_BO_MASK_MEM | DRM_BO_FLAG_READ, 0); - - fence = intel_be_batchbuffer_flush(intel->base.batch); - driFenceUnReference(&fence); - intel_be_batchbuffer_finish(intel->base.batch); -} diff --git a/src/gallium/winsys/drm/intel/gem/Makefile b/src/gallium/winsys/drm/intel/gem/Makefile new file mode 100644 index 0000000000..7ab1a2a771 --- /dev/null +++ b/src/gallium/winsys/drm/intel/gem/Makefile @@ -0,0 +1,16 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = inteldrm + +C_SOURCES = \ + intel_be_batchbuffer.c \ + intel_be_context.c \ + intel_be_device.c \ + intel_be_api.c + +LIBRARY_INCLUDES = $(shell pkg-config libdrm --cflags-only-I) + +LIBRARY_DEFINES = $(shell pkg-config libdrm --cflags-only-other) + +include ../../../../Makefile.template diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_api.c b/src/gallium/winsys/drm/intel/gem/intel_be_api.c new file mode 100644 index 0000000000..6cffed5134 --- /dev/null +++ b/src/gallium/winsys/drm/intel/gem/intel_be_api.c @@ -0,0 +1,12 @@ + +#include "intel_be_api.h" + +struct drm_api drm_api_hocks = +{ + /* intel_be_context.c */ + .create_context = intel_be_create_context, + /* intel_be_screen.c */ + .create_screen = intel_be_create_screen, + .buffer_from_handle = intel_be_buffer_from_handle, + .handle_from_buffer = intel_be_handle_from_buffer, +}; diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_api.h b/src/gallium/winsys/drm/intel/gem/intel_be_api.h new file mode 100644 index 0000000000..73e458d4ba --- /dev/null +++ b/src/gallium/winsys/drm/intel/gem/intel_be_api.h @@ -0,0 +1,14 @@ + +#ifndef _INTEL_BE_API_H_ +#define _INTEL_BE_API_H_ + +#include "pipe/p_compiler.h" + +#include "state_tracker/drm_api.h" + +#include "intel_be_device.h" + +struct pipe_screen 
*intel_be_create_screen(int drmFD, int pciID); +struct pipe_context *intel_be_create_context(struct pipe_screen *screen); + +#endif diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_batchbuffer.c b/src/gallium/winsys/drm/intel/gem/intel_be_batchbuffer.c new file mode 100644 index 0000000000..d9556e1f38 --- /dev/null +++ b/src/gallium/winsys/drm/intel/gem/intel_be_batchbuffer.c @@ -0,0 +1,140 @@ + +#include "i915simple/i915_debug.h" +#include "intel_be_batchbuffer.h" +#include "intel_be_context.h" +#include "intel_be_device.h" +#include "intel_be_fence.h" +#include <errno.h> + +#include "util/u_memory.h" + +struct intel_be_batchbuffer * +intel_be_batchbuffer_alloc(struct intel_be_context *intel) +{ + struct intel_be_batchbuffer *batch = CALLOC_STRUCT(intel_be_batchbuffer); + + + batch->base.buffer = NULL; + batch->base.winsys = &intel->base; + batch->base.map = NULL; + batch->base.ptr = NULL; + batch->base.size = 0; + batch->base.actual_size = intel->device->max_batch_size; + batch->base.relocs = 0; + batch->base.max_relocs = INTEL_DEFAULT_RELOCS; + + batch->base.map = malloc(batch->base.actual_size); + memset(batch->base.map, 0, batch->base.actual_size); + + batch->base.ptr = batch->base.map; + + intel_be_batchbuffer_reset(batch); + + return batch; +} + +void +intel_be_batchbuffer_reset(struct intel_be_batchbuffer *batch) +{ + struct intel_be_context *intel = intel_be_context(batch->base.winsys); + struct intel_be_device *dev = intel->device; + + if (batch->bo) + drm_intel_bo_unreference(batch->bo); + + memset(batch->base.map, 0, batch->base.actual_size); + batch->base.ptr = batch->base.map; + batch->base.size = batch->base.actual_size - BATCH_RESERVED; + + batch->base.relocs = 0; + batch->base.max_relocs = INTEL_DEFAULT_RELOCS; + + batch->bo = drm_intel_bo_alloc(dev->pools.gem, + "gallium3d_batch_buffer", + batch->base.actual_size, 0); +} + +int +intel_be_offset_relocation(struct intel_be_batchbuffer *batch, + unsigned pre_add, + drm_intel_bo *bo, + uint32_t 
read_domains, + uint32_t write_domain) +{ + unsigned offset; + int ret = 0; + + assert(batch->base.relocs < batch->base.max_relocs); + + offset = (unsigned)(batch->base.ptr - batch->base.map); + + ret = drm_intel_bo_emit_reloc(batch->bo, offset, + bo, pre_add, + read_domains, + write_domain); + + ((uint32_t*)batch->base.ptr)[0] = bo->offset + pre_add; + batch->base.ptr += 4; + + if (!ret) + batch->base.relocs++; + + return ret; +} + +void +intel_be_batchbuffer_flush(struct intel_be_batchbuffer *batch, + struct intel_be_fence **fence) +{ + struct i915_batchbuffer *i915 = &batch->base; + unsigned used = 0; + int ret = 0; + + assert(i915_batchbuffer_space(i915) >= 0); + + used = batch->base.ptr - batch->base.map; + assert((used & 3) == 0); + + if (used & 4) { + i915_batchbuffer_dword(i915, (0x0<<29)|(0x4<<23)|(1<<0)); // MI_FLUSH | FLUSH_MAP_CACHE; + i915_batchbuffer_dword(i915, (0x0<<29)|(0x0<<23)); // MI_NOOP + i915_batchbuffer_dword(i915, (0x0<<29)|(0xA<<23)); // MI_BATCH_BUFFER_END; + } else { + i915_batchbuffer_dword(i915, (0x0<<29)|(0x4<<23)|(1<<0)); //MI_FLUSH | FLUSH_MAP_CACHE; + i915_batchbuffer_dword(i915, (0x0<<29)|(0xA<<23)); // MI_BATCH_BUFFER_END; + } + + used = batch->base.ptr - batch->base.map; + + drm_intel_bo_subdata(batch->bo, 0, used, batch->base.map); + ret = drm_intel_bo_exec(batch->bo, used, NULL, 0, 0); + + assert(ret == 0); + + intel_be_batchbuffer_reset(batch); + + if (fence) { + if (*fence) + intel_be_fence_unreference(*fence); + + (*fence) = CALLOC_STRUCT(intel_be_fence); + (*fence)->refcount = 1; + (*fence)->bo = NULL; + } +} + +void +intel_be_batchbuffer_finish(struct intel_be_batchbuffer *batch) +{ + +} + +void +intel_be_batchbuffer_free(struct intel_be_batchbuffer *batch) +{ + if (batch->bo) + drm_intel_bo_unreference(batch->bo); + + free(batch->base.map); + free(batch); +} diff --git a/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.h b/src/gallium/winsys/drm/intel/gem/intel_be_batchbuffer.h index f150e3a674..195bf8dee7 100644 
--- a/src/gallium/winsys/drm/intel/common/intel_be_batchbuffer.h +++ b/src/gallium/winsys/drm/intel/gem/intel_be_batchbuffer.h @@ -4,7 +4,8 @@ #include "i915simple/i915_batch.h" -#include "ws_dri_bufmgr.h" +#include "drm.h" +#include "intel_bufmgr.h" #define BATCH_RESERVED 16 @@ -16,6 +17,7 @@ struct intel_be_context; struct intel_be_device; +struct intel_be_fence; struct intel_be_batchbuffer { @@ -24,24 +26,7 @@ struct intel_be_batchbuffer struct intel_be_context *intel; struct intel_be_device *device; - struct _DriBufferObject *buffer; - struct _DriFenceObject *last_fence; - uint32_t flags; - - struct _DriBufferList *list; - size_t list_count; - - uint32_t *reloc; - size_t reloc_size; - size_t nr_relocs; - - uint32_t dirty_state; - uint32_t id; - - uint32_t poolOffset; - uint8_t *drmBOVirtual; - struct _drmBONode *node; /* Validation list node for this buffer */ - int dest_location; /* Validation list sequence for this buffer */ + drm_intel_bo *bo; }; struct intel_be_batchbuffer * @@ -53,17 +38,18 @@ intel_be_batchbuffer_free(struct intel_be_batchbuffer *batch); void intel_be_batchbuffer_finish(struct intel_be_batchbuffer *batch); -struct _DriFenceObject * -intel_be_batchbuffer_flush(struct intel_be_batchbuffer *batch); +void +intel_be_batchbuffer_flush(struct intel_be_batchbuffer *batch, + struct intel_be_fence **fence); void intel_be_batchbuffer_reset(struct intel_be_batchbuffer *batch); -void +int intel_be_offset_relocation(struct intel_be_batchbuffer *batch, - unsigned pre_add, - struct _DriBufferObject *driBO, - uint64_t val_flags, - uint64_t val_mask); + unsigned pre_add, + drm_intel_bo *bo, + uint32_t read_domains, + uint32_t write_doman); #endif diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_context.c b/src/gallium/winsys/drm/intel/gem/intel_be_context.c new file mode 100644 index 0000000000..bb6f1b916c --- /dev/null +++ b/src/gallium/winsys/drm/intel/gem/intel_be_context.c @@ -0,0 +1,118 @@ + +#include "pipe/p_screen.h" + +#include 
"intel_be_device.h" +#include "intel_be_context.h" +#include "intel_be_batchbuffer.h" + +#include "i915_drm.h" + +#include "intel_be_api.h" + +static struct i915_batchbuffer * +intel_be_batch_get(struct i915_winsys *sws) +{ + struct intel_be_context *intel = intel_be_context(sws); + return &intel->batch->base; +} + +static void +intel_be_batch_reloc(struct i915_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta) +{ + struct intel_be_context *intel = intel_be_context(sws); + drm_intel_bo *bo = intel_bo(buf); + int ret; + uint32_t read = 0; + uint32_t write = 0; + + if (access_flags & I915_BUFFER_ACCESS_WRITE) { + write = I915_GEM_DOMAIN_RENDER; + read = I915_GEM_DOMAIN_RENDER; + } + + if (access_flags & I915_BUFFER_ACCESS_READ) { + read |= I915_GEM_DOMAIN_VERTEX; + } + + ret = intel_be_offset_relocation(intel->batch, + delta, + bo, + read, + write); + assert(ret == 0); + + /* TODO change return type */ + /* return ret; */ +} + +static void +intel_be_batch_flush(struct i915_winsys *sws, + struct pipe_fence_handle **fence) +{ + struct intel_be_context *intel = intel_be_context(sws); + struct intel_be_fence **f = (struct intel_be_fence **)fence; + + if (fence && *fence) + assert(0); + + intel_be_batchbuffer_flush(intel->batch, f); +} + + +/* + * Misc functions. 
+ */ + +static void +intel_be_destroy_context(struct i915_winsys *winsys) +{ + struct intel_be_context *intel = intel_be_context(winsys); + + intel_be_batchbuffer_free(intel->batch); + + free(intel); +} + +boolean +intel_be_init_context(struct intel_be_context *intel, struct intel_be_device *device) +{ + assert(intel); + assert(device); + intel->device = device; + + intel->base.batch_get = intel_be_batch_get; + intel->base.batch_reloc = intel_be_batch_reloc; + intel->base.batch_flush = intel_be_batch_flush; + + intel->base.destroy = intel_be_destroy_context; + + intel->batch = intel_be_batchbuffer_alloc(intel); + + return true; +} + +struct pipe_context * +intel_be_create_context(struct pipe_screen *screen) +{ + struct intel_be_context *intel; + struct pipe_context *pipe; + struct intel_be_device *device = intel_be_device(screen->winsys); + + intel = (struct intel_be_context *)malloc(sizeof(*intel)); + memset(intel, 0, sizeof(*intel)); + + intel_be_init_context(intel, device); + +#if 0 + pipe = intel_create_softpipe(intel, screen->winsys); +#else + pipe = i915_create_context(screen, &device->base, &intel->base); +#endif + + pipe->priv = intel; + + return pipe; +} diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_context.h b/src/gallium/winsys/drm/intel/gem/intel_be_context.h new file mode 100644 index 0000000000..5a369669c0 --- /dev/null +++ b/src/gallium/winsys/drm/intel/gem/intel_be_context.h @@ -0,0 +1,31 @@ + +#ifndef INTEL_BE_CONTEXT_H +#define INTEL_BE_CONTEXT_H + +#include "i915simple/i915_winsys.h" + +struct intel_be_context +{ + /** Interface to i915simple driver */ + struct i915_winsys base; + + struct intel_be_device *device; + struct intel_be_batchbuffer *batch; +}; + +static INLINE struct intel_be_context * +intel_be_context(struct i915_winsys *sws) +{ + return (struct intel_be_context *)sws; +} + +/** + * Intialize a allocated intel_be_context struct. + * + * Remember to set the hardware_* functions. 
+ */ +boolean +intel_be_init_context(struct intel_be_context *intel, + struct intel_be_device *device); + +#endif diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_device.c b/src/gallium/winsys/drm/intel/gem/intel_be_device.c new file mode 100644 index 0000000000..a2163a1e6d --- /dev/null +++ b/src/gallium/winsys/drm/intel/gem/intel_be_device.c @@ -0,0 +1,283 @@ + +#include "intel_be_device.h" + +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" +#include "util/u_memory.h" + +#include "intel_be_fence.h" + +#include "i915simple/i915_screen.h" + +#include "intel_be_api.h" + +/* + * Buffer + */ + +static void * +intel_be_buffer_map(struct pipe_winsys *winsys, + struct pipe_buffer *buf, + unsigned flags) +{ + drm_intel_bo *bo = intel_bo(buf); + int write = 0; + int ret; + + if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) + write = 1; + + ret = drm_intel_bo_map(bo, write); + + if (ret) + return NULL; + + return bo->virtual; +} + +static void +intel_be_buffer_unmap(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + drm_intel_bo_unmap(intel_bo(buf)); +} + +static void +intel_be_buffer_destroy(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + drm_intel_bo_unreference(intel_bo(buf)); + free(buf); +} + +static struct pipe_buffer * +intel_be_buffer_create(struct pipe_winsys *winsys, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct intel_be_buffer *buffer = CALLOC_STRUCT(intel_be_buffer); + struct intel_be_device *dev = intel_be_device(winsys); + drm_intel_bufmgr *pool; + char *name; + + if (!buffer) + return NULL; + + buffer->base.refcount = 1; + buffer->base.alignment = alignment; + buffer->base.usage = usage; + buffer->base.size = size; + + if (usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_CONSTANT)) { + /* Local buffer */ + name = "gallium3d_local"; + pool = dev->pools.gem; + } else if (usage & PIPE_BUFFER_USAGE_CUSTOM) { + /* For vertex buffers */ + 
name = "gallium3d_internal_vertex"; + pool = dev->pools.gem; + } else { + /* Regular buffers */ + name = "gallium3d_regular"; + pool = dev->pools.gem; + } + + buffer->bo = drm_intel_bo_alloc(pool, name, size, alignment); + + if (!buffer->bo) + goto err; + + return &buffer->base; + +err: + free(buffer); + return NULL; +} + +static struct pipe_buffer * +intel_be_user_buffer_create(struct pipe_winsys *winsys, void *ptr, unsigned bytes) +{ + struct intel_be_buffer *buffer = CALLOC_STRUCT(intel_be_buffer); + struct intel_be_device *dev = intel_be_device(winsys); + int ret; + + if (!buffer) + return NULL; + + buffer->base.refcount = 1; + buffer->base.alignment = 0; + buffer->base.usage = 0; + buffer->base.size = bytes; + + buffer->bo = drm_intel_bo_alloc(dev->pools.gem, + "gallium3d_user_buffer", + bytes, 0); + + if (!buffer->bo) + goto err; + + ret = drm_intel_bo_subdata(buffer->bo, + 0, bytes, ptr); + + if (ret) + goto err; + + return &buffer->base; + +err: + free(buffer); + return NULL; +} + +struct pipe_buffer * +intel_be_buffer_from_handle(struct pipe_winsys *winsys, + const char* name, unsigned handle) +{ + struct intel_be_device *dev = intel_be_device(winsys); + struct intel_be_buffer *buffer = CALLOC_STRUCT(intel_be_buffer); + + if (!buffer) + return NULL; + + buffer->bo = drm_intel_bo_gem_create_from_name(dev->pools.gem, name, handle); + + if (!buffer->bo) + goto err; + + buffer->base.refcount = 1; + buffer->base.alignment = buffer->bo->align; + buffer->base.usage = PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE | + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE; + buffer->base.size = buffer->bo->size; + + return &buffer->base; + +err: + free(buffer); + return NULL; +} + +unsigned +intel_be_handle_from_buffer(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + drm_intel_bo *bo = intel_bo(buf); + return bo->handle; +} + +/* + * Fence + */ + +static void +intel_be_fence_refunref(struct pipe_winsys *sws, + struct pipe_fence_handle 
**ptr, + struct pipe_fence_handle *fence) +{ + struct intel_be_fence **p = (struct intel_be_fence **)ptr; + struct intel_be_fence *f = (struct intel_be_fence *)fence; + + assert(p); + + if (f) + intel_be_fence_reference(f); + + if (*p) + intel_be_fence_unreference(*p); + + *p = f; +} + +static int +intel_be_fence_signalled(struct pipe_winsys *sws, + struct pipe_fence_handle *fence, + unsigned flag) +{ + assert(0); + + return 0; +} + +static int +intel_be_fence_finish(struct pipe_winsys *sws, + struct pipe_fence_handle *fence, + unsigned flag) +{ + struct intel_be_fence *f = (struct intel_be_fence *)fence; + + /* fence already expired */ + if (!f->bo) + return 0; + + drm_intel_bo_wait_rendering(f->bo); + drm_intel_bo_unreference(f->bo); + f->bo = NULL; + + return 0; +} + +/* + * Misc functions + */ + +static void +intel_be_destroy_winsys(struct pipe_winsys *winsys) +{ + struct intel_be_device *dev = intel_be_device(winsys); + + drm_intel_bufmgr_destroy(dev->pools.gem); + + free(dev); +} + +boolean +intel_be_init_device(struct intel_be_device *dev, int fd, unsigned id) +{ + dev->fd = fd; + dev->id = id; + dev->max_batch_size = 16 * 4096; + dev->max_vertex_size = 128 * 4096; + + dev->base.buffer_create = intel_be_buffer_create; + dev->base.user_buffer_create = intel_be_user_buffer_create; + dev->base.buffer_map = intel_be_buffer_map; + dev->base.buffer_unmap = intel_be_buffer_unmap; + dev->base.buffer_destroy = intel_be_buffer_destroy; + + /* Not used anymore */ + dev->base.surface_buffer_create = NULL; + + dev->base.fence_reference = intel_be_fence_refunref; + dev->base.fence_signalled = intel_be_fence_signalled; + dev->base.fence_finish = intel_be_fence_finish; + + dev->base.destroy = intel_be_destroy_winsys; + + dev->pools.gem = drm_intel_bufmgr_gem_init(dev->fd, dev->max_batch_size); + + return true; +} + +struct pipe_screen * +intel_be_create_screen(int drmFD, int deviceID) +{ + struct intel_be_device *dev; + struct pipe_screen *screen; + + /* Allocate the 
private area */ + dev = malloc(sizeof(*dev)); + if (!dev) + return NULL; + memset(dev, 0, sizeof(*dev)); + + intel_be_init_device(dev, drmFD, deviceID); + + screen = i915_create_screen(&dev->base, deviceID); + + return screen; +} diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_device.h b/src/gallium/winsys/drm/intel/gem/intel_be_device.h new file mode 100644 index 0000000000..c4837e65fa --- /dev/null +++ b/src/gallium/winsys/drm/intel/gem/intel_be_device.h @@ -0,0 +1,79 @@ + +#ifndef INTEL_DRM_DEVICE_H +#define INTEL_DRM_DEVICE_H + +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_context.h" + +#include "drm.h" +#include "intel_bufmgr.h" + +/* + * Device + */ + +struct intel_be_device +{ + struct pipe_winsys base; + + int fd; /**< Drm file discriptor */ + + unsigned id; + + size_t max_batch_size; + size_t max_vertex_size; + + struct { + drm_intel_bufmgr *gem; + } pools; +}; + +static INLINE struct intel_be_device * +intel_be_device(struct pipe_winsys *winsys) +{ + return (struct intel_be_device *)winsys; +} + +boolean +intel_be_init_device(struct intel_be_device *device, int fd, unsigned id); + +/* + * Buffer + */ + +struct intel_be_buffer { + struct pipe_buffer base; + drm_intel_bo *bo; +}; + +/** + * Create a be buffer from a drm bo handle. + * + * Takes a reference. + */ +struct pipe_buffer * +intel_be_buffer_from_handle(struct pipe_winsys *winsys, + const char* name, unsigned handle); + +/** + * Gets a handle from a buffer. + * + * If buffer is destroyed handle may become invalid. 
+ */ +unsigned +intel_be_handle_from_buffer(struct pipe_winsys *winsys, + struct pipe_buffer *buffer); + +static INLINE struct intel_be_buffer * +intel_be_buffer(struct pipe_buffer *buf) +{ + return (struct intel_be_buffer *)buf; +} + +static INLINE drm_intel_bo * +intel_bo(struct pipe_buffer *buf) +{ + return intel_be_buffer(buf)->bo; +} + +#endif diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_fence.h b/src/gallium/winsys/drm/intel/gem/intel_be_fence.h new file mode 100644 index 0000000000..0fe18f66f8 --- /dev/null +++ b/src/gallium/winsys/drm/intel/gem/intel_be_fence.h @@ -0,0 +1,38 @@ + +#ifndef INTEL_BE_FENCE_H +#define INTEL_BE_FENCE_H + +#include "pipe/p_defines.h" + +#include "drm.h" +#include "intel_bufmgr.h" + +/** + * Because gem does not have fence's we have to create our own fences. + * + * They work by keeping the batchbuffer around and checking if that has + * been idled. If bo is NULL fence has expired. + */ +struct intel_be_fence +{ + uint32_t refcount; + drm_intel_bo *bo; +}; + +static INLINE void +intel_be_fence_reference(struct intel_be_fence *f) +{ + f->refcount++; +} + +static INLINE void +intel_be_fence_unreference(struct intel_be_fence *f) +{ + if (!--f->refcount) { + if (f->bo) + drm_intel_bo_unreference(f->bo); + free(f); + } +} + +#endif diff --git a/src/gallium/winsys/drm/nouveau/Makefile b/src/gallium/winsys/drm/nouveau/Makefile new file mode 100644 index 0000000000..652cf7146c --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/Makefile @@ -0,0 +1,12 @@ +# src/gallium/winsys/drm/nouveau/Makefile +TOP = ../../../../.. 
+include $(TOP)/configs/current + +SUBDIRS = common dri + +default install clean: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE) $@) || exit 1; \ + fi \ + done diff --git a/src/gallium/winsys/drm/nouveau/common/Makefile b/src/gallium/winsys/drm/nouveau/common/Makefile new file mode 100644 index 0000000000..f675f7caf1 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/Makefile @@ -0,0 +1,22 @@ +TOP = ../../../../../.. +include $(TOP)/configs/current + +LIBNAME = nouveaudrm + +C_SOURCES = \ + nouveau_context.c \ + nouveau_lock.c \ + nouveau_screen.c \ + nouveau_winsys.c \ + nouveau_winsys_pipe.c \ + nouveau_winsys_softpipe.c + +LIBRARY_INCLUDES = $(shell pkg-config libdrm libdrm_nouveau --cflags-only-I) + +LIBRARY_DEFINES = $(shell pkg-config libdrm --cflags-only-other \ + && pkg-config libdrm --atleast-version=2.3.1 \ + && pkg-config libdrm_nouveau --exact-version=0.5 \ + && pkg-config libdrm_nouveau --cflags-only-other \ + && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") + +include ../../../../Makefile.template diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_context.c b/src/gallium/winsys/drm/nouveau/common/nouveau_context.c new file mode 100644 index 0000000000..d6ae0827cd --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_context.c @@ -0,0 +1,206 @@ +#include <pipe/p_defines.h> +#include <pipe/p_context.h> +#include <pipe/p_screen.h> +#include <util/u_memory.h> +#include "nouveau_context.h" +#include "nouveau_dri.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_winsys_pipe.h" + +static void +nouveau_channel_context_destroy(struct nouveau_channel_context *nvc) +{ + nouveau_channel_free(&nvc->channel); + + FREE(nvc); +} + +static struct nouveau_channel_context * +nouveau_channel_context_create(struct nouveau_device *dev) +{ + struct nouveau_channel_context *nvc; + int ret; + + nvc = CALLOC_STRUCT(nouveau_channel_context); + if (!nvc) + return NULL; + + if ((ret = 
nouveau_channel_alloc(dev, 0x8003d001, 0x8003d002, + &nvc->channel))) { + NOUVEAU_ERR("Error creating GPU channel: %d\n", ret); + nouveau_channel_context_destroy(nvc); + return NULL; + } + + nvc->next_handle = 0x77000000; + return nvc; +} + +int +nouveau_context_init(struct nouveau_screen *nv_screen, + drm_context_t hHWContext, drmLock *sarea_lock, + struct nouveau_context *nv_share, + struct nouveau_context *nv) +{ + struct pipe_context *pipe = NULL; + struct nouveau_channel_context *nvc = NULL; + struct nouveau_device *dev = nv_screen->device; + int i; + + switch (dev->chipset & 0xf0) { + case 0x00: + /* NV04 */ + case 0x10: + case 0x20: + /* NV10 */ + case 0x30: + /* NV30 */ + case 0x40: + case 0x60: + /* NV40 */ + case 0x50: + case 0x80: + case 0x90: + /* G80 */ + break; + default: + NOUVEAU_ERR("Unsupported chipset: NV%02x\n", dev->chipset); + return 1; + } + + nv->nv_screen = nv_screen; + + { + struct nouveau_device_priv *nvdev = nouveau_device(dev); + + nvdev->ctx = hHWContext; + nvdev->lock = sarea_lock; + } + + /* Attempt to share a single channel between multiple contexts from + * a single process. 
+ */ + nvc = nv_screen->nvc; + if (!nvc && nv_share) + nvc = nv_share->nvc; + + /*XXX: temporary - disable multi-context/single-channel on pre-NV4x */ + switch (dev->chipset & 0xf0) { + case 0x40: + case 0x60: + /* NV40 class */ + case 0x50: + case 0x80: + case 0x90: + /* G80 class */ + break; + default: + nvc = NULL; + break; + } + + if (!nvc) { + nvc = nouveau_channel_context_create(dev); + if (!nvc) { + NOUVEAU_ERR("Failed initialising GPU context\n"); + return 1; + } + nv_screen->nvc = nvc; + } + + nvc->refcount++; + nv->nvc = nvc; + + /* Find a free slot for a pipe context, allocate a new one if needed */ + nv->pctx_id = -1; + for (i = 0; i < nvc->nr_pctx; i++) { + if (nvc->pctx[i] == NULL) { + nv->pctx_id = i; + break; + } + } + + if (nv->pctx_id < 0) { + nv->pctx_id = nvc->nr_pctx++; + nvc->pctx = + realloc(nvc->pctx, + sizeof(struct pipe_context *) * nvc->nr_pctx); + } + + /* Create pipe */ + if (!getenv("NOUVEAU_FORCE_SOFTPIPE")) { + struct pipe_screen *pscreen; + + pipe = nouveau_pipe_create(nv); + if (!pipe) + NOUVEAU_ERR("Couldn't create hw pipe\n"); + pscreen = nvc->pscreen; + + nv->cap.hw_vertex_buffer = + pscreen->get_param(pscreen, NOUVEAU_CAP_HW_VTXBUF); + nv->cap.hw_index_buffer = + pscreen->get_param(pscreen, NOUVEAU_CAP_HW_IDXBUF); + } + + if (!pipe) { + NOUVEAU_MSG("Using softpipe\n"); + pipe = nouveau_create_softpipe(nv); + if (!pipe) { + NOUVEAU_ERR("Error creating pipe, bailing\n"); + return 1; + } + } + + { + struct pipe_texture *fb_tex; + struct pipe_surface *fb_surf; + struct nouveau_pipe_buffer *fb_buf; + enum pipe_format format; + + fb_buf = calloc(1, sizeof(struct nouveau_pipe_buffer)); + fb_buf->base.refcount = 1; + fb_buf->base.usage = PIPE_BUFFER_USAGE_PIXEL; + + nouveau_bo_fake(dev, nv_screen->front_offset, NOUVEAU_BO_VRAM, + nv_screen->front_pitch*nv_screen->front_height, + NULL, &fb_buf->bo); + + if (nv_screen->front_cpp == 4) + format = PIPE_FORMAT_A8R8G8B8_UNORM; + else + format = PIPE_FORMAT_R5G6B5_UNORM; + + fb_surf = 
nouveau_surface_buffer_ref(nv, &fb_buf->base, format, + nv_screen->front_pitch / + nv_screen->front_cpp, + nv_screen->front_height, + nv_screen->front_pitch, + &fb_tex); + + nv->frontbuffer = fb_surf; + nv->frontbuffer_texture = fb_tex; + } + + pipe->priv = nv; + return 0; +} + +void +nouveau_context_cleanup(struct nouveau_context *nv) +{ + struct nouveau_channel_context *nvc = nv->nvc; + + assert(nv); + + if (nv->pctx_id >= 0) { + nvc->pctx[nv->pctx_id] = NULL; + if (--nvc->refcount <= 0) { + nouveau_channel_context_destroy(nvc); + nv->nv_screen->nvc = NULL; + } + } + + /* XXX: Who cleans up the pipe? */ +} + diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_context.h b/src/gallium/winsys/drm/nouveau/common/nouveau_context.h new file mode 100644 index 0000000000..02d2745680 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_context.h @@ -0,0 +1,59 @@ +#ifndef __NOUVEAU_CONTEXT_H__ +#define __NOUVEAU_CONTEXT_H__ + +#include "nouveau/nouveau_winsys.h" +#include "nouveau_drmif.h" +#include "nouveau_device.h" +#include "nouveau_channel.h" +#include "nouveau_pushbuf.h" +#include "nouveau_bo.h" +#include "nouveau_grobj.h" +#include "nouveau_notifier.h" +#include "nouveau_class.h" +#include "nouveau_local.h" + +struct nouveau_channel_context { + struct pipe_screen *pscreen; + int refcount; + + unsigned cur_pctx; + unsigned nr_pctx; + struct pipe_context **pctx; + + struct nouveau_channel *channel; + unsigned next_handle; +}; + +struct nouveau_context { + int locked; + struct nouveau_screen *nv_screen; + struct pipe_surface *frontbuffer; + struct pipe_texture *frontbuffer_texture; + + struct { + int hw_vertex_buffer; + int hw_index_buffer; + } cap; + + /* Hardware context */ + struct nouveau_channel_context *nvc; + int pctx_id; +}; + +extern int nouveau_context_init(struct nouveau_screen *nv_screen, + drm_context_t hHWContext, drmLock *sarea_lock, + struct nouveau_context *nv_share, + struct nouveau_context *nv); +extern void 
nouveau_context_cleanup(struct nouveau_context *nv); + +extern void LOCK_HARDWARE(struct nouveau_context *); +extern void UNLOCK_HARDWARE(struct nouveau_context *); + +extern uint32_t *nouveau_pipe_dma_beginp(struct nouveau_grobj *, int, int); +extern void nouveau_pipe_dma_kickoff(struct nouveau_channel *); + +/* Must be provided by clients of common code */ +extern void +nouveau_contended_lock(struct nouveau_context *nv); + +#endif diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_dri.h b/src/gallium/winsys/drm/nouveau/common/nouveau_dri.h new file mode 100644 index 0000000000..1207c2d609 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_dri.h @@ -0,0 +1,28 @@ +#ifndef _NOUVEAU_DRI_ +#define _NOUVEAU_DRI_ + +#include "xf86drm.h" +#include "drm.h" +#include "nouveau_drm.h" + +struct nouveau_dri { + uint32_t device_id; /**< \brief PCI device ID */ + uint32_t width; /**< \brief width in pixels of display */ + uint32_t height; /**< \brief height in scanlines of display */ + uint32_t depth; /**< \brief depth of display (8, 15, 16, 24) */ + uint32_t bpp; /**< \brief bit depth of display (8, 16, 24, 32) */ + + uint32_t bus_type; /**< \brief ths bus type */ + uint32_t bus_mode; /**< \brief bus mode (used for AGP, maybe also for PCI-E ?) 
*/ + + uint32_t front_offset; /**< \brief front buffer offset */ + uint32_t front_pitch; /**< \brief front buffer pitch */ + uint32_t back_offset; /**< \brief private back buffer offset */ + uint32_t back_pitch; /**< \brief private back buffer pitch */ + uint32_t depth_offset; /**< \brief private depth buffer offset */ + uint32_t depth_pitch; /**< \brief private depth buffer pitch */ + +}; + +#endif + diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_local.h b/src/gallium/winsys/drm/nouveau/common/nouveau_local.h new file mode 100644 index 0000000000..11175bce7a --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_local.h @@ -0,0 +1,19 @@ +#ifndef __NOUVEAU_LOCAL_H__ +#define __NOUVEAU_LOCAL_H__ + +#include "pipe/p_compiler.h" +#include "nouveau_winsys_pipe.h" +#include <stdio.h> + +/* Debug output */ +#define NOUVEAU_MSG(fmt, args...) do { \ + fprintf(stdout, "nouveau: "fmt, ##args); \ + fflush(stdout); \ +} while(0) + +#define NOUVEAU_ERR(fmt, args...) do { \ + fprintf(stderr, "%s:%d - "fmt, __func__, __LINE__, ##args); \ + fflush(stderr); \ +} while(0) + +#endif diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_lock.c b/src/gallium/winsys/drm/nouveau/common/nouveau_lock.c new file mode 100644 index 0000000000..e8cf051ed9 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_lock.c @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include <pipe/p_thread.h> +#include "nouveau_context.h" +#include "nouveau_screen.h" + +pipe_static_mutex(lockMutex); + +/* Lock the hardware and validate our state. 
+ */ +void +LOCK_HARDWARE(struct nouveau_context *nv) +{ + struct nouveau_screen *nv_screen = nv->nv_screen; + struct nouveau_device *dev = nv_screen->device; + struct nouveau_device_priv *nvdev = nouveau_device(dev); + char __ret=0; + + assert(!nv->locked); + pipe_mutex_lock(lockMutex); + + DRM_CAS(nvdev->lock, nvdev->ctx, + (DRM_LOCK_HELD | nvdev->ctx), __ret); + + if (__ret) { + drmGetLock(nvdev->fd, nvdev->ctx, 0); + nouveau_contended_lock(nv); + } + nv->locked = 1; +} + +/* Unlock the hardware using the global current context + */ +void +UNLOCK_HARDWARE(struct nouveau_context *nv) +{ + struct nouveau_screen *nv_screen = nv->nv_screen; + struct nouveau_device *dev = nv_screen->device; + struct nouveau_device_priv *nvdev = nouveau_device(dev); + + assert(nv->locked); + nv->locked = 0; + + DRM_UNLOCK(nvdev->fd, nvdev->lock, nvdev->ctx); + + pipe_mutex_unlock(lockMutex); +} diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_screen.c b/src/gallium/winsys/drm/nouveau/common/nouveau_screen.c new file mode 100644 index 0000000000..422fbf0207 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_screen.c @@ -0,0 +1,31 @@ +#include <util/u_memory.h> +#include "nouveau_dri.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" + +int +nouveau_screen_init(struct nouveau_dri *nv_dri, int dev_fd, + struct nouveau_screen *nv_screen) +{ + int ret; + + ret = nouveau_device_open_existing(&nv_screen->device, 0, + dev_fd, 0); + if (ret) { + NOUVEAU_ERR("Failed opening nouveau device: %d\n", ret); + return 1; + } + + nv_screen->front_offset = nv_dri->front_offset; + nv_screen->front_pitch = nv_dri->front_pitch * (nv_dri->bpp / 8); + nv_screen->front_cpp = nv_dri->bpp / 8; + nv_screen->front_height = nv_dri->height; + + return 0; +} + +void +nouveau_screen_cleanup(struct nouveau_screen *nv_screen) +{ + nouveau_device_close(&nv_screen->device); +} diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_screen.h 
b/src/gallium/winsys/drm/nouveau/common/nouveau_screen.h new file mode 100644 index 0000000000..3e68e219d8 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_screen.h @@ -0,0 +1,27 @@ +#ifndef __NOUVEAU_SCREEN_H__ +#define __NOUVEAU_SCREEN_H__ + +#include <stdint.h> + +struct nouveau_device; +struct nouveau_dri; + +struct nouveau_screen { + struct nouveau_device *device; + + uint32_t front_offset; + uint32_t front_pitch; + uint32_t front_cpp; + uint32_t front_height; + + void *nvc; +}; + +int +nouveau_screen_init(struct nouveau_dri *nv_dri, int dev_fd, + struct nouveau_screen *nv_screen); + +void +nouveau_screen_cleanup(struct nouveau_screen *nv_screen); + +#endif diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys.c b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys.c new file mode 100644 index 0000000000..b6199f8e6d --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys.c @@ -0,0 +1,141 @@ +#include "util/u_memory.h" + +#include "nouveau_context.h" +#include "nouveau_screen.h" +#include "nouveau_winsys_pipe.h" + +#include "nouveau/nouveau_winsys.h" + +static int +nouveau_pipe_notifier_alloc(struct nouveau_winsys *nvws, int count, + struct nouveau_notifier **notify) +{ + struct nouveau_context *nv = nvws->nv; + + return nouveau_notifier_alloc(nv->nvc->channel, nv->nvc->next_handle++, + count, notify); +} + +static int +nouveau_pipe_grobj_alloc(struct nouveau_winsys *nvws, int grclass, + struct nouveau_grobj **grobj) +{ + struct nouveau_context *nv = nvws->nv; + struct nouveau_channel *chan = nv->nvc->channel; + int ret; + + ret = nouveau_grobj_alloc(chan, nv->nvc->next_handle++, + grclass, grobj); + if (ret) + return ret; + + BEGIN_RING(chan, *grobj, 0x0000, 1); + OUT_RING (chan, (*grobj)->handle); + (*grobj)->bound = NOUVEAU_GROBJ_BOUND_EXPLICIT; + return 0; +} + +static int +nouveau_pipe_push_reloc(struct nouveau_winsys *nvws, void *ptr, + struct pipe_buffer *buf, uint32_t data, + uint32_t flags, uint32_t vor, 
uint32_t tor) +{ + struct nouveau_bo *bo = nouveau_pipe_buffer(buf)->bo; + + return nouveau_pushbuf_emit_reloc(nvws->channel, ptr, bo, + data, flags, vor, tor); +} + +static int +nouveau_pipe_push_flush(struct nouveau_winsys *nvws, unsigned size, + struct pipe_fence_handle **fence) +{ + if (fence) + *fence = NULL; + + return nouveau_pushbuf_flush(nvws->channel, size); +} + +static struct nouveau_bo * +nouveau_pipe_get_bo(struct pipe_buffer *pb) +{ + return nouveau_pipe_buffer(pb)->bo; +} + +struct pipe_context * +nouveau_pipe_create(struct nouveau_context *nv) +{ + struct nouveau_channel_context *nvc = nv->nvc; + struct nouveau_winsys *nvws = CALLOC_STRUCT(nouveau_winsys); + struct pipe_screen *(*hws_create)(struct pipe_winsys *, + struct nouveau_winsys *); + struct pipe_context *(*hw_create)(struct pipe_screen *, unsigned); + struct pipe_winsys *ws; + unsigned chipset = nv->nv_screen->device->chipset; + + if (!nvws) + return NULL; + + switch (chipset & 0xf0) { + case 0x00: + hws_create = nv04_screen_create; + hw_create = nv04_create; + break; + case 0x10: + hws_create = nv10_screen_create; + hw_create = nv10_create; + break; + case 0x20: + hws_create = nv20_screen_create; + hw_create = nv20_create; + break; + case 0x30: + hws_create = nv30_screen_create; + hw_create = nv30_create; + break; + case 0x40: + case 0x60: + hws_create = nv40_screen_create; + hw_create = nv40_create; + break; + case 0x50: + case 0x80: + case 0x90: + hws_create = nv50_screen_create; + hw_create = nv50_create; + break; + default: + NOUVEAU_ERR("Unknown chipset NV%02x\n", chipset); + return NULL; + } + + nvws->nv = nv; + nvws->channel = nv->nvc->channel; + + nvws->res_init = nouveau_resource_init; + nvws->res_alloc = nouveau_resource_alloc; + nvws->res_free = nouveau_resource_free; + + nvws->push_reloc = nouveau_pipe_push_reloc; + nvws->push_flush = nouveau_pipe_push_flush; + + nvws->grobj_alloc = nouveau_pipe_grobj_alloc; + nvws->grobj_free = nouveau_grobj_free; + + nvws->notifier_alloc = 
nouveau_pipe_notifier_alloc; + nvws->notifier_free = nouveau_notifier_free; + nvws->notifier_reset = nouveau_notifier_reset; + nvws->notifier_status = nouveau_notifier_status; + nvws->notifier_retval = nouveau_notifier_return_val; + nvws->notifier_wait = nouveau_notifier_wait_status; + + nvws->get_bo = nouveau_pipe_get_bo; + + ws = nouveau_create_pipe_winsys(nv); + + if (!nvc->pscreen) + nvc->pscreen = hws_create(ws, nvws); + nvc->pctx[nv->pctx_id] = hw_create(nvc->pscreen, nv->pctx_id); + return nvc->pctx[nv->pctx_id]; +} + diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.c b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.c new file mode 100644 index 0000000000..e3ee985afc --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.c @@ -0,0 +1,245 @@ +#include "pipe/internal/p_winsys_screen.h" +#include <pipe/p_defines.h> +#include <pipe/p_inlines.h> +#include <util/u_memory.h> +#include "nouveau_context.h" +#include "nouveau_local.h" +#include "nouveau_screen.h" +#include "nouveau_winsys_pipe.h" + +static const char * +nouveau_get_name(struct pipe_winsys *pws) +{ + return "Nouveau/DRI"; +} + +static uint32_t +nouveau_flags_from_usage(struct nouveau_context *nv, unsigned usage) +{ + struct nouveau_device *dev = nv->nv_screen->device; + uint32_t flags = NOUVEAU_BO_LOCAL; + + if (usage & NOUVEAU_BUFFER_USAGE_TRANSFER) + flags |= NOUVEAU_BO_GART; + + if (usage & PIPE_BUFFER_USAGE_PIXEL) { + if (usage & NOUVEAU_BUFFER_USAGE_TEXTURE) + flags |= NOUVEAU_BO_GART; + if (!(usage & PIPE_BUFFER_USAGE_CPU_READ_WRITE)) + flags |= NOUVEAU_BO_VRAM; + + switch (dev->chipset & 0xf0) { + case 0x50: + case 0x80: + case 0x90: + flags |= NOUVEAU_BO_TILED; + if (usage & NOUVEAU_BUFFER_USAGE_ZETA) + flags |= NOUVEAU_BO_ZTILE; + break; + default: + break; + } + } + + if (usage & PIPE_BUFFER_USAGE_VERTEX) { + if (nv->cap.hw_vertex_buffer) + flags |= NOUVEAU_BO_GART; + } + + if (usage & PIPE_BUFFER_USAGE_INDEX) { + if 
(nv->cap.hw_index_buffer) + flags |= NOUVEAU_BO_GART; + } + + return flags; +} + +static struct pipe_buffer * +nouveau_pipe_bo_create(struct pipe_winsys *pws, unsigned alignment, + unsigned usage, unsigned size) +{ + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; + struct nouveau_context *nv = nvpws->nv; + struct nouveau_device *dev = nv->nv_screen->device; + struct nouveau_pipe_buffer *nvbuf; + uint32_t flags; + + nvbuf = CALLOC_STRUCT(nouveau_pipe_buffer); + if (!nvbuf) + return NULL; + nvbuf->base.refcount = 1; + nvbuf->base.alignment = alignment; + nvbuf->base.usage = usage; + nvbuf->base.size = size; + + flags = nouveau_flags_from_usage(nv, usage); + + if (nouveau_bo_new(dev, flags, alignment, size, &nvbuf->bo)) { + FREE(nvbuf); + return NULL; + } + + return &nvbuf->base; +} + +static struct pipe_buffer * +nouveau_pipe_bo_user_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) +{ + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; + struct nouveau_device *dev = nvpws->nv->nv_screen->device; + struct nouveau_pipe_buffer *nvbuf; + + nvbuf = CALLOC_STRUCT(nouveau_pipe_buffer); + if (!nvbuf) + return NULL; + nvbuf->base.refcount = 1; + nvbuf->base.size = bytes; + + if (nouveau_bo_user(dev, ptr, bytes, &nvbuf->bo)) { + FREE(nvbuf); + return NULL; + } + + return &nvbuf->base; +} + +static void +nouveau_pipe_bo_del(struct pipe_winsys *ws, struct pipe_buffer *buf) +{ + struct nouveau_pipe_buffer *nvbuf = nouveau_pipe_buffer(buf); + + nouveau_bo_ref(NULL, &nvbuf->bo); + FREE(nvbuf); +} + +static void * +nouveau_pipe_bo_map(struct pipe_winsys *pws, struct pipe_buffer *buf, + unsigned flags) +{ + struct nouveau_pipe_buffer *nvbuf = nouveau_pipe_buffer(buf); + uint32_t map_flags = 0; + + if (flags & PIPE_BUFFER_USAGE_CPU_READ) + map_flags |= NOUVEAU_BO_RD; + if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) + map_flags |= NOUVEAU_BO_WR; + +#if 0 + if (flags & PIPE_BUFFER_USAGE_DISCARD && + !(flags & PIPE_BUFFER_USAGE_CPU_READ) 
&& + nouveau_bo_busy(nvbuf->bo, map_flags)) { + struct nouveau_pipe_winsys *nvpws = (struct nouveau_pipe_winsys *)pws; + struct nouveau_context *nv = nvpws->nv; + struct nouveau_device *dev = nv->nv_screen->device; + struct nouveau_bo *rename; + uint32_t flags = nouveau_flags_from_usage(nv, buf->usage); + + if (!nouveau_bo_new(dev, flags, buf->alignment, buf->size, &rename)) { + nouveau_bo_ref(NULL, &nvbuf->bo); + nvbuf->bo = rename; + } + } +#endif + + if (nouveau_bo_map(nvbuf->bo, map_flags)) + return NULL; + return nvbuf->bo->map; +} + +static void +nouveau_pipe_bo_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf) +{ + struct nouveau_pipe_buffer *nvbuf = nouveau_pipe_buffer(buf); + + nouveau_bo_unmap(nvbuf->bo); +} + +static void +nouveau_pipe_fence_reference(struct pipe_winsys *ws, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *pfence) +{ + *ptr = pfence; +} + +static int +nouveau_pipe_fence_signalled(struct pipe_winsys *ws, + struct pipe_fence_handle *pfence, unsigned flag) +{ + return 0; +} + +static int +nouveau_pipe_fence_finish(struct pipe_winsys *ws, + struct pipe_fence_handle *pfence, unsigned flag) +{ + return 0; +} + +struct pipe_surface * +nouveau_surface_buffer_ref(struct nouveau_context *nv, struct pipe_buffer *pb, + enum pipe_format format, int w, int h, + unsigned pitch, struct pipe_texture **ppt) +{ + struct pipe_screen *pscreen = nv->nvc->pscreen; + struct pipe_texture tmpl, *pt; + struct pipe_surface *ps; + + memset(&tmpl, 0, sizeof(tmpl)); + tmpl.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET | + NOUVEAU_TEXTURE_USAGE_LINEAR; + tmpl.target = PIPE_TEXTURE_2D; + tmpl.width[0] = w; + tmpl.height[0] = h; + tmpl.depth[0] = 1; + tmpl.format = format; + pf_get_block(tmpl.format, &tmpl.block); + tmpl.nblocksx[0] = pf_get_nblocksx(&tmpl.block, w); + tmpl.nblocksy[0] = pf_get_nblocksy(&tmpl.block, h); + + pt = pscreen->texture_blanket(pscreen, &tmpl, &pitch, pb); + if (!pt) + return NULL; + + ps = pscreen->get_tex_surface(pscreen, 
pt, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_WRITE); + + *ppt = pt; + return ps; +} + +static void +nouveau_destroy(struct pipe_winsys *pws) +{ + FREE(pws); +} + +struct pipe_winsys * +nouveau_create_pipe_winsys(struct nouveau_context *nv) +{ + struct nouveau_pipe_winsys *nvpws; + struct pipe_winsys *pws; + + nvpws = CALLOC_STRUCT(nouveau_pipe_winsys); + if (!nvpws) + return NULL; + nvpws->nv = nv; + pws = &nvpws->pws; + + pws->flush_frontbuffer = nouveau_flush_frontbuffer; + + pws->buffer_create = nouveau_pipe_bo_create; + pws->buffer_destroy = nouveau_pipe_bo_del; + pws->user_buffer_create = nouveau_pipe_bo_user_create; + pws->buffer_map = nouveau_pipe_bo_map; + pws->buffer_unmap = nouveau_pipe_bo_unmap; + + pws->fence_reference = nouveau_pipe_fence_reference; + pws->fence_signalled = nouveau_pipe_fence_signalled; + pws->fence_finish = nouveau_pipe_fence_finish; + + pws->get_name = nouveau_get_name; + pws->destroy = nouveau_destroy; + + return &nvpws->pws; +} diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.h b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.h new file mode 100644 index 0000000000..1eb8043478 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.h @@ -0,0 +1,44 @@ +#ifndef NOUVEAU_PIPE_WINSYS_H +#define NOUVEAU_PIPE_WINSYS_H + +#include "pipe/p_context.h" +#include "pipe/internal/p_winsys_screen.h" +#include "nouveau_context.h" + +struct nouveau_pipe_buffer { + struct pipe_buffer base; + struct nouveau_bo *bo; +}; + +static INLINE struct nouveau_pipe_buffer * +nouveau_pipe_buffer(struct pipe_buffer *buf) +{ + return (struct nouveau_pipe_buffer *)buf; +} + +struct nouveau_pipe_winsys { + struct pipe_winsys pws; + + struct nouveau_context *nv; +}; + +extern struct pipe_winsys * +nouveau_create_pipe_winsys(struct nouveau_context *nv); + +struct pipe_context * +nouveau_create_softpipe(struct nouveau_context *nv); + +struct pipe_context * +nouveau_pipe_create(struct nouveau_context *nv); + +/* Must be 
provided by clients of common code */ +extern void +nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf, + void *context_private); + +struct pipe_surface * +nouveau_surface_buffer_ref(struct nouveau_context *nv, struct pipe_buffer *pb, + enum pipe_format format, int w, int h, + unsigned pitch, struct pipe_texture **ppt); + +#endif diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_softpipe.c b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_softpipe.c new file mode 100644 index 0000000000..396e4f2a2e --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_softpipe.c @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * + **************************************************************************/ +/* + * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> + */ + +#include "pipe/internal/p_winsys_screen.h" +#include <pipe/p_screen.h> +#include <pipe/p_defines.h> +#include <pipe/p_format.h> +#include <softpipe/sp_winsys.h> +#include <util/u_memory.h> +#include "nouveau_context.h" +#include "nouveau_winsys_pipe.h" + +struct nouveau_softpipe_winsys { + struct softpipe_winsys sws; + struct nouveau_context *nv; +}; + +/** + * Return list of surface formats supported by this driver. + */ +static boolean +nouveau_is_format_supported(struct softpipe_winsys *sws, + enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return TRUE; + default: + break; + }; + + return FALSE; +} + +struct pipe_context * +nouveau_create_softpipe(struct nouveau_context *nv) +{ + struct nouveau_softpipe_winsys *nvsws; + struct pipe_screen *pscreen; + struct pipe_winsys *ws; + struct pipe_context *pipe; + + ws = nouveau_create_pipe_winsys(nv); + if (!ws) + return NULL; + pscreen = softpipe_create_screen(ws); + if (!pscreen) { + ws->destroy(ws); + return NULL; + } + nvsws = CALLOC_STRUCT(nouveau_softpipe_winsys); + if (!nvsws) { + ws->destroy(ws); + pscreen->destroy(pscreen); + return NULL; + } + + nvsws->sws.is_format_supported = nouveau_is_format_supported; + nvsws->nv = nv; + + pipe = softpipe_create(pscreen, ws, &nvsws->sws); + if (!pipe) { + ws->destroy(ws); + pscreen->destroy(pscreen); + FREE(nvsws); + return NULL; + } + + return pipe; +} + diff --git a/src/gallium/winsys/drm/nouveau/dri/Makefile b/src/gallium/winsys/drm/nouveau/dri/Makefile new file mode 100644 index 0000000000..a73e8d5cb4 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/dri/Makefile @@ -0,0 +1,34 @@ +TOP = ../../../../../.. 
+include $(TOP)/configs/current + +LIBNAME = nouveau_dri.so + +MINIGLX_SOURCES = + +PIPE_DRIVERS = \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/nv04/libnv04.a \ + $(TOP)/src/gallium/drivers/nv10/libnv10.a \ + $(TOP)/src/gallium/drivers/nv20/libnv20.a \ + $(TOP)/src/gallium/drivers/nv30/libnv30.a \ + $(TOP)/src/gallium/drivers/nv40/libnv40.a \ + $(TOP)/src/gallium/drivers/nv50/libnv50.a + +DRIVER_SOURCES = \ + nouveau_context_dri.c \ + nouveau_screen_dri.c \ + nouveau_swapbuffers.c \ + ../common/libnouveaudrm.a + +C_SOURCES = \ + $(COMMON_GALLIUM_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +DRIVER_DEFINES = $(shell pkg-config libdrm_nouveau --cflags) +DRI_LIB_DEPS += $(shell pkg-config libdrm_nouveau --libs) + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.c b/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.c new file mode 100644 index 0000000000..aacfe984d1 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.c @@ -0,0 +1,124 @@ +#include <main/glheader.h> +#include <glapi/glthread.h> +#include <GL/internal/glcore.h> +#include <utils.h> + +#include <state_tracker/st_public.h> +#include <state_tracker/st_context.h> +#include <pipe/p_defines.h> +#include <pipe/p_context.h> +#include <pipe/p_screen.h> + +#include "../common/nouveau_winsys_pipe.h" +#include "../common/nouveau_dri.h" +#include "../common/nouveau_local.h" +#include "nouveau_context_dri.h" +#include "nouveau_screen_dri.h" + +#ifdef DEBUG +static const struct dri_debug_control debug_control[] = { + { "bo", DEBUG_BO }, + { NULL, 0 } +}; +int __nouveau_debug = 0; +#endif + +GLboolean +nouveau_context_create(const __GLcontextModes *glVis, + __DRIcontextPrivate *driContextPriv, + void *sharedContextPrivate) +{ + __DRIscreenPrivate *driScrnPriv = driContextPriv->driScreenPriv; + struct nouveau_screen_dri *nv_screen = driScrnPriv->private; + struct nouveau_context_dri *nv 
= CALLOC_STRUCT(nouveau_context_dri); + struct st_context *st_share = NULL; + struct nouveau_context_dri *nv_share = NULL; + struct pipe_context *pipe; + + if (sharedContextPrivate) { + st_share = ((struct nouveau_context_dri *)sharedContextPrivate)->st; + nv_share = st_share->pipe->priv; + } + + if (nouveau_context_init(&nv_screen->base, driContextPriv->hHWContext, + (drmLock *)&driScrnPriv->pSAREA->lock, + &nv_share->base, &nv->base)) { + return GL_FALSE; + } + + pipe = nv->base.nvc->pctx[nv->base.pctx_id]; + driContextPriv->driverPrivate = (void *)nv; + //nv->nv_screen = nv_screen; + nv->dri_screen = driScrnPriv; + + driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache, + nv->dri_screen->myNum, "nouveau"); +#ifdef DEBUG + __nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"), + debug_control); +#endif + + nv->st = st_create_context(pipe, glVis, st_share); + return GL_TRUE; +} + +void +nouveau_context_destroy(__DRIcontextPrivate *driContextPriv) +{ + struct nouveau_context_dri *nv = driContextPriv->driverPrivate; + + assert(nv); + + st_finish(nv->st); + st_destroy_context(nv->st); + + nouveau_context_cleanup(&nv->base); + + FREE(nv); +} + +GLboolean +nouveau_context_bind(__DRIcontextPrivate *driContextPriv, + __DRIdrawablePrivate *driDrawPriv, + __DRIdrawablePrivate *driReadPriv) +{ + struct nouveau_context_dri *nv; + struct nouveau_framebuffer *draw, *read; + + if (!driContextPriv) { + st_make_current(NULL, NULL, NULL); + return GL_TRUE; + } + + nv = driContextPriv->driverPrivate; + draw = driDrawPriv->driverPrivate; + read = driReadPriv->driverPrivate; + + st_make_current(nv->st, draw->stfb, read->stfb); + + if ((nv->dri_drawable != driDrawPriv) || + (nv->last_stamp != driDrawPriv->lastStamp)) { + nv->dri_drawable = driDrawPriv; + st_resize_framebuffer(draw->stfb, driDrawPriv->w, + driDrawPriv->h); + nv->last_stamp = driDrawPriv->lastStamp; + } + + if (driDrawPriv != driReadPriv) { + st_resize_framebuffer(read->stfb, driReadPriv->w, + 
driReadPriv->h); + } + + return GL_TRUE; +} + +GLboolean +nouveau_context_unbind(__DRIcontextPrivate *driContextPriv) +{ + struct nouveau_context_dri *nv = driContextPriv->driverPrivate; + (void)nv; + + st_flush(nv->st, 0, NULL); + return GL_TRUE; +} + diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.h b/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.h new file mode 100644 index 0000000000..64cf326411 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_context_dri.h @@ -0,0 +1,47 @@ +#ifndef __NOUVEAU_CONTEXT_DRI_H__ +#define __NOUVEAU_CONTEXT_DRI_H__ + +#include <dri_util.h> +#include <xmlconfig.h> +#include <nouveau/nouveau_winsys.h> +#include "../common/nouveau_context.h" + +struct nouveau_framebuffer { + struct st_framebuffer *stfb; +}; + +struct nouveau_context_dri { + struct nouveau_context base; + struct st_context *st; + + /* DRI stuff */ + __DRIscreenPrivate *dri_screen; + __DRIdrawablePrivate *dri_drawable; + unsigned int last_stamp; + driOptionCache dri_option_cache; + drm_context_t drm_context; + drmLock drm_lock; +}; + +extern GLboolean nouveau_context_create(const __GLcontextModes *, + __DRIcontextPrivate *, void *); +extern void nouveau_context_destroy(__DRIcontextPrivate *); +extern GLboolean nouveau_context_bind(__DRIcontextPrivate *, + __DRIdrawablePrivate *draw, + __DRIdrawablePrivate *read); +extern GLboolean nouveau_context_unbind(__DRIcontextPrivate *); + +#ifdef DEBUG +extern int __nouveau_debug; + +#define DEBUG_BO (1 << 0) + +#define DBG(flag, ...) do { \ + if (__nouveau_debug & (DEBUG_##flag)) \ + NOUVEAU_ERR(__VA_ARGS__); \ +} while(0) +#else +#define DBG(flag, ...) 
+#endif + +#endif diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.c b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.c new file mode 100644 index 0000000000..964a9028aa --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.c @@ -0,0 +1,259 @@ +#include <utils.h> +#include <vblank.h> +#include <xmlpool.h> + +#include <pipe/p_context.h> +#include <state_tracker/st_public.h> +#include <state_tracker/st_cb_fbo.h> +#include <nouveau_drm.h> +#include "../common/nouveau_dri.h" +#include "../common/nouveau_local.h" +#include "nouveau_context_dri.h" +#include "nouveau_screen_dri.h" +#include "nouveau_swapbuffers.h" + +#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 12 +#error nouveau_drm.h version does not match expected version +#endif + +/* Extension stuff, enabling of extensions handled by Gallium's GL state + * tracker. But, we still need to define the entry points we want. + */ +#define need_GL_ARB_fragment_program +#define need_GL_ARB_multisample +#define need_GL_ARB_occlusion_query +#define need_GL_ARB_point_parameters +#define need_GL_ARB_shader_objects +#define need_GL_ARB_texture_compression +#define need_GL_ARB_vertex_program +#define need_GL_ARB_vertex_shader +#define need_GL_ARB_vertex_buffer_object +#define need_GL_EXT_compiled_vertex_array +#define need_GL_EXT_fog_coord +#define need_GL_EXT_secondary_color +#define need_GL_EXT_framebuffer_object +#define need_GL_VERSION_2_0 +#define need_GL_VERSION_2_1 +#include "extension_helper.h" + +const struct dri_extension card_extensions[] = +{ + { "GL_ARB_multisample", GL_ARB_multisample_functions }, + { "GL_ARB_occlusion_query", GL_ARB_occlusion_query_functions }, + { "GL_ARB_point_parameters", GL_ARB_point_parameters_functions }, + { "GL_ARB_shader_objects", GL_ARB_shader_objects_functions }, + { "GL_ARB_shading_language_100", GL_VERSION_2_0_functions }, + { "GL_ARB_shading_language_120", GL_VERSION_2_1_functions }, + { "GL_ARB_texture_compression", 
GL_ARB_texture_compression_functions }, + { "GL_ARB_vertex_program", GL_ARB_vertex_program_functions }, + { "GL_ARB_vertex_shader", GL_ARB_vertex_shader_functions }, + { "GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions }, + { "GL_EXT_compiled_vertex_array", GL_EXT_compiled_vertex_array_functions }, + { "GL_EXT_fog_coord", GL_EXT_fog_coord_functions }, + { "GL_EXT_framebuffer_object", GL_EXT_framebuffer_object_functions }, + { "GL_EXT_secondary_color", GL_EXT_secondary_color_functions }, + { NULL, 0 } +}; + +PUBLIC const char __driConfigOptions[] = +DRI_CONF_BEGIN +DRI_CONF_END; +static const GLuint __driNConfigOptions = 0; + +extern const struct dri_extension common_extensions[]; +extern const struct dri_extension nv40_extensions[]; + +static GLboolean +nouveau_create_buffer(__DRIscreenPrivate * driScrnPriv, + __DRIdrawablePrivate * driDrawPriv, + const __GLcontextModes *glVis, GLboolean pixmapBuffer) +{ + struct nouveau_framebuffer *nvfb; + enum pipe_format colour, depth, stencil; + + if (pixmapBuffer) + return GL_FALSE; + + nvfb = CALLOC_STRUCT(nouveau_framebuffer); + if (!nvfb) + return GL_FALSE; + + if (glVis->redBits == 5) + colour = PIPE_FORMAT_R5G6B5_UNORM; + else + colour = PIPE_FORMAT_A8R8G8B8_UNORM; + + if (glVis->depthBits == 16) + depth = PIPE_FORMAT_Z16_UNORM; + else if (glVis->depthBits == 24) + depth = PIPE_FORMAT_Z24S8_UNORM; + else + depth = PIPE_FORMAT_NONE; + + if (glVis->stencilBits == 8) + stencil = PIPE_FORMAT_Z24S8_UNORM; + else + stencil = PIPE_FORMAT_NONE; + + nvfb->stfb = st_create_framebuffer(glVis, colour, depth, stencil, + driDrawPriv->w, driDrawPriv->h, + (void*)nvfb); + if (!nvfb->stfb) { + free(nvfb); + return GL_FALSE; + } + + driDrawPriv->driverPrivate = (void *)nvfb; + return GL_TRUE; +} + +static void +nouveau_destroy_buffer(__DRIdrawablePrivate * driDrawPriv) +{ + struct nouveau_framebuffer *nvfb; + + nvfb = (struct nouveau_framebuffer *)driDrawPriv->driverPrivate; + st_unreference_framebuffer(nvfb->stfb); + 
free(nvfb); +} + +static __DRIconfig ** +nouveau_fill_in_modes(__DRIscreenPrivate *psp, + unsigned pixel_bits, unsigned depth_bits, + unsigned stencil_bits, GLboolean have_back_buffer) +{ + __DRIconfig **configs; + unsigned depth_buffer_factor; + unsigned back_buffer_factor; + GLenum fb_format; + GLenum fb_type; + + static const GLenum back_buffer_modes[] = { + GLX_NONE, GLX_SWAP_UNDEFINED_OML, + }; + + uint8_t depth_bits_array[3]; + uint8_t stencil_bits_array[3]; + uint8_t msaa_samples_array[1]; + + depth_bits_array[0] = 0; + depth_bits_array[1] = depth_bits; + depth_bits_array[2] = depth_bits; + + /* Just like with the accumulation buffer, always provide some modes + * with a stencil buffer. It will be a sw fallback, but some apps won't + * care about that. + */ + stencil_bits_array[0] = 0; + stencil_bits_array[1] = 0; + if (depth_bits == 24) + stencil_bits_array[1] = (stencil_bits == 0) ? 8 : stencil_bits; + stencil_bits_array[2] = (stencil_bits == 0) ? 8 : stencil_bits; + + msaa_samples_array[0] = 0; + + depth_buffer_factor = + ((depth_bits != 0) || (stencil_bits != 0)) ? 3 : 1; + back_buffer_factor = (have_back_buffer) ? 
3 : 1; + + if (pixel_bits == 16) { + fb_format = GL_RGB; + fb_type = GL_UNSIGNED_SHORT_5_6_5; + } + else { + fb_format = GL_BGRA; + fb_type = GL_UNSIGNED_INT_8_8_8_8_REV; + } + + configs = driCreateConfigs(fb_format, fb_type, + depth_bits_array, stencil_bits_array, + depth_buffer_factor, back_buffer_modes, + back_buffer_factor, msaa_samples_array, 1); + if (configs == NULL) { + fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", + __func__, __LINE__); + return NULL; + } + + return configs; +} + +static const __DRIconfig ** +nouveau_screen_create(__DRIscreenPrivate *psp) +{ + struct nouveau_dri *nv_dri = psp->pDevPriv; + struct nouveau_screen_dri *nv_screen; + static const __DRIversion ddx_expected = + { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; + static const __DRIversion dri_expected = { 4, 0, 0 }; + static const __DRIversion drm_expected = + { 0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL }; + + if (!driCheckDriDdxDrmVersions2("nouveau", + &psp->dri_version, &dri_expected, + &psp->ddx_version, &ddx_expected, + &psp->drm_version, &drm_expected)) { + return NULL; + } + + if (drm_expected.patch != psp->drm_version.patch) { + fprintf(stderr, "Incompatible DRM patch level.\n" + "Expected: %d\n" "Current : %d\n", + drm_expected.patch, psp->drm_version.patch); + return NULL; + } + + driInitExtensions(NULL, card_extensions, GL_FALSE); + + if (psp->devPrivSize != sizeof(struct nouveau_dri)) { + NOUVEAU_ERR("DRI struct mismatch between DDX/DRI\n"); + return NULL; + } + + nv_screen = CALLOC_STRUCT(nouveau_screen_dri); + if (!nv_screen) + return NULL; + + driParseOptionInfo(&nv_screen->option_cache, + __driConfigOptions, __driNConfigOptions); + + if (nouveau_screen_init(nv_dri, psp->fd, &nv_screen->base)) { + FREE(nv_screen); + return NULL; + } + + nv_screen->driScrnPriv = psp; + psp->private = (void *)nv_screen; + + return (const __DRIconfig **) + nouveau_fill_in_modes(psp, nv_dri->bpp, + (nv_dri->bpp == 16) ? 16 : 24, + (nv_dri->bpp == 16) ? 
0 : 8, 1); +} + +static void +nouveau_screen_destroy(__DRIscreenPrivate *driScrnPriv) +{ + struct nouveau_screen_dri *nv_screen = driScrnPriv->private; + + driScrnPriv->private = NULL; + nouveau_screen_cleanup(&nv_screen->base); + FREE(nv_screen); +} + +const struct __DriverAPIRec +driDriverAPI = { + .InitScreen = nouveau_screen_create, + .DestroyScreen = nouveau_screen_destroy, + .CreateContext = nouveau_context_create, + .DestroyContext = nouveau_context_destroy, + .CreateBuffer = nouveau_create_buffer, + .DestroyBuffer = nouveau_destroy_buffer, + .SwapBuffers = nouveau_swap_buffers, + .MakeCurrent = nouveau_context_bind, + .UnbindContext = nouveau_context_unbind, + .CopySubBuffer = nouveau_copy_sub_buffer, + + .InitScreen2 = NULL, /* one day, I promise! */ +}; + diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.h b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.h new file mode 100644 index 0000000000..1498087819 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_screen_dri.h @@ -0,0 +1,13 @@ +#ifndef __NOUVEAU_SCREEN_DRI_H__ +#define __NOUVEAU_SCREEN_DRI_H__ + +#include "../common/nouveau_screen.h" +#include "xmlconfig.h" + +struct nouveau_screen_dri { + struct nouveau_screen base; + __DRIscreenPrivate *driScrnPriv; + driOptionCache option_cache; +}; + +#endif diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.c b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.c new file mode 100644 index 0000000000..58cb6f7265 --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.c @@ -0,0 +1,113 @@ +#include <main/glheader.h> +#include <glapi/glthread.h> +#include <GL/internal/glcore.h> + +#include <pipe/p_context.h> +#include <state_tracker/st_public.h> +#include <state_tracker/st_context.h> +#include <state_tracker/st_cb_fbo.h> + +#include "../common/nouveau_local.h" +#include "nouveau_context_dri.h" +#include "nouveau_screen_dri.h" +#include "nouveau_swapbuffers.h" + +void 
+nouveau_copy_buffer(__DRIdrawablePrivate *dPriv, struct pipe_surface *surf, + const drm_clip_rect_t *rect) +{ + struct nouveau_context_dri *nv = dPriv->driContextPriv->driverPrivate; + struct pipe_context *pipe = nv->base.nvc->pctx[nv->base.pctx_id]; + drm_clip_rect_t *pbox; + int nbox, i; + + LOCK_HARDWARE(&nv->base); + if (!dPriv->numClipRects) { + UNLOCK_HARDWARE(&nv->base); + return; + } + pbox = dPriv->pClipRects; + nbox = dPriv->numClipRects; + + for (i = 0; i < nbox; i++, pbox++) { + int sx, sy, dx, dy, w, h; + + sx = pbox->x1 - dPriv->x; + sy = pbox->y1 - dPriv->y; + dx = pbox->x1; + dy = pbox->y1; + w = pbox->x2 - pbox->x1; + h = pbox->y2 - pbox->y1; + + pipe->surface_copy(pipe, FALSE, nv->base.frontbuffer, + dx, dy, surf, sx, sy, w, h); + } + + FIRE_RING(nv->base.nvc->channel); + UNLOCK_HARDWARE(&nv->base); + + if (nv->last_stamp != dPriv->lastStamp) { + struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; + st_resize_framebuffer(nvfb->stfb, dPriv->w, dPriv->h); + nv->last_stamp = dPriv->lastStamp; + } +} + +void +nouveau_copy_sub_buffer(__DRIdrawablePrivate *dPriv, int x, int y, int w, int h) +{ + struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; + struct pipe_surface *surf; + + st_get_framebuffer_surface(nvfb->stfb, ST_SURFACE_BACK_LEFT, &surf); + if (surf) { + drm_clip_rect_t rect; + rect.x1 = x; + rect.y1 = y; + rect.x2 = x + w; + rect.y2 = y + h; + + st_notify_swapbuffers(nvfb->stfb); + nouveau_copy_buffer(dPriv, surf, &rect); + } +} + +void +nouveau_swap_buffers(__DRIdrawablePrivate *dPriv) +{ + struct nouveau_framebuffer *nvfb = dPriv->driverPrivate; + struct pipe_surface *surf; + + st_get_framebuffer_surface(nvfb->stfb, ST_SURFACE_BACK_LEFT, &surf); + if (surf) { + st_notify_swapbuffers(nvfb->stfb); + nouveau_copy_buffer(dPriv, surf, NULL); + } +} + +void +nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf, + void *context_private) +{ + struct nouveau_context_dri *nv = context_private; + __DRIdrawablePrivate 
*dPriv = nv->dri_drawable; + + nouveau_copy_buffer(dPriv, surf, NULL); +} + +void +nouveau_contended_lock(struct nouveau_context *nv) +{ + struct nouveau_context_dri *nv_sub = (struct nouveau_context_dri*)nv; + __DRIdrawablePrivate *dPriv = nv_sub->dri_drawable; + __DRIscreenPrivate *sPriv = nv_sub->dri_screen; + + /* If the window moved, may need to set a new cliprect now. + * + * NOTE: This releases and regains the hw lock, so all state + * checking must be done *after* this call: + */ + if (dPriv) + DRI_VALIDATE_DRAWABLE_INFO(sPriv, dPriv); +} + diff --git a/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.h b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.h new file mode 100644 index 0000000000..825d3da6da --- /dev/null +++ b/src/gallium/winsys/drm/nouveau/dri/nouveau_swapbuffers.h @@ -0,0 +1,10 @@ +#ifndef __NOUVEAU_SWAPBUFFERS_H__ +#define __NOUVEAU_SWAPBUFFERS_H__ + +extern void nouveau_copy_buffer(__DRIdrawablePrivate *, struct pipe_surface *, + const drm_clip_rect_t *); +extern void nouveau_copy_sub_buffer(__DRIdrawablePrivate *, + int x, int y, int w, int h); +extern void nouveau_swap_buffers(__DRIdrawablePrivate *); + +#endif diff --git a/src/gallium/winsys/drm/radeon/Makefile b/src/gallium/winsys/drm/radeon/Makefile new file mode 100644 index 0000000000..dca1e3233a --- /dev/null +++ b/src/gallium/winsys/drm/radeon/Makefile @@ -0,0 +1,32 @@ + +TOP = ../../../../.. 
+include $(TOP)/configs/current + +LIBNAME = radeon_dri.so + +MINIGLX_SOURCES = + +PIPE_DRIVERS = \ + $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ + $(TOP)/src/gallium/drivers/r300/libr300.a + +DRIVER_SOURCES = \ + radeon_buffer.c \ + radeon_context.c \ + radeon_r300.c \ + radeon_screen.c \ + radeon_winsys_softpipe.c + +C_SOURCES = \ + $(COMMON_GALLIUM_SOURCES) \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +DRIVER_DEFINES = -I../../../drivers/r300 + +include ../Makefile.template + +DRI_LIB_DEPS += -ldrm_radeon + +symlinks: diff --git a/src/gallium/winsys/drm/radeon/SConscript b/src/gallium/winsys/drm/radeon/SConscript new file mode 100644 index 0000000000..2435211a32 --- /dev/null +++ b/src/gallium/winsys/drm/radeon/SConscript @@ -0,0 +1,29 @@ +Import('*') + +if 'mesa' in env['statetrackers']: + + env = drienv.Clone() + + DRIVER_SOURCES = [ + 'radeon_buffer.c', + 'radeon_context.c', + 'radeon_screen.c', + 'radeon_winsys_softpipe.c', + ] + + sources = \ + COMMON_GALLIUM_SOURCES + \ + DRIVER_SOURCES + + drivers = [ + softpipe, + r300 + ] + + # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions + env.SharedLibrary( + target ='radeon_dri.so', + source = sources, + LIBS = drivers + mesa + auxiliaries + env['LIBS'], + ) + diff --git a/src/gallium/winsys/drm/radeon/radeon_buffer.c b/src/gallium/winsys/drm/radeon/radeon_buffer.c new file mode 100644 index 0000000000..259a505c0a --- /dev/null +++ b/src/gallium/winsys/drm/radeon/radeon_buffer.c @@ -0,0 +1,239 @@ +/* + * Copyright © 2008 Jérôme Glisse + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */
+/*
+ * Authors:
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#include <stdio.h>
+#include "dri_util.h"
+#include "state_tracker/st_public.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "radeon_buffer.h"
+#include "radeon_screen.h"
+#include "radeon_context.h"
+#include "radeon_bo.h"
+#include "radeon_drm.h"
+
+/**
+ * Return the constant name string of this winsys.
+ */
+static const char *radeon_get_name(struct pipe_winsys *ws)
+{
+    return "RADEON/DRI2";
+}
+
+/**
+ * Allocate a pipe buffer backed by a freshly opened radeon buffer object.
+ *
+ * The GEM domain is derived from the usage flags: PIXEL selects VRAM,
+ * VERTEX and INDEX select GTT.  With none of these flags set the domain
+ * passed to radeon_bo_open() is 0.
+ *
+ * \return the embedded pipe_buffer, or NULL if either the wrapper or the
+ *         buffer object could not be allocated.
+ */
+static struct pipe_buffer *radeon_buffer_create(struct pipe_winsys *ws,
+                                                unsigned alignment,
+                                                unsigned usage,
+                                                unsigned size)
+{
+    struct radeon_pipe_winsys *radeon_ws = (struct radeon_pipe_winsys *)ws;
+    struct radeon_pipe_buffer *radeon_buffer;
+    uint32_t domain;
+
+    radeon_buffer = calloc(1, sizeof(*radeon_buffer));
+    if (radeon_buffer == NULL) {
+        return NULL;
+    }
+    radeon_buffer->base.refcount = 1;
+    radeon_buffer->base.alignment = alignment;
+    radeon_buffer->base.usage = usage;
+    radeon_buffer->base.size = size;
+
+    domain = 0;
+
+    if (usage & PIPE_BUFFER_USAGE_PIXEL) {
+        domain |= RADEON_GEM_DOMAIN_VRAM;
+    }
+    if (usage & PIPE_BUFFER_USAGE_VERTEX) {
+        domain |= RADEON_GEM_DOMAIN_GTT;
+    }
+
+    if (usage & PIPE_BUFFER_USAGE_INDEX) {
+        domain |= RADEON_GEM_DOMAIN_GTT;
+    }
+    radeon_buffer->bo = radeon_bo_open(radeon_ws->radeon_screen->bom, 0,
+                                       size, alignment, domain, 0);
+    if (radeon_buffer->bo == NULL) {
+        /* Must not fall through: the wrapper is gone, so returning
+         * &radeon_buffer->base would hand out a dangling pointer. */
+        free(radeon_buffer);
+        return NULL;
+    }
+    return &radeon_buffer->base;
+}
+
+/**
+ * Create a buffer of `bytes` bytes and fill it with a copy of `ptr`.
+ *
+ * \return the new buffer, or NULL if allocation or mapping failed.
+ */
+static struct pipe_buffer *radeon_buffer_user_create(struct pipe_winsys *ws,
+                                                     void *ptr,
+                                                     unsigned bytes)
+{
+    struct radeon_pipe_buffer *radeon_buffer;
+
+    radeon_buffer = (struct radeon_pipe_buffer*)radeon_buffer_create(ws, 0, 0, bytes);
+    if (radeon_buffer == NULL) {
+        return NULL;
+    }
+    /* radeon_bo_map() returns non-zero on failure (see radeon_buffer_map);
+     * bo->ptr must not be written in that case. */
+    if (radeon_bo_map(radeon_buffer->bo, 1)) {
+        radeon_bo_unref(radeon_buffer->bo);
+        free(radeon_buffer);
+        return NULL;
+    }
+    memcpy(radeon_buffer->bo->ptr, ptr, bytes);
+    radeon_bo_unmap(radeon_buffer->bo);
+    return &radeon_buffer->base;
+}
+
+/**
+ * Drop the reference on the backing buffer object and free the wrapper.
+ */
+static void radeon_buffer_del(struct pipe_winsys *ws, struct pipe_buffer *buffer)
+{
+    struct radeon_pipe_buffer *radeon_buffer = (struct radeon_pipe_buffer*)buffer;
+
+    radeon_bo_unref(radeon_buffer->bo);
+    free(radeon_buffer);
+}
+
+/**
+ * Map the backing buffer object; the mapping is requested writable when
+ * PIPE_BUFFER_USAGE_CPU_WRITE is set in `flags`.
+ *
+ * \return the CPU pointer of the mapping, or NULL if mapping failed.
+ */
+static void *radeon_buffer_map(struct pipe_winsys *ws,
+                               struct pipe_buffer *buffer,
+                               unsigned flags)
+{
+    struct radeon_pipe_buffer *radeon_buffer = (struct radeon_pipe_buffer*)buffer;
+    int write = 0;
+
+    if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) {
+        write = 1;
+    }
+    if (radeon_bo_map(radeon_buffer->bo, write))
+        return NULL;
+    return radeon_buffer->bo->ptr;
+}
+
+/**
+ * Unmap the backing buffer object.
+ */
+static void radeon_buffer_unmap(struct pipe_winsys *ws, struct pipe_buffer *buffer)
+{
+    struct radeon_pipe_buffer *radeon_buffer = (struct radeon_pipe_buffer*)buffer;
+
+    radeon_bo_unmap(radeon_buffer->bo);
+}
+
+/* Fences are not implemented: the three callbacks below are stubs. */
+
+/** Stub: does nothing. */
+static void radeon_fence_reference(struct pipe_winsys *ws,
+                                   struct pipe_fence_handle **ptr,
+                                   struct pipe_fence_handle *pfence)
+{
+}
+
+/** Stub: always returns 1. */
+static int radeon_fence_signalled(struct pipe_winsys *ws,
+                                  struct pipe_fence_handle *pfence,
+                                  unsigned flag)
+{
+    return 1;
+}
+
+/** Stub: always returns 0 without waiting. */
+static int radeon_fence_finish(struct pipe_winsys *ws,
+                               struct pipe_fence_handle *pfence,
+                               unsigned flag)
+{
+    return 0;
+}
+
+/** Stub: front buffer flushing is not implemented yet. */
+static void radeon_flush_frontbuffer(struct pipe_winsys *pipe_winsys,
+                                     struct pipe_surface *pipe_surface,
+                                     void *context_private)
+{
+    /* TODO: call dri2CopyRegion */
+}
+
+/**
+ * Allocate a radeon_pipe_winsys bound to `radeon_screen` and fill in the
+ * pipe_winsys callbacks implemented in this file.
+ *
+ * \return the embedded pipe_winsys, or NULL on allocation failure.
+ */
+struct pipe_winsys *radeon_pipe_winsys(struct radeon_screen *radeon_screen)
+{
+    struct radeon_pipe_winsys *radeon_ws;
+
+    radeon_ws = calloc(1, sizeof(struct radeon_pipe_winsys));
+    if (radeon_ws == NULL) {
+        return NULL;
+    }
+    radeon_ws->radeon_screen = radeon_screen;
+
+    radeon_ws->winsys.flush_frontbuffer = radeon_flush_frontbuffer;
+
+    radeon_ws->winsys.buffer_create = radeon_buffer_create;
+    radeon_ws->winsys.buffer_destroy = radeon_buffer_del;
+    radeon_ws->winsys.user_buffer_create = radeon_buffer_user_create;
+    radeon_ws->winsys.buffer_map = radeon_buffer_map;
+    radeon_ws->winsys.buffer_unmap = radeon_buffer_unmap;
+
+    radeon_ws->winsys.fence_reference = radeon_fence_reference;
+    radeon_ws->winsys.fence_signalled = radeon_fence_signalled;
+    radeon_ws->winsys.fence_finish = radeon_fence_finish;
+
+    radeon_ws->winsys.get_name = radeon_get_name;
+
+    return &radeon_ws->winsys;
+}
+
+/**
+ * Wrap an existing buffer object, identified by `handle`, in a pipe buffer
+ * flagged PIPE_BUFFER_USAGE_PIXEL.
+ *
+ * \return the new pipe buffer, or NULL if the handle could not be opened
+ *         or the wrapper could not be allocated.
+ */
+static struct pipe_buffer *radeon_buffer_from_handle(struct radeon_screen *radeon_screen,
+                                                     uint32_t handle)
+{
+    struct radeon_pipe_buffer *radeon_buffer;
+    struct radeon_bo *bo = NULL;
+
+    bo = radeon_bo_open(radeon_screen->bom, handle, 0, 0, 0, 0);
+    if (bo == NULL) {
+        return NULL;
+    }
+    radeon_buffer = calloc(1, sizeof(struct radeon_pipe_buffer));
+    if (radeon_buffer == NULL) {
+        radeon_bo_unref(bo);
+        return NULL;
+    }
+    radeon_buffer->base.refcount = 1;
+    radeon_buffer->base.usage = PIPE_BUFFER_USAGE_PIXEL;
+    radeon_buffer->bo = bo;
+    return &radeon_buffer->base;
+}
+
+/**
+ * Build a pipe surface on top of the buffer object named by `handle`.
+ *
+ * The buffer is wrapped in a 2D display-target texture of w x h pixels in
+ * `format` via texture_blanket(), and level 0 of that texture is returned
+ * as a GPU-writable surface.
+ *
+ * \return the surface, or NULL if the buffer could not be opened or the
+ *         texture could not be created.
+ */
+struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_context,
+                                                uint32_t handle,
+                                                enum pipe_format format,
+                                                int w, int h, int pitch)
+{
+    struct pipe_screen *pipe_screen = radeon_context->pipe_screen;
+    struct pipe_texture tmpl;
+    struct pipe_surface *ps;
+    struct pipe_texture *pt;
+    struct pipe_buffer *pb;
+
+    pb = radeon_buffer_from_handle(radeon_context->radeon_screen, handle);
+    if (pb == NULL) {
+        return NULL;
+    }
+    memset(&tmpl, 0, sizeof(tmpl));
+    tmpl.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET;
+    tmpl.target = PIPE_TEXTURE_2D;
+    tmpl.width[0] = w;
+    tmpl.height[0] = h;
+    tmpl.depth[0] = 1;
+    tmpl.format = format;
+    pf_get_block(tmpl.format, &tmpl.block);
+    tmpl.nblocksx[0] = pf_get_nblocksx(&tmpl.block, w);
+    tmpl.nblocksy[0] = pf_get_nblocksy(&tmpl.block, h);
+
+    pt = pipe_screen->texture_blanket(pipe_screen, &tmpl, &pitch, pb);
+    if (pt == NULL) {
+        /* Release the buffer and stop: get_tex_surface() must not be
+         * called with a NULL texture. */
+        pipe_buffer_reference(pipe_screen, &pb, NULL);
+        return NULL;
+    }
+    ps = pipe_screen->get_tex_surface(pipe_screen, pt, 0, 0, 0,
+                                      PIPE_BUFFER_USAGE_GPU_WRITE);
+    return ps;
+}
diff --git a/src/gallium/winsys/drm/radeon/radeon_buffer.h
b/src/gallium/winsys/drm/radeon/radeon_buffer.h new file mode 100644 index 0000000000..c626c20229 --- /dev/null +++ b/src/gallium/winsys/drm/radeon/radeon_buffer.h @@ -0,0 +1,54 @@ +/* + * Copyright © 2008 Jérôme Glisse + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ */
+/*
+ * Authors:
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_BUFFER_H
+#define RADEON_BUFFER_H
+
+#include "pipe/internal/p_winsys_screen.h"
+#include "radeon_screen.h"
+#include "radeon_context.h"
+#include "radeon_bo.h"
+
+/*
+ * A pipe_buffer backed by a radeon buffer object.
+ * `base` is the first member: radeon_buffer.c casts between
+ * struct pipe_buffer * and struct radeon_pipe_buffer *.
+ */
+struct radeon_pipe_buffer {
+    struct pipe_buffer  base;
+    struct radeon_bo    *bo;
+};
+
+/*
+ * The pipe_winsys implementation together with the radeon_screen it was
+ * created for.  `winsys` is the first member: radeon_buffer.c casts the
+ * struct pipe_winsys * it receives back to struct radeon_pipe_winsys *.
+ */
+struct radeon_pipe_winsys {
+    struct pipe_winsys      winsys;
+    struct radeon_screen    *radeon_screen;
+};
+
+/* Create the winsys for a screen; returns NULL on allocation failure. */
+struct pipe_winsys *radeon_pipe_winsys(struct radeon_screen *radeon_screen);
+/* Wrap the buffer object named by `handle` in a w x h surface of `format`. */
+struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_context,
+                                                uint32_t handle,
+                                                enum pipe_format format,
+                                                int w, int h, int pitch);
+
+#endif
diff --git a/src/gallium/winsys/drm/radeon/radeon_context.c b/src/gallium/winsys/drm/radeon/radeon_context.c
new file mode 100644
index 0000000000..13a7035fec
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_context.c
@@ -0,0 +1,272 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#include <stdio.h>
+#include "dri_util.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "state_tracker/st_public.h"
+#include "state_tracker/st_context.h"
+#include "radeon_screen.h"
+#include "radeon_context.h"
+#include "radeon_buffer.h"
+#include "radeon_winsys_softpipe.h"
+
+#define need_GL_ARB_point_parameters
+#define need_GL_ARB_vertex_buffer_object
+#define need_GL_EXT_cull_vertex
+#define need_GL_EXT_compiled_vertex_array
+#include "extension_helper.h"
+
+/**
+ * Extension strings exported by the radeon driver.
+ */
+const struct dri_extension radeon_card_extensions[] = {
+/* XXX these are technically not supported
+    {"GL_ARB_texture_rectangle", NULL},
+    {"GL_ARB_pixel_buffer_object", NULL}, */
+    {"GL_ARB_point_parameters", GL_ARB_point_parameters_functions},
+    {"GL_ARB_vertex_buffer_object", GL_ARB_vertex_buffer_object_functions},
+    {"GL_EXT_compiled_vertex_array", GL_EXT_compiled_vertex_array_functions},
+    {"GL_EXT_cull_vertex", GL_EXT_cull_vertex_functions},
+    {NULL, NULL}
+};
+
+/**
+ * Query the DRI2 loader for the buffers of `dri_drawable` and attach them
+ * to the drawable's state tracker framebuffer.
+ *
+ * The attachments requested are the bits set in radeon_fb->attachments.
+ * The drawable's clip rects are reset to one rect covering the whole
+ * drawable, every returned buffer is wrapped in a pipe surface, and the
+ * framebuffer is finally resized to the drawable size.  The function
+ * returns early, leaving the remaining buffers untouched, on an unknown
+ * attachment, an unsupported cpp value, or a surface creation failure.
+ */
+static void radeon_update_renderbuffers(__DRIcontext *dri_context,
+                                        __DRIdrawable *dri_drawable)
+{
+    struct radeon_framebuffer *radeon_fb;
+    struct radeon_context *radeon_context;
+    unsigned attachments[10];
+    __DRIbuffer *buffers;
+    __DRIscreen *screen;
+    int i, count;
+
+    radeon_context = dri_context->driverPrivate;
+    screen = dri_drawable->driScreenPriv;
+    radeon_fb = dri_drawable->driverPrivate;
+    /* Turn the attachment bitmask into the list the loader expects. */
+    for (count = 0, i = 0; count < 6; count++) {
+        if (radeon_fb->attachments & (1 << count)) {
+            attachments[i++] = count;
+        }
+    }
+
+    buffers = (*screen->dri2.loader->getBuffers)(dri_drawable,
+                                                 &dri_drawable->w,
+                                                 &dri_drawable->h,
+                                                 attachments,
+                                                 i,
+                                                 &count,
+                                                 dri_drawable->loaderPrivate);
+    if (buffers == NULL) {
+        return;
+    }
+
+    /* set one cliprect to cover the whole dri_drawable */
+    dri_drawable->x = 0;
+    dri_drawable->y = 0;
+    dri_drawable->backX = 0;
+    dri_drawable->backY = 0;
+    dri_drawable->numClipRects = 1;
+    dri_drawable->pClipRects[0].x1 = 0;
+    dri_drawable->pClipRects[0].y1 = 0;
+    dri_drawable->pClipRects[0].x2 = dri_drawable->w;
+    dri_drawable->pClipRects[0].y2 = dri_drawable->h;
+    dri_drawable->numBackClipRects = 1;
+    dri_drawable->pBackClipRects[0].x1 = 0;
+    dri_drawable->pBackClipRects[0].y1 = 0;
+    dri_drawable->pBackClipRects[0].x2 = dri_drawable->w;
+    dri_drawable->pBackClipRects[0].y2 = dri_drawable->h;
+
+    for (i = 0; i < count; i++) {
+        struct pipe_surface *ps;
+        enum pipe_format format = 0;
+        int index = 0;
+
+        switch (buffers[i].attachment) {
+        case __DRI_BUFFER_FRONT_LEFT:
+            index = ST_SURFACE_FRONT_LEFT;
+            switch (buffers[i].cpp) {
+            case 4:
+                format = PIPE_FORMAT_A8R8G8B8_UNORM;
+                break;
+            case 2:
+                format = PIPE_FORMAT_R5G6B5_UNORM;
+                break;
+            default:
+                /* FIXME: error */
+                return;
+            }
+            break;
+        case __DRI_BUFFER_BACK_LEFT:
+            index = ST_SURFACE_BACK_LEFT;
+            switch (buffers[i].cpp) {
+            case 4:
+                format = PIPE_FORMAT_A8R8G8B8_UNORM;
+                break;
+            case 2:
+                format = PIPE_FORMAT_R5G6B5_UNORM;
+                break;
+            default:
+                /* FIXME: error */
+                return;
+            }
+            break;
+        case __DRI_BUFFER_STENCIL:
+        case __DRI_BUFFER_DEPTH:
+            index = ST_SURFACE_DEPTH;
+            switch (buffers[i].cpp) {
+            case 4:
+                format = PIPE_FORMAT_Z24S8_UNORM;
+                break;
+            case 2:
+                format = PIPE_FORMAT_Z16_UNORM;
+                break;
+            default:
+                /* FIXME: error */
+                return;
+            }
+            break;
+        case __DRI_BUFFER_ACCUM:
+        default:
+            fprintf(stderr,
+                    "unhandled buffer attach event, attacment type %d\n",
+                    buffers[i].attachment);
+            return;
+        }
+
+        ps = radeon_surface_from_handle(radeon_context,
+                                        buffers[i].name,
+                                        format,
+                                        dri_drawable->w,
+                                        dri_drawable->h,
+                                        buffers[i].pitch);
+        if (ps == NULL) {
+            /* An assert() here would vanish under NDEBUG and let a NULL
+             * surface reach the state tracker; fail explicitly instead. */
+            fprintf(stderr,
+                    "failed to create surface for attachment type %d\n",
+                    buffers[i].attachment);
+            return;
+        }
+        st_set_framebuffer_surface(radeon_fb->st_framebuffer, index, ps);
+    }
+    st_resize_framebuffer(radeon_fb->st_framebuffer,
+                          dri_drawable->w,
+                          dri_drawable->h);
+}
+
+/**
+ * Create the driver private context for `dri_context`.
+ *
+ * An r300 pipe context is created unless the RADEON_SOFTPIPE environment
+ * variable is set, in which case softpipe is used.  On success the new
+ * radeon_context is stored in dri_context->driverPrivate.
+ *
+ * \param visual          GL visual handed to the state tracker.
+ * \param dri_context     DRI context to attach the new context to.
+ * \param shared_context  optional struct radeon_context to share state
+ *                        with, or NULL.
+ * \return GL_TRUE on success; GL_FALSE on failure, with
+ *         dri_context->driverPrivate left NULL.
+ */
+GLboolean radeon_context_create(const __GLcontextModes *visual,
+                                __DRIcontextPrivate *dri_context,
+                                void *shared_context)
+{
+    __DRIscreenPrivate *dri_screen;
+    struct radeon_context *radeon_context;
+    struct radeon_screen *radeon_screen;
+    struct pipe_context *pipe;
+    struct st_context *shared_st_context = NULL;
+
+    dri_context->driverPrivate = NULL;
+    radeon_context = calloc(1, sizeof(struct radeon_context));
+    if (radeon_context == NULL) {
+        return GL_FALSE;
+    }
+
+    if (shared_context) {
+        shared_st_context = ((struct radeon_context*)shared_context)->st_context;
+    }
+
+    dri_screen = dri_context->driScreenPriv;
+    radeon_screen = dri_screen->private;
+    radeon_context->dri_screen = dri_screen;
+    radeon_context->radeon_screen = radeon_screen;
+    radeon_context->drm_fd = dri_screen->fd;
+
+    radeon_context->pipe_winsys = radeon_pipe_winsys(radeon_screen);
+    if (radeon_context->pipe_winsys == NULL) {
+        free(radeon_context);
+        return GL_FALSE;
+    }
+
+    if (!getenv("RADEON_SOFTPIPE")) {
+        fprintf(stderr, "Creating r300 context...\n");
+        pipe =
+            r300_create_context(NULL,
+                                radeon_context->pipe_winsys,
+                                radeon_create_r300_winsys(radeon_context->drm_fd));
+        if (pipe != NULL) {
+            radeon_context->pipe_screen = pipe->screen;
+        }
+    } else {
+        pipe = radeon_create_softpipe(radeon_context);
+    }
+    if (pipe == NULL) {
+        /* NOTE(review): pipe_winsys (and the r300 winsys, if one was
+         * created) are not released here because their ownership after a
+         * failed context creation is not visible from this file. */
+        free(radeon_context);
+        return GL_FALSE;
+    }
+    radeon_context->st_context = st_create_context(pipe, visual,
+                                                   shared_st_context);
+    if (radeon_context->st_context == NULL) {
+        /* NOTE(review): the pipe context is not destroyed on this path;
+         * confirm the proper teardown call before adding it. */
+        free(radeon_context);
+        return GL_FALSE;
+    }
+    driInitExtensions(radeon_context->st_context->ctx,
+                      radeon_card_extensions, GL_TRUE);
+    dri_context->driverPrivate = radeon_context;
+    return GL_TRUE;
+}
+
+/**
+ * Finish pending rendering, destroy the state tracker context and free
+ * the driver private context.
+ */
+void radeon_context_destroy(__DRIcontextPrivate *dri_context)
+{
+    struct radeon_context *radeon_context;
+
+    radeon_context = dri_context->driverPrivate;
+    st_finish(radeon_context->st_context);
+    st_destroy_context(radeon_context->st_context);
+    free(radeon_context);
+}
+
+/**
+ * Make `dri_context` current with the given draw and read drawables and
+ * refresh their renderbuffers.  A NULL context unbinds the current one.
+ *
+ * \return always GL_TRUE.
+ */
+GLboolean radeon_context_bind(__DRIcontextPrivate *dri_context,
+                              __DRIdrawablePrivate *dri_drawable,
+                              __DRIdrawablePrivate *dri_readable)
+{
+    struct radeon_framebuffer *drawable;
+    struct radeon_framebuffer *readable;
+    struct radeon_context *radeon_context;
+
+    if (dri_context == NULL) {
+        st_make_current(NULL, NULL, NULL);
+        return GL_TRUE;
+    }
+
+    radeon_context = dri_context->driverPrivate;
+    drawable = dri_drawable->driverPrivate;
+    readable = dri_readable->driverPrivate;
+    st_make_current(radeon_context->st_context,
+                    drawable->st_framebuffer,
+                    readable->st_framebuffer);
+
+    radeon_update_renderbuffers(dri_context, dri_drawable);
+    /* Avoid querying the loader twice when both drawables are the same. */
+    if (dri_drawable != dri_readable) {
+        radeon_update_renderbuffers(dri_context, dri_readable);
+    }
+    return GL_TRUE;
+}
+
+/**
+ * Flush the render cache of the context being unbound.
+ *
+ * \return always GL_TRUE.
+ */
+GLboolean radeon_context_unbind(__DRIcontextPrivate *dri_context)
+{
+    struct radeon_context *radeon_context;
+
+    radeon_context = dri_context->driverPrivate;
+    st_flush(radeon_context->st_context, PIPE_FLUSH_RENDER_CACHE, NULL);
+    return GL_TRUE;
+}
diff --git a/src/gallium/winsys/drm/radeon/radeon_context.h b/src/gallium/winsys/drm/radeon/radeon_context.h
new file mode 100644
index 0000000000..d7222b4469
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_context.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_CONTEXT_H
+#define RADEON_CONTEXT_H
+
+#include "dri_util.h"
+#include "state_tracker/st_public.h"
+#include "state_tracker/st_context.h"
+#include "radeon_screen.h"
+
+#include "radeon_r300.h"
+
+/*
+ * Driver private data of a DRI drawable: the state tracker framebuffer
+ * and a bitmask of (1 << __DRI_BUFFER_*) attachments to request from the
+ * DRI2 loader.
+ */
+struct radeon_framebuffer {
+    struct st_framebuffer   *st_framebuffer;
+    unsigned                attachments;
+};
+
+/*
+ * Driver private data of a DRI context.
+ */
+struct radeon_context {
+    /* st */
+    struct st_context       *st_context;
+    /* pipe */
+    struct pipe_screen      *pipe_screen;
+    struct pipe_winsys      *pipe_winsys;
+    /* DRI */
+    __DRIscreenPrivate      *dri_screen;
+    __DRIdrawablePrivate    *dri_drawable;
+    __DRIdrawablePrivate    *dri_readable;
+    /* DRM */
+    int                     drm_fd;
+    /* RADEON */
+    struct radeon_screen    *radeon_screen;
+};
+
+/* Context entry points; they are installed in driDriverAPI (radeon_screen.c). */
+GLboolean radeon_context_create(const __GLcontextModes*,
+                                __DRIcontextPrivate*,
+                                void*);
+void radeon_context_destroy(__DRIcontextPrivate*);
+GLboolean radeon_context_bind(__DRIcontextPrivate*,
+                              __DRIdrawablePrivate*,
+                              __DRIdrawablePrivate*);
+GLboolean radeon_context_unbind(__DRIcontextPrivate*);
+
+#endif
diff --git a/src/gallium/winsys/drm/radeon/radeon_r300.c b/src/gallium/winsys/drm/radeon/radeon_r300.c
new file mode 100644
index 0000000000..8fe2375e34
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_r300.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in
the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#include "radeon_r300.h"
+
+/* Placeholder: reports that `size` always fits in the command stream. */
+static boolean radeon_r300_check_cs(struct radeon_cs* cs, int size)
+{
+    /* XXX check size here, lazy ass! */
+    return TRUE;
+}
+
+/* Emit a relocation for the buffer object that backs `pbuffer`. */
+static void radeon_r300_write_cs_reloc(struct radeon_cs* cs,
+                                       struct pipe_buffer* pbuffer,
+                                       uint32_t rd,
+                                       uint32_t wd,
+                                       uint32_t flags)
+{
+    radeon_cs_write_reloc(cs, ((struct radeon_pipe_buffer*)pbuffer)->bo, rd, wd, flags);
+}
+
+/* Submit the command stream, then erase it for reuse. */
+static void radeon_r300_flush_cs(struct radeon_cs* cs)
+{
+    radeon_cs_emit(cs);
+    radeon_cs_erase(cs);
+}
+
+/* Helper function to do the ioctls needed for setup and init.
+ *
+ * Fills winsys->pci_id and winsys->gb_pipes from DRM_RADEON_GETPARAM and
+ * terminates the process with exit(1) if either query fails. */
+static void do_ioctls(struct r300_winsys* winsys, int fd)
+{
+    drm_radeon_getparam_t gp;
+    uint32_t target = 0;
+    int retval;
+
+    /* The ioctl writes its result through gp.value, i.e. into `target`.
+     * XXX is this cast safe? */
+    gp.value = (int*)&target;
+
+    /* First, get PCI ID */
+    gp.param = RADEON_PARAM_DEVICE_ID;
+    retval = drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
+    if (retval) {
+        fprintf(stderr, "%s: Failed to get PCI ID, error number %d",
+                __FUNCTION__, retval);
+        exit(1);
+    }
+    winsys->pci_id = target;
+
+    /* Then, get the number of pixel pipes */
+    gp.param = RADEON_PARAM_NUM_GB_PIPES;
+    retval = drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
+    if (retval) {
+        fprintf(stderr, "%s: Failed to get GB pipe count, error number %d",
+                __FUNCTION__, retval);
+        exit(1);
+    }
+    winsys->gb_pipes = target;
+
+}
+
+/* Create the r300 winsys for DRM file descriptor `fd`: query the chip
+ * parameters, create a GEM command stream of 1024 * 64 / 4 dwords and
+ * install the command stream callbacks.
+ *
+ * Returns NULL if an allocation fails. */
+struct r300_winsys* radeon_create_r300_winsys(int fd)
+{
+    struct r300_winsys* winsys;
+    struct radeon_cs_manager* csm;
+
+    winsys = calloc(1, sizeof(struct r300_winsys));
+    if (winsys == NULL) {
+        /* do_ioctls() writes through the pointer. */
+        return NULL;
+    }
+
+    do_ioctls(winsys, fd);
+
+    csm = radeon_cs_manager_gem_ctor(fd);
+    if (csm == NULL) {
+        free(winsys);
+        return NULL;
+    }
+
+    winsys->cs = radeon_cs_create(csm, 1024 * 64 / 4);
+    if (winsys->cs == NULL) {
+        /* NOTE(review): csm is not released here; its destructor is not
+         * declared anywhere visible from this file. */
+        free(winsys);
+        return NULL;
+    }
+
+    winsys->check_cs = radeon_r300_check_cs;
+    winsys->begin_cs = radeon_cs_begin;
+    winsys->write_cs_dword = radeon_cs_write_dword;
+    winsys->write_cs_reloc = radeon_r300_write_cs_reloc;
+    winsys->end_cs = radeon_cs_end;
+    winsys->flush_cs = radeon_r300_flush_cs;
+
+    return winsys;
+}
diff --git a/src/gallium/winsys/drm/radeon/radeon_r300.h b/src/gallium/winsys/drm/radeon/radeon_r300.h
new file mode 100644
index 0000000000..8ed95a3a9b
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_r300.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and
this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +/* XXX WTF is this! I shouldn't have to include those first three! FUCK! */ +#include <stdint.h> +#include <stdlib.h> +#include "drm.h" +#include "radeon_drm.h" +#include "radeon_cs.h" + +#include "r300_winsys.h" + +#include "radeon_buffer.h" + +struct r300_winsys* radeon_create_r300_winsys(int fd); diff --git a/src/gallium/winsys/drm/radeon/radeon_screen.c b/src/gallium/winsys/drm/radeon/radeon_screen.c new file mode 100644 index 0000000000..e31caff0bf --- /dev/null +++ b/src/gallium/winsys/drm/radeon/radeon_screen.c @@ -0,0 +1,288 @@ +/* + * Copyright © 2008 Jérôme Glisse + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. 
IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#include <stdio.h>
+#include "pipe/p_screen.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "state_tracker/st_public.h"
+#include "state_tracker/st_context.h"
+#include "utils.h"
+#include "xf86drm.h"
+#include "drm.h"
+#include "dri_util.h"
+#include "radeon_screen.h"
+#include "radeon_context.h"
+#include "radeon_buffer.h"
+#include "radeon_bo.h"
+#include "radeon_bo_gem.h"
+#include "radeon_drm.h"
+
+extern const struct dri_extension radeon_card_extensions[];
+
+/** DRI screen extensions advertised through dri_screen->extensions. */
+static const __DRIextension *radeon_screen_extensions[] = {
+    &driReadDrawableExtension,
+    &driCopySubBufferExtension.base,
+    &driSwapControlExtension.base,
+    &driFrameTrackingExtension.base,
+    &driMediaStreamCounterExtension.base,
+    NULL
+};
+
+/**
+ * Build the list of framebuffer configs for one pixel depth.
+ *
+ * \param pixel_bits        16 selects RGB 5-6-5, anything else BGRA 8-8-8-8.
+ * \param depth_bits        depth buffer size; 24 additionally yields a
+ *                          config with an 8 bit stencil buffer.
+ * \param have_back_buffer  non-zero to also offer double buffered configs.
+ * \return the configs from driCreateConfigs(), or NULL on failure.
+ */
+static __DRIconfig **radeon_fill_in_modes(unsigned pixel_bits,
+                                          unsigned depth_bits,
+                                          GLboolean have_back_buffer)
+{
+    __DRIconfig **configs;
+    unsigned depth_buffer_factor;
+    unsigned back_buffer_factor;
+    GLenum fb_format;
+    GLenum fb_type;
+    uint8_t depth_bits_array[3];
+    uint8_t stencil_bits_array[3];
+    uint8_t msaa_samples_array[1];
+    /* TODO: pageflipping ? */
+    static const GLenum back_buffer_modes[] = {
+        GLX_NONE, GLX_SWAP_UNDEFINED_OML, GLX_SWAP_COPY_OML
+    };
+
+    stencil_bits_array[0] = 0;
+    stencil_bits_array[1] = 0;
+    /* Always initialise the third entry, even though depth_buffer_factor
+     * only covers it when depth_bits == 24. */
+    stencil_bits_array[2] = (depth_bits == 24) ? 8 : 0;
+
+    depth_bits_array[0] = 0;
+    depth_bits_array[1] = depth_bits;
+    depth_bits_array[2] = depth_bits;
+    depth_buffer_factor = (depth_bits == 24) ? 3 : 2;
+
+    back_buffer_factor = (have_back_buffer) ? 3 : 1;
+
+    msaa_samples_array[0] = 0;
+
+    if (pixel_bits == 16) {
+        fb_format = GL_RGB;
+        fb_type = GL_UNSIGNED_SHORT_5_6_5;
+    } else {
+        fb_format = GL_BGRA;
+        fb_type = GL_UNSIGNED_INT_8_8_8_8_REV;
+    }
+
+    configs = (__DRIconfig **)driCreateConfigs(fb_format,
+                                               fb_type,
+                                               depth_bits_array,
+                                               stencil_bits_array,
+                                               depth_buffer_factor,
+                                               back_buffer_modes,
+                                               back_buffer_factor,
+                                               msaa_samples_array,
+                                               1);
+    if (configs == NULL) {
+        fprintf(stderr, "[%s:%u] Error creating FBConfig!\n",
+                __FILE__, __LINE__);
+        return NULL;
+    }
+    return configs;
+}
+
+/**
+ * Release the driver private screen data.
+ *
+ * Also used by radeon_screen_init() to unwind when the buffer manager
+ * could not be created, so `bom` may still be NULL here.
+ */
+static void radeon_screen_destroy(__DRIscreenPrivate *dri_screen)
+{
+    struct radeon_screen *radeon_screen = (struct radeon_screen*)dri_screen->private;
+
+    if (radeon_screen == NULL) {
+        return;
+    }
+    if (radeon_screen->bom != NULL) {
+        radeon_bo_manager_gem_dtor(radeon_screen->bom);
+    }
+    /* Clear the pointer stored in the screen; assigning NULL to the local
+     * `dri_screen` parameter had no effect. */
+    dri_screen->private = NULL;
+    free(radeon_screen);
+}
+
+/**
+ * DRI2 screen initialisation: allocate the private screen data, create
+ * the GEM buffer manager and return the supported framebuffer configs.
+ *
+ * \return the 16 bit and 32 bit configs concatenated, or NULL on failure.
+ */
+static const __DRIconfig **radeon_screen_init(__DRIscreenPrivate *dri_screen)
+{
+    struct radeon_screen *radeon_screen;
+
+    /* Calling driInitExtensions here, with a NULL context pointer,
+     * does not actually enable the extensions.  It just makes sure
+     * that all the dispatch offsets for all the extensions that
+     * *might* be enabled are known.  This is needed because the
+     * dispatch offsets need to be known when _mesa_context_create is
+     * called, but we can't enable the extensions until we have a
+     * context pointer.
+     *
+     * Hello chicken.  Hello egg.  How are you two today?
+     */
+    driInitExtensions(NULL, radeon_card_extensions, GL_FALSE);
+
+    radeon_screen = calloc(1, sizeof(struct radeon_screen));
+    if (radeon_screen == NULL) {
+        fprintf(stderr, "\nERROR!  Allocating private area failed\n");
+        return NULL;
+    }
+    dri_screen->private = (void*)radeon_screen;
+    dri_screen->extensions = radeon_screen_extensions;
+    radeon_screen->dri_screen = dri_screen;
+
+    radeon_screen->bom = radeon_bo_manager_gem_ctor(dri_screen->fd);
+    if (radeon_screen->bom == NULL) {
+        radeon_screen_destroy(dri_screen);
+        return NULL;
+    }
+
+    return driConcatConfigs(radeon_fill_in_modes(16, 16, 1),
+                            radeon_fill_in_modes(32, 24, 1));
+}
+
+/**
+ * Create the driver private framebuffer of a DRI drawable.
+ *
+ * Formats are picked from the visual: 5 red bits select R5G6B5, anything
+ * else A8R8G8B8; 8 stencil bits force the combined S8Z24 format for both
+ * depth and stencil.  The DRI2 attachments to request later are recorded
+ * in radeon_fb->attachments.  Pixmaps are not supported.
+ *
+ * NOTE(review): radeon_update_renderbuffers() in radeon_context.c wraps
+ * 4 byte depth buffers as PIPE_FORMAT_Z24S8_UNORM while this function
+ * uses PIPE_FORMAT_S8Z24_UNORM - confirm which one is intended.
+ *
+ * \return GL_TRUE on success, GL_FALSE on failure or for pixmaps.
+ */
+static boolean radeon_buffer_create(__DRIscreenPrivate *dri_screen,
+                                    __DRIdrawablePrivate *dri_drawable,
+                                    const __GLcontextModes *visual,
+                                    boolean is_pixmap)
+{
+    if (is_pixmap) {
+        /* TODO: implement ? */
+        return GL_FALSE;
+    } else {
+        enum pipe_format color_format, depth_format, stencil_format;
+        struct radeon_framebuffer *radeon_fb;
+
+        radeon_fb = calloc(1, sizeof(struct radeon_framebuffer));
+        if (radeon_fb == NULL) {
+            return GL_FALSE;
+        }
+
+        switch (visual->redBits) {
+        case 5:
+            color_format = PIPE_FORMAT_R5G6B5_UNORM;
+            break;
+        default:
+            color_format = PIPE_FORMAT_A8R8G8B8_UNORM;
+            break;
+        }
+
+        switch (visual->depthBits) {
+        case 24:
+            depth_format = PIPE_FORMAT_S8Z24_UNORM;
+            break;
+        case 16:
+            depth_format = PIPE_FORMAT_Z16_UNORM;
+            break;
+        default:
+            depth_format = PIPE_FORMAT_NONE;
+            break;
+        }
+
+        switch (visual->stencilBits) {
+        case 8:
+            /* force depth format */
+            depth_format = PIPE_FORMAT_S8Z24_UNORM;
+            stencil_format = PIPE_FORMAT_S8Z24_UNORM;
+            break;
+        default:
+            stencil_format = PIPE_FORMAT_NONE;
+            break;
+        }
+
+        radeon_fb->st_framebuffer = st_create_framebuffer(visual,
+                                                          color_format,
+                                                          depth_format,
+                                                          stencil_format,
+                                                          dri_drawable->w,
+                                                          dri_drawable->h,
+                                                          (void*)radeon_fb);
+        if (radeon_fb->st_framebuffer == NULL) {
+            free(radeon_fb);
+            return GL_FALSE;
+        }
+        dri_drawable->driverPrivate = (void *) radeon_fb;
+
+        radeon_fb->attachments = (1 << __DRI_BUFFER_FRONT_LEFT);
+        if (visual->doubleBufferMode) {
+            radeon_fb->attachments |= (1 << __DRI_BUFFER_BACK_LEFT);
+        }
+        if (visual->depthBits || visual->stencilBits) {
+            radeon_fb->attachments |= (1 << __DRI_BUFFER_DEPTH);
+        }
+
+        return GL_TRUE;
+    }
+}
+
+/**
+ * Drop the state tracker framebuffer reference and free the driver
+ * private framebuffer of a drawable.
+ */
+static void radeon_buffer_destroy(__DRIdrawablePrivate * dri_drawable)
+{
+    struct radeon_framebuffer *radeon_fb;
+
+    radeon_fb = dri_drawable->driverPrivate;
+    assert(radeon_fb->st_framebuffer);
+    st_unreference_framebuffer(radeon_fb->st_framebuffer);
+    free(radeon_fb);
+}
+
+/**
+ * Swap buffers: for now this only notifies the state tracker, and only
+ * when the drawable has a back left surface.
+ */
+static void radeon_swap_buffers(__DRIdrawablePrivate *dri_drawable)
+{
+    struct radeon_framebuffer *radeon_fb;
+    struct pipe_surface *back_surf = NULL;
+
+    radeon_fb = dri_drawable->driverPrivate;
+    assert(radeon_fb);
+    assert(radeon_fb->st_framebuffer);
+
+    st_get_framebuffer_surface(radeon_fb->st_framebuffer,
+                               ST_SURFACE_BACK_LEFT,
+                               &back_surf);
+    if (back_surf) {
+        st_notify_swapbuffers(radeon_fb->st_framebuffer);
+        /* TODO: do we want to do anything ? */
+        st_notify_swapbuffers_complete(radeon_fb->st_framebuffer);
+    }
+}
+
+/**
+ * Called via glXCopySubBufferMESA() to copy a subrect of the back
+ * buffer to the front buffer/screen.
+ */
+static void radeon_copy_sub_buffer(__DRIdrawablePrivate *dri_drawable,
+                                   int x, int y, int w, int h)
+{
+    /* TODO: ... */
+}
+
+/** Driver entry points exported to the DRI common code. */
+const struct __DriverAPIRec driDriverAPI = {
+    .InitScreen = NULL,
+    .DestroyScreen = radeon_screen_destroy,
+    .CreateContext = radeon_context_create,
+    .DestroyContext = radeon_context_destroy,
+    .CreateBuffer = radeon_buffer_create,
+    .DestroyBuffer = radeon_buffer_destroy,
+    .SwapBuffers = radeon_swap_buffers,
+    .MakeCurrent = radeon_context_bind,
+    .UnbindContext = radeon_context_unbind,
+    .CopySubBuffer = radeon_copy_sub_buffer,
+    .InitScreen2 = radeon_screen_init,
+};
diff --git a/src/gallium/winsys/drm/radeon/radeon_screen.h b/src/gallium/winsys/drm/radeon/radeon_screen.h
new file mode 100644
index 0000000000..01b7fa6531
--- /dev/null
+++ b/src/gallium/winsys/drm/radeon/radeon_screen.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2008 Jérôme Glisse
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+/*
+ * Authors:
+ *      Jérôme Glisse <glisse@freedesktop.org>
+ */
+#ifndef RADEON_SCREEN_H
+#define RADEON_SCREEN_H
+
+#include "dri_util.h"
+#include "radeon_bo.h"
+
+/*
+ * Driver private data of a DRI screen: a back pointer to the DRI screen
+ * and the buffer object manager that radeon_screen.c creates with
+ * radeon_bo_manager_gem_ctor().
+ */
+struct radeon_screen {
+    __DRIscreenPrivate          *dri_screen;
+    struct radeon_bo_manager    *bom;
+};
+
+#endif
diff --git a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c b/src/gallium/winsys/drm/radeon/radeon_winsys_softpipe.c index 20920a2052..8402e1fa5a 100644 --- a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c +++ b/src/gallium/winsys/drm/radeon/radeon_winsys_softpipe.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,73 +10,68 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software.
- * - * + * + * **************************************************************************/ /* * Authors: Keith Whitwell <keithw-at-tungstengraphics-dot-com> */ - -#include "intel_context.h" -#include "intel_winsys_softpipe.h" +#include <stdio.h> +#include "imports.h" #include "pipe/p_defines.h" #include "pipe/p_format.h" -#include "util/u_memory.h" #include "softpipe/sp_winsys.h" +#include "radeon_context.h" +#include "radeon_winsys_softpipe.h" - -struct intel_softpipe_winsys { - struct softpipe_winsys sws; - struct intel_context *intel; +struct radeon_softpipe_winsys { + struct softpipe_winsys sp_winsys; + struct radeon_context *radeon_context; }; /** * Return list of surface formats supported by this driver. */ -static boolean -intel_is_format_supported(struct softpipe_winsys *sws, - enum pipe_format format) +static boolean radeon_is_format_supported(struct softpipe_winsys *sws, uint format) { - switch(format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - case PIPE_FORMAT_R5G6B5_UNORM: - case PIPE_FORMAT_S8Z24_UNORM: - return TRUE; - default: - return FALSE; - } + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_R5G6B5_UNORM: + case PIPE_FORMAT_Z24S8_UNORM: + return TRUE; + default: + break; + }; + return FALSE; } - -/** - * Create rendering context which uses software rendering. - */ -struct pipe_context * -intel_create_softpipe( struct intel_context *intel, - struct pipe_winsys *winsys ) +struct pipe_context *radeon_create_softpipe(struct radeon_context *radeon_context) { - struct intel_softpipe_winsys *isws = CALLOC_STRUCT( intel_softpipe_winsys ); - struct pipe_screen *screen = softpipe_create_screen(winsys); + struct radeon_softpipe_winsys *radeon_sp_ws; + struct pipe_screen *pipe_screen; - /* Fill in this struct with callbacks that softpipe will need to - * communicate with the window system, buffer manager, etc. 
- */ - isws->sws.is_format_supported = intel_is_format_supported; - isws->intel = intel; + pipe_screen = softpipe_create_screen(radeon_context->pipe_winsys); - /* Create the softpipe context: - */ - return softpipe_create( screen, winsys, &isws->sws ); + radeon_sp_ws = CALLOC_STRUCT(radeon_softpipe_winsys); + if (radeon_sp_ws == NULL) { + return NULL; + } + radeon_context->pipe_screen = pipe_screen; + radeon_sp_ws->radeon_context = radeon_context; + radeon_sp_ws->sp_winsys.is_format_supported = radeon_is_format_supported; + return softpipe_create(pipe_screen, + radeon_context->pipe_winsys, + &radeon_sp_ws->sp_winsys); } diff --git a/src/gallium/winsys/drm/radeon/radeon_winsys_softpipe.h b/src/gallium/winsys/drm/radeon/radeon_winsys_softpipe.h new file mode 100644 index 0000000000..519eab769c --- /dev/null +++ b/src/gallium/winsys/drm/radeon/radeon_winsys_softpipe.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2008 Jérôme Glisse + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + */ +/* + * Authors: + * Jérôme Glisse <glisse@freedesktop.org> + */ +#ifndef RADEON_WINSYS_SOFTPIPE_H +#define RADEON_WINSYS_SOFTPIPE_H + +#include "radeon_context.h" + +struct pipe_context *radeon_create_softpipe(struct radeon_context *radeon_context); + +#endif diff --git a/src/gallium/winsys/egl_xlib/Makefile b/src/gallium/winsys/egl_xlib/Makefile index 76f1b56da4..02ac47caa4 100644 --- a/src/gallium/winsys/egl_xlib/Makefile +++ b/src/gallium/winsys/egl_xlib/Makefile @@ -34,7 +34,7 @@ LIBS = \ # mesa code, as done for ES 1.x, 2.x, OpenVG, etc) UNUSED_LIBS = \ $(TOP)/src/mesa/libglapi.a \ - $(TOP)/src/mesa/libmesa.a \ + $(TOP)/src/mesa/libmesagallium.a \ LOCAL_CFLAGS = -D_EGL_PLATFORM_X=1 diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c index e9f821d276..9ceb67d2ac 100644 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -38,9 +38,10 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_memory.h" #include "softpipe/sp_winsys.h" +#include "softpipe/sp_texture.h" #include "eglconfig.h" #include "eglconfigutil.h" @@ -276,6 +277,7 @@ display_surface(struct pipe_winsys *pws, struct pipe_surface *psurf, struct xlib_egl_surface *xsurf) { + struct softpipe_texture *spt = softpipe_texture(psurf->texture); XImage *ximage; void *data; @@ -292,13 +294,13 @@ display_surface(struct pipe_winsys *pws, assert(ximage->format); assert(ximage->bitmap_unit); - data = pws->buffer_map(pws, psurf->buffer, 0); + data = pws->buffer_map(pws, spt->buffer, 0); /* update XImage's fields */ ximage->data = data; ximage->width = psurf->width; ximage->height = psurf->height; - ximage->bytes_per_line = psurf->stride; + 
ximage->bytes_per_line = spt->stride[psurf->level]; XPutImage(xsurf->Dpy, xsurf->Win, xsurf->Gc, ximage, 0, 0, 0, 0, psurf->width, psurf->height); @@ -308,7 +310,7 @@ display_surface(struct pipe_winsys *pws, ximage->data = NULL; XDestroyImage(ximage); - pws->buffer_unmap(pws, psurf->buffer); + pws->buffer_unmap(pws, spt->buffer); } @@ -537,7 +539,7 @@ xlib_eglDestroySurface(_EGLDriver *drv, EGLDisplay dpy, EGLSurface surface) } else { XFreeGC(surf->Dpy, surf->Gc); - st_unreference_framebuffer(&surf->Framebuffer); + st_unreference_framebuffer(surf->Framebuffer); free(surf); } return EGL_TRUE; @@ -559,8 +561,10 @@ xlib_eglSwapBuffers(_EGLDriver *drv, EGLDisplay dpy, EGLSurface draw) { struct xlib_egl_surface *xsurf = lookup_surface(draw); struct pipe_winsys *pws = xsurf->winsys; - struct pipe_surface *psurf = - st_get_framebuffer_surface(xsurf->Framebuffer, ST_SURFACE_BACK_LEFT); + struct pipe_surface *psurf; + + st_get_framebuffer_surface(xsurf->Framebuffer, ST_SURFACE_BACK_LEFT, + &psurf); st_notify_swapbuffers(xsurf->Framebuffer); diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c index 2fd190da52..739bfa1c1a 100644 --- a/src/gallium/winsys/egl_xlib/sw_winsys.c +++ b/src/gallium/winsys/egl_xlib/sw_winsys.c @@ -35,7 +35,7 @@ */ -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" #include "util/u_math.h" @@ -161,65 +161,25 @@ buffer_destroy(struct pipe_winsys *pws, struct pipe_buffer *buf) } -/** - * Called via winsys->surface_alloc() to create new surfaces. 
- */ -static struct pipe_surface * -surface_alloc(struct pipe_winsys *ws) -{ - struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface); - if (!surf) - return NULL; - - surf->refcount = 1; - surf->winsys = ws; - - return surf; -} - - -static int -surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage) + unsigned usage, + unsigned *stride) { const unsigned alignment = 64; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; - surf->width = width; - surf->height = height; - surf->format = format; - pf_get_block(surf->format, &surf->block); - surf->nblocksx = pf_get_nblocksx(&surf->block, width); - surf->nblocksy = pf_get_nblocksy(&surf->block, height); - surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); - surf->usage = flags; - - assert(!surf->buffer); - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, - surf->stride * height); - if(!surf->buffer) - return -1; - - return 0; -} - + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, alignment); -static void -surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) -{ - struct pipe_surface *surf = *s; - assert(!surf->texture); - surf->refcount--; - if (surf->refcount == 0) { - if (surf->buffer) - winsys_buffer_reference(winsys, &surf->buffer, NULL); - free(surf); - } - *s = NULL; + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); } @@ -268,9 +228,7 @@ create_sw_winsys(void) ws->Base.buffer_unmap = buffer_unmap; ws->Base.buffer_destroy = buffer_destroy; - ws->Base.surface_alloc = surface_alloc; - ws->Base.surface_alloc_storage = surface_alloc_storage; - ws->Base.surface_release = surface_release; + 
ws->Base.surface_buffer_create = surface_buffer_create; ws->Base.fence_reference = fence_reference; ws->Base.fence_signalled = fence_signalled; diff --git a/src/gallium/winsys/g3dvl/nouveau/Makefile b/src/gallium/winsys/g3dvl/nouveau/Makefile new file mode 100644 index 0000000000..2997f6b79c --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/Makefile @@ -0,0 +1,50 @@ +TARGET = libnouveau_dri.so +GALLIUMDIR = ../../.. +DRMDIR ?= /usr +DRIDIR = ../../../../driclient + +OBJECTS = nouveau_screen_vl.o nouveau_context_vl.o nouveau_swapbuffers.o + +CFLAGS += -g -Wall -Werror=implicit-function-declaration -fPIC \ + -I${GALLIUMDIR}/include \ + -I${GALLIUMDIR}/winsys/g3dvl \ + -I${GALLIUMDIR}/winsys/drm/nouveau \ + -I${DRMDIR}/include \ + -I${DRMDIR}/include/drm \ + -I${DRMDIR}/include/nouveau \ + -I${GALLIUMDIR}/drivers \ + -I${GALLIUMDIR}/auxiliary \ + -I${DRIDIR}/include + +LDFLAGS += -L${DRMDIR}/lib \ + -L${DRIDIR}/lib \ + -L${GALLIUMDIR}/winsys/drm/nouveau/common \ + -L${GALLIUMDIR}/auxiliary/draw \ + -L${GALLIUMDIR}/auxiliary/tgsi \ + -L${GALLIUMDIR}/auxiliary/translate \ + -L${GALLIUMDIR}/auxiliary/rtasm \ + -L${GALLIUMDIR}/auxiliary/cso_cache \ + -L${GALLIUMDIR}/drivers/nv04 \ + -L${GALLIUMDIR}/drivers/nv10 \ + -L${GALLIUMDIR}/drivers/nv20 \ + -L${GALLIUMDIR}/drivers/nv30 \ + -L${GALLIUMDIR}/drivers/nv40 \ + -L${GALLIUMDIR}/drivers/nv50 + +LIBS += -lnouveaudrm -ldriclient -ldrm_nouveau -ldrm -lnv04 -lnv10 -lnv20 -lnv30 -lnv40 -lnv50 -ldraw -ltgsi -ltranslate -lrtasm -lcso_cache -lm + +############################################# + +.PHONY: all clean libdriclient + +all: ${TARGET} + +${TARGET}: ${OBJECTS} libdriclient + $(CC) ${LDFLAGS} -shared -o $@ ${OBJECTS} ${LIBS} + +libdriclient: + cd ${DRIDIR}/src && ${MAKE} + +clean: + cd ${DRIDIR}/src && ${MAKE} clean + rm -rf ${OBJECTS} ${TARGET} diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c new file mode 100644 index 0000000000..dfc4905bc0 --- 
/dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.c @@ -0,0 +1,172 @@ +#include "nouveau_context_vl.h" +#include <pipe/p_defines.h> +#include <pipe/p_context.h> +#include <pipe/p_screen.h> +#include <util/u_memory.h> +#include <common/nouveau_dri.h> +#include <common/nouveau_local.h> +#include <common/nouveau_winsys_pipe.h> +#include "nouveau_screen_vl.h" + +/* +#ifdef DEBUG +static const struct dri_debug_control debug_control[] = { + { "bo", DEBUG_BO }, + { NULL, 0 } +}; +int __nouveau_debug = 0; +#endif +*/ + +int +nouveau_context_create(dri_context_t *dri_context) +{ + dri_screen_t *dri_screen; + struct nouveau_screen_vl *nv_screen; + struct nouveau_context_vl *nv; + + assert (dri_context); + + dri_screen = dri_context->dri_screen; + nv_screen = dri_screen->private; + nv = CALLOC_STRUCT(nouveau_context_vl); + + if (!nv) + return 1; + + if (nouveau_context_init(&nv_screen->base, dri_context->drm_context, + (drmLock*)&dri_screen->sarea->lock, NULL, &nv->base)) + { + FREE(nv); + return 1; + } + + dri_context->private = (void*)nv; + nv->dri_context = dri_context; + nv->nv_screen = nv_screen; + + /* + driParseConfigFiles(&nv->dri_option_cache, &nv_screen->option_cache, + nv->dri_screen->myNum, "nouveau"); +#ifdef DEBUG + __nouveau_debug = driParseDebugString(getenv("NOUVEAU_DEBUG"), + debug_control); +#endif + */ + + nv->base.nvc->pctx[nv->base.pctx_id]->priv = nv; + + return 0; +} + +void +nouveau_context_destroy(dri_context_t *dri_context) +{ + struct nouveau_context_vl *nv; + + assert(dri_context); + nv = dri_context->private; + nouveau_context_cleanup(&nv->base); + + FREE(nv); +} + +int +nouveau_context_bind(struct nouveau_context_vl *nv, dri_drawable_t *dri_drawable) +{ + assert(nv); + assert(dri_drawable); + + if (nv->dri_drawable != dri_drawable) + { + nv->dri_drawable = dri_drawable; + dri_drawable->private = nv; + } + + return 0; +} + +int +nouveau_context_unbind(struct nouveau_context_vl *nv) +{ + assert(nv); + + nv->dri_drawable = NULL; + + 
return 0; +} + +/* Show starts here */ + +int bind_pipe_drawable(struct pipe_context *pipe, Drawable drawable) +{ + struct nouveau_context_vl *nv; + dri_drawable_t *dri_drawable; + + assert(pipe); + + nv = pipe->priv; + + driCreateDrawable(nv->nv_screen->dri_screen, drawable, &dri_drawable); + + nouveau_context_bind(nv, dri_drawable); + + return 0; +} + +int unbind_pipe_drawable(struct pipe_context *pipe) +{ + assert (pipe); + + nouveau_context_unbind(pipe->priv); + + return 0; +} + +struct pipe_context* create_pipe_context(Display *display, int screen) +{ + dri_screen_t *dri_screen; + dri_framebuffer_t dri_framebuf; + dri_context_t *dri_context; + struct nouveau_context_vl *nv; + + assert(display); + + driCreateScreen(display, screen, &dri_screen, &dri_framebuf); + driCreateContext(dri_screen, XDefaultVisual(display, screen), &dri_context); + + nouveau_screen_create(dri_screen, &dri_framebuf); + nouveau_context_create(dri_context); + + nv = dri_context->private; + + return nv->base.nvc->pctx[nv->base.pctx_id]; +} + +int destroy_pipe_context(struct pipe_context *pipe) +{ + struct pipe_screen *screen; + struct pipe_winsys *winsys; + struct nouveau_context_vl *nv; + dri_screen_t *dri_screen; + dri_context_t *dri_context; + + assert(pipe); + + screen = pipe->screen; + winsys = pipe->winsys; + nv = pipe->priv; + dri_context = nv->dri_context; + dri_screen = dri_context->dri_screen; + + pipe->destroy(pipe); + screen->destroy(screen); + FREE(winsys); + + nouveau_context_destroy(dri_context); + nouveau_screen_destroy(dri_screen); + driDestroyContext(dri_context); + driDestroyScreen(dri_screen); + + return 0; +} diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h new file mode 100644 index 0000000000..1115c3130c --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_context_vl.h @@ -0,0 +1,39 @@ +#ifndef __NOUVEAU_CONTEXT_VL_H__ +#define __NOUVEAU_CONTEXT_VL_H__ + +#include <driclient.h> +#include 
<nouveau/nouveau_winsys.h> +#include <common/nouveau_context.h> + +/*#include "xmlconfig.h"*/ + +struct nouveau_context_vl { + struct nouveau_context base; + struct nouveau_screen_vl *nv_screen; + dri_context_t *dri_context; + dri_drawable_t *dri_drawable; + unsigned int last_stamp; + /*driOptionCache dri_option_cache;*/ + drm_context_t drm_context; + drmLock drm_lock; +}; + +extern int nouveau_context_create(dri_context_t *); +extern void nouveau_context_destroy(dri_context_t *); +extern int nouveau_context_bind(struct nouveau_context_vl *, dri_drawable_t *); +extern int nouveau_context_unbind(struct nouveau_context_vl *); + +#ifdef DEBUG +extern int __nouveau_debug; + +#define DEBUG_BO (1 << 0) + +#define DBG(flag, ...) do { \ + if (__nouveau_debug & (DEBUG_##flag)) \ + NOUVEAU_ERR(__VA_ARGS__); \ +} while(0) +#else +#define DBG(flag, ...) +#endif + +#endif diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.c new file mode 100644 index 0000000000..b7c74f8299 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.c @@ -0,0 +1,88 @@ +#include "nouveau_screen_vl.h" +#include <util/u_memory.h> +#include <nouveau_drm.h> +#include <common/nouveau_dri.h> +#include <common/nouveau_local.h> + +#if NOUVEAU_DRM_HEADER_PATCHLEVEL != 12 +#error nouveau_drm.h version does not match expected version +#endif + +/* +PUBLIC const char __driConfigOptions[] = +DRI_CONF_BEGIN +DRI_CONF_END; +static const GLuint __driNConfigOptions = 0; +*/ + +int nouveau_check_dri_drm_ddx(dri_version_t *dri, dri_version_t *drm, dri_version_t *ddx) +{ + static const dri_version_t ddx_expected = {0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL}; + static const dri_version_t dri_expected = {4, 0, 0}; + static const dri_version_t drm_expected = {0, 0, NOUVEAU_DRM_HEADER_PATCHLEVEL}; + + assert(dri); + assert(drm); + assert(ddx); + + if (dri->major != dri_expected.major || dri->minor < dri_expected.minor) + { + NOUVEAU_ERR("Unexpected 
DRI version.\n"); + return 1; + } + if (drm->major != drm_expected.major || drm->minor < drm_expected.minor) + { + NOUVEAU_ERR("Unexpected DRM version.\n"); + return 1; + } + if (ddx->major != ddx_expected.major || ddx->minor < ddx_expected.minor) + { + NOUVEAU_ERR("Unexpected DDX version.\n"); + return 1; + } + + return 0; +} + +int +nouveau_screen_create(dri_screen_t *dri_screen, dri_framebuffer_t *dri_framebuf) +{ + struct nouveau_dri *nv_dri = dri_framebuf->private; + struct nouveau_screen_vl *nv_screen; + + assert(dri_screen); + assert(dri_framebuf); + + if (nouveau_check_dri_drm_ddx(&dri_screen->dri, &dri_screen->drm, &dri_screen->ddx)) + return 1; + + nv_screen = CALLOC_STRUCT(nouveau_screen_vl); + + if (!nv_screen) + return 1; + + if (nouveau_screen_init(nv_dri, dri_screen->fd, &nv_screen->base)) + { + FREE(nv_screen); + return 1; + } + + /* + driParseOptionInfo(&nv_screen->option_cache, + __driConfigOptions, __driNConfigOptions); + */ + + nv_screen->dri_screen = dri_screen; + dri_screen->private = (void*)nv_screen; + + return 0; +} + +void +nouveau_screen_destroy(dri_screen_t *dri_screen) +{ + struct nouveau_screen_vl *nv_screen = dri_screen->private; + + nouveau_screen_cleanup(&nv_screen->base); + FREE(nv_screen); +} diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.h new file mode 100644 index 0000000000..0c1ceca6de --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_screen_vl.h @@ -0,0 +1,20 @@ +#ifndef __NOUVEAU_SCREEN_VL_H__ +#define __NOUVEAU_SCREEN_VL_H__ + +#include <driclient.h> +#include <common/nouveau_screen.h> + +/* TODO: Investigate using DRI options for interesting things */ +/*#include "xmlconfig.h"*/ + +struct nouveau_screen_vl +{ + struct nouveau_screen base; + dri_screen_t *dri_screen; + /*driOptionCache option_cache;*/ +}; + +int nouveau_screen_create(dri_screen_t *dri_screen, dri_framebuffer_t *dri_framebuf); +void nouveau_screen_destroy(dri_screen_t 
*dri_screen); + +#endif diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c new file mode 100644 index 0000000000..864be37871 --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.c @@ -0,0 +1,94 @@ +#include <driclient.h> +#include <common/nouveau_local.h> +#include <common/nouveau_screen.h> +#include "nouveau_context_vl.h" +#include "nouveau_swapbuffers.h" + +void +nouveau_copy_buffer(dri_drawable_t *dri_drawable, struct pipe_surface *surf, + const drm_clip_rect_t *rect) +{ + struct nouveau_context_vl *nv = dri_drawable->private; + struct pipe_context *pipe = nv->base.nvc->pctx[nv->base.pctx_id]; + drm_clip_rect_t *pbox; + int nbox, i; + + LOCK_HARDWARE(&nv->base); + if (!dri_drawable->num_cliprects) { + UNLOCK_HARDWARE(&nv->base); + return; + } + pbox = dri_drawable->cliprects; + nbox = dri_drawable->num_cliprects; + + for (i = 0; i < nbox; i++, pbox++) { + int sx, sy, dx, dy, w, h; + + sx = pbox->x1 - dri_drawable->x; + sy = pbox->y1 - dri_drawable->y; + dx = pbox->x1; + dy = pbox->y1; + w = pbox->x2 - pbox->x1; + h = pbox->y2 - pbox->y1; + + pipe->surface_copy(pipe, FALSE, nv->base.frontbuffer, + dx, dy, surf, sx, sy, w, h); + } + + FIRE_RING(nv->base.nvc->channel); + UNLOCK_HARDWARE(&nv->base); +} + +void +nouveau_copy_sub_buffer(dri_drawable_t *dri_drawable, struct pipe_surface *surf, int x, int y, int w, int h) +{ + if (surf) { + drm_clip_rect_t rect; + rect.x1 = x; + rect.y1 = y; + rect.x2 = x + w; + rect.y2 = y + h; + + nouveau_copy_buffer(dri_drawable, surf, &rect); + } +} + +void +nouveau_swap_buffers(dri_drawable_t *dri_drawable, struct pipe_surface *surf) +{ + if (surf) + nouveau_copy_buffer(dri_drawable, surf, NULL); +} + +void +nouveau_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surf, + void *context_private) +{ + struct nouveau_context_vl *nv; + dri_drawable_t *dri_drawable; + + assert(pws); + assert(surf); + assert(context_private); + + 
nv = context_private; + dri_drawable = nv->dri_drawable; + + nouveau_copy_buffer(dri_drawable, surf, NULL); +} + +void +nouveau_contended_lock(struct nouveau_context *nv) +{ + struct nouveau_context_vl *nv_vl = (struct nouveau_context_vl*)nv; + dri_drawable_t *dri_drawable = nv_vl->dri_drawable; + dri_screen_t *dri_screen = nv_vl->dri_context->dri_screen; + + /* If the window moved, may need to set a new cliprect now. + * + * NOTE: This releases and regains the hw lock, so all state + * checking must be done *after* this call: + */ + if (dri_drawable) + DRI_VALIDATE_DRAWABLE_INFO(dri_screen, dri_drawable); +} diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h new file mode 100644 index 0000000000..35e934adba --- /dev/null +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_swapbuffers.h @@ -0,0 +1,10 @@ +#ifndef __NOUVEAU_SWAPBUFFERS_H__ +#define __NOUVEAU_SWAPBUFFERS_H__ + +extern void nouveau_copy_buffer(dri_drawable_t *, struct pipe_surface *, + const drm_clip_rect_t *); +extern void nouveau_copy_sub_buffer(dri_drawable_t *, struct pipe_surface *, + int x, int y, int w, int h); +extern void nouveau_swap_buffers(dri_drawable_t *, struct pipe_surface *); + +#endif diff --git a/src/gallium/winsys/g3dvl/vl_winsys.h b/src/gallium/winsys/g3dvl/vl_winsys.h new file mode 100644 index 0000000000..c83db28dd9 --- /dev/null +++ b/src/gallium/winsys/g3dvl/vl_winsys.h @@ -0,0 +1,14 @@ +#ifndef vl_winsys_h +#define vl_winsys_h + +#include <X11/Xlib.h> + +struct pipe_context; + +struct pipe_context* create_pipe_context(Display *display, int screen); +int destroy_pipe_context(struct pipe_context *pipe); +int bind_pipe_drawable(struct pipe_context *pipe, Drawable drawable); +int unbind_pipe_drawable(struct pipe_context *pipe); + +#endif + diff --git a/src/gallium/winsys/g3dvl/xsp_winsys.c b/src/gallium/winsys/g3dvl/xsp_winsys.c new file mode 100644 index 0000000000..40d683234f --- /dev/null +++ 
b/src/gallium/winsys/g3dvl/xsp_winsys.c @@ -0,0 +1,295 @@ +#include "vl_winsys.h" +#include <X11/Xutil.h> +#include <pipe/p_winsys.h> +#include <pipe/p_state.h> +#include <pipe/p_inlines.h> +#include <util/u_memory.h> +#include <softpipe/sp_winsys.h> + +/* pipe_winsys implementation */ + +struct xsp_pipe_winsys +{ + struct pipe_winsys base; + XImage fbimage; +}; + +struct xsp_context +{ + Display *display; + int screen; + Drawable drawable; + int drawable_bound; +}; + +struct xsp_buffer +{ + struct pipe_buffer base; + boolean is_user_buffer; + void *data; + void *mapped_data; +}; + +static struct pipe_buffer* xsp_buffer_create(struct pipe_winsys *pws, unsigned alignment, unsigned usage, unsigned size) +{ + struct xsp_buffer *buffer; + + assert(pws); + + buffer = calloc(1, sizeof(struct xsp_buffer)); + buffer->base.refcount = 1; + buffer->base.alignment = alignment; + buffer->base.usage = usage; + buffer->base.size = size; + buffer->data = align_malloc(size, alignment); + + return (struct pipe_buffer*)buffer; +} + +static struct pipe_buffer* xsp_user_buffer_create(struct pipe_winsys *pws, void *data, unsigned size) +{ + struct xsp_buffer *buffer; + + assert(pws); + + buffer = calloc(1, sizeof(struct xsp_buffer)); + buffer->base.refcount = 1; + buffer->base.size = size; + buffer->is_user_buffer = TRUE; + buffer->data = data; + + return (struct pipe_buffer*)buffer; +} + +static void* xsp_buffer_map(struct pipe_winsys *pws, struct pipe_buffer *buffer, unsigned flags) +{ + struct xsp_buffer *xsp_buf = (struct xsp_buffer*)buffer; + + assert(pws); + assert(buffer); + + xsp_buf->mapped_data = xsp_buf->data; + + return xsp_buf->mapped_data; +} + +static void xsp_buffer_unmap(struct pipe_winsys *pws, struct pipe_buffer *buffer) +{ + struct xsp_buffer *xsp_buf = (struct xsp_buffer*)buffer; + + assert(pws); + assert(buffer); + + xsp_buf->mapped_data = NULL; +} + +static void xsp_buffer_destroy(struct pipe_winsys *pws, struct pipe_buffer *buffer) +{ + struct xsp_buffer *xsp_buf 
= (struct xsp_buffer*)buffer; + + assert(pws); + assert(buffer); + + if (!xsp_buf->is_user_buffer) + align_free(xsp_buf->data); + + free(xsp_buf); +} + +/* Borrowed from Mesa's xm_winsys */ +static unsigned int round_up(unsigned n, unsigned multiple) +{ + return (n + multiple - 1) & ~(multiple - 1); +} + +static struct pipe_buffer* xsp_surface_buffer_create +( + struct pipe_winsys *pws, + unsigned width, + unsigned height, + enum pipe_format format, + unsigned usage, + unsigned *stride +) +{ + const unsigned int ALIGNMENT = 1; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; + + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, ALIGNMENT); + + return pws->buffer_create(pws, ALIGNMENT, + usage, + *stride * nblocksy); +} + +static void xsp_fence_reference(struct pipe_winsys *pws, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) +{ + assert(pws); + assert(ptr); + assert(fence); +} + +static int xsp_fence_signalled(struct pipe_winsys *pws, struct pipe_fence_handle *fence, unsigned flag) +{ + assert(pws); + assert(fence); + + return 0; +} + +static int xsp_fence_finish(struct pipe_winsys *pws, struct pipe_fence_handle *fence, unsigned flag) +{ + assert(pws); + assert(fence); + + return 0; +} + +static void xsp_flush_frontbuffer(struct pipe_winsys *pws, struct pipe_surface *surface, void *context_private) +{ + struct xsp_pipe_winsys *xsp_winsys; + struct xsp_context *xsp_context; + + assert(pws); + assert(surface); + assert(context_private); + + xsp_winsys = (struct xsp_pipe_winsys*)pws; + xsp_context = (struct xsp_context*)context_private; + + if (!xsp_context->drawable_bound) + return; + + xsp_winsys->fbimage.width = surface->width; + xsp_winsys->fbimage.height = surface->height; + xsp_winsys->fbimage.bytes_per_line = surface->width * (xsp_winsys->fbimage.bits_per_pixel >> 3); + xsp_winsys->fbimage.data = 
pipe_surface_map(surface, 0); + + XPutImage + ( + xsp_context->display, + xsp_context->drawable, + XDefaultGC(xsp_context->display, xsp_context->screen), + &xsp_winsys->fbimage, + 0, + 0, + 0, + 0, + surface->width, + surface->height + ); + XFlush(xsp_context->display); + pipe_surface_unmap(surface); +} + +static const char* xsp_get_name(struct pipe_winsys *pws) +{ + assert(pws); + return "X11 SoftPipe"; +} + +/* Show starts here */ + +int bind_pipe_drawable(struct pipe_context *pipe, Drawable drawable) +{ + struct xsp_context *xsp_context; + + assert(pipe); + + xsp_context = pipe->priv; + xsp_context->drawable = drawable; + xsp_context->drawable_bound = 1; + + return 0; +} + +int unbind_pipe_drawable(struct pipe_context *pipe) +{ + struct xsp_context *xsp_context; + + assert(pipe); + + xsp_context = pipe->priv; + xsp_context->drawable_bound = 0; + + return 0; +} + +struct pipe_context* create_pipe_context(Display *display, int screen) +{ + struct xsp_pipe_winsys *xsp_winsys; + struct xsp_context *xsp_context; + struct pipe_screen *sp_screen; + struct pipe_context *sp_pipe; + + assert(display); + + xsp_winsys = calloc(1, sizeof(struct xsp_pipe_winsys)); + xsp_winsys->base.buffer_create = xsp_buffer_create; + xsp_winsys->base.user_buffer_create = xsp_user_buffer_create; + xsp_winsys->base.buffer_map = xsp_buffer_map; + xsp_winsys->base.buffer_unmap = xsp_buffer_unmap; + xsp_winsys->base.buffer_destroy = xsp_buffer_destroy; + xsp_winsys->base.surface_buffer_create = xsp_surface_buffer_create; + xsp_winsys->base.fence_reference = xsp_fence_reference; + xsp_winsys->base.fence_signalled = xsp_fence_signalled; + xsp_winsys->base.fence_finish = xsp_fence_finish; + xsp_winsys->base.flush_frontbuffer = xsp_flush_frontbuffer; + xsp_winsys->base.get_name = xsp_get_name; + + { + /* XXX: Can't use the returned XImage* directly, + since we don't have control over winsys destruction + and we wouldn't be able to free it */ + XImage *template = XCreateImage + ( + display, + 
XDefaultVisual(display, XDefaultScreen(display)), + XDefaultDepth(display, XDefaultScreen(display)), + ZPixmap, + 0, + NULL, + 0, /* Don't know the width and height until flush_frontbuffer */ + 0, + 32, + 0 + ); + + memcpy(&xsp_winsys->fbimage, template, sizeof(XImage)); + XInitImage(&xsp_winsys->fbimage); + + XDestroyImage(template); + } + + sp_screen = softpipe_create_screen((struct pipe_winsys*)xsp_winsys); + sp_pipe = softpipe_create(sp_screen, (struct pipe_winsys*)xsp_winsys, NULL); + + xsp_context = calloc(1, sizeof(struct xsp_context)); + xsp_context->display = display; + xsp_context->screen = screen; + + sp_pipe->priv = xsp_context; + + return sp_pipe; +} + +int destroy_pipe_context(struct pipe_context *pipe) +{ + struct pipe_screen *screen; + struct pipe_winsys *winsys; + + assert(pipe); + + screen = pipe->screen; + winsys = pipe->winsys; + free(pipe->priv); + pipe->destroy(pipe); + screen->destroy(screen); + free(winsys); + + return 0; +} diff --git a/src/gallium/winsys/gdi/SConscript b/src/gallium/winsys/gdi/SConscript index b463fa6505..72b5df8ca2 100644 --- a/src/gallium/winsys/gdi/SConscript +++ b/src/gallium/winsys/gdi/SConscript @@ -8,26 +8,28 @@ if env['platform'] == 'windows': env = env.Clone() env.Append(CPPPATH = [ - '#src/mesa/state_tracker/wgl', + '#src/gallium/state_trackers/wgl', ]) - env.Append(CPPDEFINES = [ + env.Append(LIBS = [ + 'gdi32', + 'user32', + 'kernel32', ]) sources = [ - '#src/mesa/state_tracker/wgl/opengl32.def', 'gdi_softpipe_winsys.c', ] + + if env['toolchain'] == 'crossmingw': + sources += ['#src/gallium/state_trackers/wgl/opengl32.mingw.def'] + else: + sources += ['#src/gallium/state_trackers/wgl/opengl32.def'] drivers = [ softpipe, ] - env.Append(LIBS = [ - 'gdi32', - 'user32' - ]) - env.SharedLibrary( target ='opengl32', source = sources, diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c index e981b4c5cd..2d961f7087 100644 --- 
a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c @@ -38,14 +38,15 @@ #include <windows.h> -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "softpipe/sp_winsys.h" -#include "stw_winsys.h" +#include "softpipe/sp_texture.h" +#include "shared/stw_winsys.h" struct gdi_softpipe_buffer @@ -161,63 +162,25 @@ round_up(unsigned n, unsigned multiple) } -static int -gdi_softpipe_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage) + unsigned usage, + unsigned *stride) { const unsigned alignment = 64; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; - surf->width = width; - surf->height = height; - surf->format = format; - pf_get_block(format, &surf->block); - surf->nblocksx = pf_get_nblocksx(&surf->block, width); - surf->nblocksy = pf_get_nblocksy(&surf->block, height); - surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); - surf->usage = flags; - - assert(!surf->buffer); - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, - surf->stride * surf->nblocksy); - if(!surf->buffer) - return -1; + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, alignment); - return 0; -} - - -static struct pipe_surface * -gdi_softpipe_surface_alloc(struct pipe_winsys *winsys) -{ - struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - - assert(winsys); - - surface->refcount = 1; - surface->winsys = winsys; - - return surface; -} - - -static void -gdi_softpipe_surface_release(struct 
pipe_winsys *winsys, - struct pipe_surface **s) -{ - struct pipe_surface *surf = *s; - assert(!surf->texture); - surf->refcount--; - if (surf->refcount == 0) { - if (surf->buffer) - winsys_buffer_reference(winsys, &surf->buffer, NULL); - free(surf); - } - *s = NULL; + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); } @@ -281,9 +244,7 @@ gdi_softpipe_screen_create(void) winsys->buffer_unmap = gdi_softpipe_buffer_unmap; winsys->buffer_destroy = gdi_softpipe_buffer_destroy; - winsys->surface_alloc = gdi_softpipe_surface_alloc; - winsys->surface_alloc_storage = gdi_softpipe_surface_alloc_storage; - winsys->surface_release = gdi_softpipe_surface_release; + winsys->surface_buffer_create = gdi_softpipe_surface_buffer_create; winsys->fence_reference = gdi_softpipe_fence_reference; winsys->fence_signalled = gdi_softpipe_fence_signalled; @@ -308,18 +269,21 @@ gdi_softpipe_context_create(struct pipe_screen *screen) static void -gdi_softpipe_flush_frontbuffer(struct pipe_winsys *winsys, +gdi_softpipe_flush_frontbuffer(struct pipe_screen *screen, struct pipe_surface *surface, HDC hDC) { + struct softpipe_texture *texture; struct gdi_softpipe_buffer *buffer; BITMAPINFO bmi; - buffer = gdi_softpipe_buffer(surface->buffer); + texture = softpipe_texture(surface->texture); + + buffer = gdi_softpipe_buffer(texture->buffer); memset(&bmi, 0, sizeof(BITMAPINFO)); bmi.bmiHeader.biSize = sizeof(BITMAPINFOHEADER); - bmi.bmiHeader.biWidth = surface->stride / pf_get_size(surface->format); + bmi.bmiHeader.biWidth = texture->stride[surface->level] / pf_get_size(surface->format); bmi.bmiHeader.biHeight= -(long)surface->height; bmi.bmiHeader.biPlanes = 1; bmi.bmiHeader.biBitCount = pf_get_bits(surface->format); @@ -356,4 +320,4 @@ DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) break; } return TRUE; -}
\ No newline at end of file +} diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile index 11c7632411..bb187cc14a 100644 --- a/src/gallium/winsys/xlib/Makefile +++ b/src/gallium/winsys/xlib/Makefile @@ -19,16 +19,24 @@ INCLUDE_DIRS = \ -I$(TOP)/src/mesa/main \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/state_trackers/glx/xlib \ -I$(TOP)/src/gallium/auxiliary +DEFINES += \ + -DGALLIUM_SOFTPIPE \ + -DGALLIUM_TRACE \ + -DGALLIUM_BRW +#-DGALLIUM_CELL will be defined by the config */ + XLIB_WINSYS_SOURCES = \ - glxapi.c \ - fakeglx.c \ - xfonts.c \ - xm_api.c \ - xm_winsys.c \ - xm_winsys_aub.c \ - brw_aub.c + xlib.c \ + xlib_cell.c \ + xlib_brw_aub.c \ + xlib_brw_context.c \ + xlib_brw_screen.c \ + xlib_softpipe.c \ + xlib_trace.c + XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o) @@ -37,8 +45,9 @@ XLIB_WINSYS_OBJECTS = $(XLIB_WINSYS_SOURCES:.c=.o) LIBS = \ $(GALLIUM_DRIVERS) \ + $(TOP)/src/gallium/state_trackers/glx/xlib/libxlib.a \ $(TOP)/src/mesa/libglapi.a \ - $(TOP)/src/mesa/libmesa.a \ + $(TOP)/src/mesa/libmesagallium.a \ $(GALLIUM_AUXILIARIES) \ $(CELL_SPU_LIB) \ @@ -46,31 +55,33 @@ LIBS = \ .SUFFIXES : .cpp .c.o: - $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + $(CC) -c $(INCLUDE_DIRS) $(DEFINES) $(CFLAGS) $< -o $@ .cpp.o: - $(CXX) -c $(INCLUDE_DIRS) $(CXXFLAGS) $< -o $@ + $(CXX) -c $(INCLUDE_DIRS) $(DEFINES) $(CXXFLAGS) $< -o $@ -default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) +default: $(TOP)/$(LIB_DIR)/gallium $(TOP)/$(LIB_DIR)/gallium/$(GL_LIB_NAME) +$(TOP)/$(LIB_DIR)/gallium: + @ mkdir -p $(TOP)/$(LIB_DIR)/gallium # Make the libGL.so library -$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(XLIB_WINSYS_OBJECTS) $(LIBS) +$(TOP)/$(LIB_DIR)/gallium/$(GL_LIB_NAME): $(XLIB_WINSYS_OBJECTS) $(LIBS) Makefile $(TOP)/bin/mklib -o $(GL_LIB) \ -linker "$(CC)" \ -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ - -install $(TOP)/$(LIB_DIR) \ + -install $(TOP)/$(LIB_DIR)/gallium \ $(MKLIB_OPTIONS) 
$(XLIB_WINSYS_OBJECTS) \ --start-group $(LIBS) --end-group $(GL_LIB_DEPS) -depend: $(ALL_SOURCES) +depend: $(XLIB_WINSYS_SOURCES) @ echo "running $(MKDEP)" @ rm -f depend # workaround oops on gutsy?!? @ touch depend - @ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(ALL_SOURCES) \ + $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(XLIB_WINSYS_SOURCES) \ > /dev/null 2>/dev/null diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript index 324fbef306..0fb4b50f63 100644 --- a/src/gallium/winsys/xlib/SConscript +++ b/src/gallium/winsys/xlib/SConscript @@ -5,8 +5,7 @@ Import('*') if env['platform'] == 'linux' \ and 'mesa' in env['statetrackers'] \ - and 'softpipe' in env['drivers'] \ - and 'i965simple' in env['drivers'] \ + and ('softpipe' or 'i915simple' or 'trace') in env['drivers'] \ and not env['dri']: env = env.Clone() @@ -14,32 +13,46 @@ if env['platform'] == 'linux' \ env.Append(CPPPATH = [ '#/src/mesa', '#/src/mesa/main', + '#src/gallium/state_trackers/glx/xlib', ]) + env.Append(CPPDEFINES = ['USE_XSHM']) + sources = [ - 'glxapi.c', - 'fakeglx.c', - 'xfonts.c', - 'xm_api.c', - 'xm_winsys.c', - 'xm_winsys_aub.c', - 'brw_aub.c', + 'xlib.c', ] + + drivers = [] - drivers = [ - softpipe, - i965simple, - ] - + if 'softpipe' in env['drivers']: + env.Append(CPPDEFINES = 'GALLIUM_SOFTPIPE') + sources += ['xlib_softpipe.c'] + drivers += [softpipe] + + if 'i965simple' in env['drivers']: + env.Append(CPPDEFINES = 'GALLIUM_I965SIMPLE') + sources += [ + 'xlib_brw_aub.c', + 'xlib_brw_context.c', + 'xlib_brw_screen.c', + ] + drivers += [i965simple] + + if 'cell' in env['drivers']: + env.Append(CPPDEFINES = 'GALLIUM_CELL') + sources += ['xlib_cell.c'] + drivers += [cell] + if 'trace' in env['drivers']: env.Append(CPPDEFINES = 'GALLIUM_TRACE') + sources += ['xlib_trace.c'] drivers += [trace] # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions libgl = env.SharedLibrary( target ='GL', source = sources, - LIBS = 
glapi + mesa + drivers + auxiliaries + env['LIBS'], + LIBS = st_xlib + glapi + mesa + drivers + auxiliaries + env['LIBS'], ) env.InstallSharedLibrary(libgl, version=(1, 5)) diff --git a/src/gallium/winsys/xlib/glxheader.h b/src/gallium/winsys/xlib/glxheader.h deleted file mode 100644 index a402191f13..0000000000 --- a/src/gallium/winsys/xlib/glxheader.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Mesa 3-D graphics library - * Version: 6.5.1 - * - * Copyright (C) 1999-2006 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - - -#ifndef GLX_HEADER_H -#define GLX_HEADER_H - -#ifdef __VMS -#include <GL/vms_x_fix.h> -#endif - -#include "glheader.h" - -#ifdef XFree86Server - -# include "resource.h" -# include "windowstr.h" - -#else - -# include <X11/Xlib.h> -# include <X11/Xlibint.h> -# include <X11/Xutil.h> -# ifdef USE_XSHM /* was SHM */ -# include <sys/ipc.h> -# include <sys/shm.h> -# include <X11/extensions/XShm.h> -# endif -# include <GL/glx.h> -# include <sys/time.h> - -#endif - - - -/* this silences a compiler warning on several systems */ -struct timespec; -struct itimerspec; - - -#endif /*GLX_HEADER*/ diff --git a/src/gallium/winsys/xlib/realglx.c b/src/gallium/winsys/xlib/realglx.c deleted file mode 100644 index 30adb7465b..0000000000 --- a/src/gallium/winsys/xlib/realglx.c +++ /dev/null @@ -1,180 +0,0 @@ - -/* - * Mesa 3-D graphics library - * Version: 5.1 - * - * Copyright (C) 1999-2002 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - - -#include <assert.h> -#include <GL/glx.h> -#include "realglx.h" -#include "glxapi.h" - - -struct _glxapi_table * -_real_GetGLXDispatchTable(void) -{ - static struct _glxapi_table glx; - - /* be sure our dispatch table size <= libGL's table */ - { - GLuint size = sizeof(struct _glxapi_table) / sizeof(void *); - (void) size; - assert(_glxapi_get_dispatch_table_size() >= size); - } - - /* initialize the whole table to no-ops */ - _glxapi_set_no_op_table(&glx); - - /* now initialize the table with the functions I implement */ - - /*** GLX_VERSION_1_0 ***/ - glx.ChooseVisual = _real_glXChooseVisual; - glx.CopyContext = _real_glXCopyContext; - glx.CreateContext = _real_glXCreateContext; - glx.CreateGLXPixmap = _real_glXCreateGLXPixmap; - glx.DestroyContext = _real_glXDestroyContext; - glx.DestroyGLXPixmap = _real_glXDestroyGLXPixmap; - glx.GetConfig = _real_glXGetConfig; - /*glx.GetCurrentContext = _real_glXGetCurrentContext;*/ - /*glx.GetCurrentDrawable = _real_glXGetCurrentDrawable;*/ - glx.IsDirect = _real_glXIsDirect; - glx.MakeCurrent = _real_glXMakeCurrent; - glx.QueryExtension = _real_glXQueryExtension; - glx.QueryVersion = _real_glXQueryVersion; - glx.SwapBuffers = _real_glXSwapBuffers; - glx.UseXFont = _real_glXUseXFont; - glx.WaitGL = _real_glXWaitGL; - glx.WaitX = _real_glXWaitX; - - /*** GLX_VERSION_1_1 ***/ - glx.GetClientString = _real_glXGetClientString; - glx.QueryExtensionsString = _real_glXQueryExtensionsString; - glx.QueryServerString = _real_glXQueryServerString; - - /*** GLX_VERSION_1_2 ***/ - /*glx.GetCurrentDisplay = _real_glXGetCurrentDisplay;*/ - - /*** GLX_VERSION_1_3 ***/ - glx.ChooseFBConfig = _real_glXChooseFBConfig; - glx.CreateNewContext = _real_glXCreateNewContext; - glx.CreatePbuffer = _real_glXCreatePbuffer; - glx.CreatePixmap = _real_glXCreatePixmap; - glx.CreateWindow = _real_glXCreateWindow; - glx.DestroyPbuffer = _real_glXDestroyPbuffer; - glx.DestroyPixmap = _real_glXDestroyPixmap; - glx.DestroyWindow = 
_real_glXDestroyWindow; - /*glx.GetCurrentReadDrawable = _real_glXGetCurrentReadDrawable;*/ - glx.GetFBConfigAttrib = _real_glXGetFBConfigAttrib; - glx.GetFBConfigs = _real_glXGetFBConfigs; - glx.GetSelectedEvent = _real_glXGetSelectedEvent; - glx.GetVisualFromFBConfig = _real_glXGetVisualFromFBConfig; - glx.MakeContextCurrent = _real_glXMakeContextCurrent; - glx.QueryContext = _real_glXQueryContext; - glx.QueryDrawable = _real_glXQueryDrawable; - glx.SelectEvent = _real_glXSelectEvent; - - /*** GLX_SGI_swap_control ***/ - glx.SwapIntervalSGI = _real_glXSwapIntervalSGI; - - /*** GLX_SGI_video_sync ***/ - glx.GetVideoSyncSGI = _real_glXGetVideoSyncSGI; - glx.WaitVideoSyncSGI = _real_glXWaitVideoSyncSGI; - - /*** GLX_SGI_make_current_read ***/ - glx.MakeCurrentReadSGI = _real_glXMakeCurrentReadSGI; - /*glx.GetCurrentReadDrawableSGI = _real_glXGetCurrentReadDrawableSGI;*/ - -#if defined(_VL_H) - /*** GLX_SGIX_video_source ***/ - glx.CreateGLXVideoSourceSGIX = _real_glXCreateGLXVideoSourceSGIX; - glx.DestroyGLXVideoSourceSGIX = _real_glXDestroyGLXVideoSourceSGIX; -#endif - - /*** GLX_EXT_import_context ***/ - glx.FreeContextEXT = _real_glXFreeContextEXT; - /*glx.GetContextIDEXT = _real_glXGetContextIDEXT;*/ - /*glx.GetCurrentDisplayEXT = _real_glXGetCurrentDisplayEXT;*/ - glx.ImportContextEXT = _real_glXImportContextEXT; - glx.QueryContextInfoEXT = _real_glXQueryContextInfoEXT; - - /*** GLX_SGIX_fbconfig ***/ - glx.GetFBConfigAttribSGIX = _real_glXGetFBConfigAttribSGIX; - glx.ChooseFBConfigSGIX = _real_glXChooseFBConfigSGIX; - glx.CreateGLXPixmapWithConfigSGIX = _real_glXCreateGLXPixmapWithConfigSGIX; - glx.CreateContextWithConfigSGIX = _real_glXCreateContextWithConfigSGIX; - glx.GetVisualFromFBConfigSGIX = _real_glXGetVisualFromFBConfigSGIX; - glx.GetFBConfigFromVisualSGIX = _real_glXGetFBConfigFromVisualSGIX; - - /*** GLX_SGIX_pbuffer ***/ - glx.CreateGLXPbufferSGIX = _real_glXCreateGLXPbufferSGIX; - glx.DestroyGLXPbufferSGIX = _real_glXDestroyGLXPbufferSGIX; - 
glx.QueryGLXPbufferSGIX = _real_glXQueryGLXPbufferSGIX; - glx.SelectEventSGIX = _real_glXSelectEventSGIX; - glx.GetSelectedEventSGIX = _real_glXGetSelectedEventSGIX; - - /*** GLX_SGI_cushion ***/ - glx.CushionSGI = _real_glXCushionSGI; - - /*** GLX_SGIX_video_resize ***/ - glx.BindChannelToWindowSGIX = _real_glXBindChannelToWindowSGIX; - glx.ChannelRectSGIX = _real_glXChannelRectSGIX; - glx.QueryChannelRectSGIX = _real_glXQueryChannelRectSGIX; - glx.QueryChannelDeltasSGIX = _real_glXQueryChannelDeltasSGIX; - glx.ChannelRectSyncSGIX = _real_glXChannelRectSyncSGIX; - -#if defined(_DM_BUFFER_H_) - /*** (GLX_SGIX_dmbuffer ***/ - glx.AssociateDMPbufferSGIX = NULL; -#endif - - /*** GLX_SGIX_swap_group ***/ - glx.JoinSwapGroupSGIX = _real_glXJoinSwapGroupSGIX; - - /*** GLX_SGIX_swap_barrier ***/ - glx.BindSwapBarrierSGIX = _real_glXBindSwapBarrierSGIX; - glx.QueryMaxSwapBarriersSGIX = _real_glXQueryMaxSwapBarriersSGIX; - - /*** GLX_SUN_get_transparent_index ***/ - glx.GetTransparentIndexSUN = _real_glXGetTransparentIndexSUN; - - /*** GLX_MESA_copy_sub_buffer ***/ - glx.CopySubBufferMESA = _real_glXCopySubBufferMESA; - - /*** GLX_MESA_release_buffers ***/ - glx.ReleaseBuffersMESA = _real_glXReleaseBuffersMESA; - - /*** GLX_MESA_pixmap_colormap ***/ - glx.CreateGLXPixmapMESA = _real_glXCreateGLXPixmapMESA; - - /*** GLX_MESA_set_3dfx_mode ***/ - glx.Set3DfxModeMESA = _real_glXSet3DfxModeMESA; - - /*** GLX_NV_vertex_array_range ***/ - glx.AllocateMemoryNV = _real_glXAllocateMemoryNV; - glx.FreeMemoryNV = _real_glXFreeMemoryNV; - - /*** GLX_MESA_agp_offset ***/ - glx.GetAGPOffsetMESA = _real_glXGetAGPOffsetMESA; - - return &glx; -} diff --git a/src/gallium/winsys/xlib/realglx.h b/src/gallium/winsys/xlib/realglx.h deleted file mode 100644 index 150129db68..0000000000 --- a/src/gallium/winsys/xlib/realglx.h +++ /dev/null @@ -1,326 +0,0 @@ - -/* - * Mesa 3-D graphics library - * Version: 3.5 - * - * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -#ifndef REALGLX_H -#define REALGLX_H - - -extern struct _glxapi_table * -_real_GetGLXDispatchTable(void); - - -/* - * Basically just need these to prevent compiler warnings. 
- */ - - -extern XVisualInfo * -_real_glXChooseVisual( Display *dpy, int screen, int *list ); - -extern GLXContext -_real_glXCreateContext( Display *dpy, XVisualInfo *visinfo, - GLXContext share_list, Bool direct ); - -extern GLXPixmap -_real_glXCreateGLXPixmap( Display *dpy, XVisualInfo *visinfo, Pixmap pixmap ); - -extern GLXPixmap -_real_glXCreateGLXPixmapMESA( Display *dpy, XVisualInfo *visinfo, - Pixmap pixmap, Colormap cmap ); - -extern void -_real_glXDestroyGLXPixmap( Display *dpy, GLXPixmap pixmap ); - -extern void -_real_glXCopyContext( Display *dpy, GLXContext src, GLXContext dst, - unsigned long mask ); - -extern Bool -_real_glXMakeCurrent( Display *dpy, GLXDrawable drawable, GLXContext ctx ); - -extern Bool -_real_glXQueryExtension( Display *dpy, int *errorb, int *event ); - -extern void -_real_glXDestroyContext( Display *dpy, GLXContext ctx ); - -extern Bool -_real_glXIsDirect( Display *dpy, GLXContext ctx ); - -extern void -_real_glXSwapBuffers( Display *dpy, GLXDrawable drawable ); - -extern void -_real_glXUseXFont( Font font, int first, int count, int listbase ); - -extern Bool -_real_glXQueryVersion( Display *dpy, int *maj, int *min ); - -extern int -_real_glXGetConfig( Display *dpy, XVisualInfo *visinfo, - int attrib, int *value ); - -extern void -_real_glXWaitGL( void ); - - -extern void -_real_glXWaitX( void ); - -/* GLX 1.1 and later */ -extern const char * -_real_glXQueryExtensionsString( Display *dpy, int screen ); - -/* GLX 1.1 and later */ -extern const char * -_real_glXQueryServerString( Display *dpy, int screen, int name ); - -/* GLX 1.1 and later */ -extern const char * -_real_glXGetClientString( Display *dpy, int name ); - - -/* - * GLX 1.3 and later - */ - -extern GLXFBConfig * -_real_glXChooseFBConfig( Display *dpy, int screen, - const int *attribList, int *nitems ); - -extern int -_real_glXGetFBConfigAttrib( Display *dpy, GLXFBConfig config, - int attribute, int *value ); - -extern GLXFBConfig * -_real_glXGetFBConfigs( Display *dpy, 
int screen, int *nelements ); - -extern XVisualInfo * -_real_glXGetVisualFromFBConfig( Display *dpy, GLXFBConfig config ); - -extern GLXWindow -_real_glXCreateWindow( Display *dpy, GLXFBConfig config, Window win, - const int *attribList ); - -extern void -_real_glXDestroyWindow( Display *dpy, GLXWindow window ); - -extern GLXPixmap -_real_glXCreatePixmap( Display *dpy, GLXFBConfig config, Pixmap pixmap, - const int *attribList ); - -extern void -_real_glXDestroyPixmap( Display *dpy, GLXPixmap pixmap ); - -extern GLXPbuffer -_real_glXCreatePbuffer( Display *dpy, GLXFBConfig config, - const int *attribList ); - -extern void -_real_glXDestroyPbuffer( Display *dpy, GLXPbuffer pbuf ); - -extern void -_real_glXQueryDrawable( Display *dpy, GLXDrawable draw, int attribute, - unsigned int *value ); - -extern GLXContext -_real_glXCreateNewContext( Display *dpy, GLXFBConfig config, - int renderType, GLXContext shareList, Bool direct ); - - -extern Bool -_real_glXMakeContextCurrent( Display *dpy, GLXDrawable draw, - GLXDrawable read, GLXContext ctx ); - -extern int -_real_glXQueryContext( Display *dpy, GLXContext ctx, int attribute, int *value ); - -extern void -_real_glXSelectEvent( Display *dpy, GLXDrawable drawable, unsigned long mask ); - -extern void -_real_glXGetSelectedEvent( Display *dpy, GLXDrawable drawable, - unsigned long *mask ); - -#ifdef GLX_SGI_swap_control -extern int -_real_glXSwapIntervalSGI(int interval); -#endif - - -#ifdef GLX_SGI_video_sync -extern int -_real_glXGetVideoSyncSGI(unsigned int *count); - -extern int -_real_glXWaitVideoSyncSGI(int divisor, int remainder, unsigned int *count); -#endif - - -#ifdef GLX_SGI_make_current_read -extern Bool -_real_glXMakeCurrentReadSGI(Display *dpy, GLXDrawable draw, GLXDrawable read, GLXContext ctx); - -extern GLXDrawable -_real_glXGetCurrentReadDrawableSGI(void); -#endif - -#if defined(_VL_H) && defined(GLX_SGIX_video_source) -extern GLXVideoSourceSGIX -_real_glXCreateGLXVideoSourceSGIX(Display *dpy, int screen, 
VLServer server, VLPath path, int nodeClass, VLNode drainNode); - -extern void -_real_glXDestroyGLXVideoSourceSGIX(Display *dpy, GLXVideoSourceSGIX src); -#endif - -#ifdef GLX_EXT_import_context -extern void -_real_glXFreeContextEXT(Display *dpy, GLXContext context); - -extern GLXContextID -_real_glXGetContextIDEXT(const GLXContext context); - -extern Display * -_real_glXGetCurrentDisplayEXT(void); - -extern GLXContext -_real_glXImportContextEXT(Display *dpy, GLXContextID contextID); - -extern int -_real_glXQueryContextInfoEXT(Display *dpy, GLXContext context, int attribute, int *value); -#endif - -#ifdef GLX_SGIX_fbconfig -extern int -_real_glXGetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config, int attribute, int *value); - -extern GLXFBConfigSGIX * -_real_glXChooseFBConfigSGIX(Display *dpy, int screen, int *attrib_list, int *nelements); - -extern GLXPixmap -_real_glXCreateGLXPixmapWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, Pixmap pixmap); - -extern GLXContext -_real_glXCreateContextWithConfigSGIX(Display *dpy, GLXFBConfigSGIX config, int render_type, GLXContext share_list, Bool direct); - -extern XVisualInfo * -_real_glXGetVisualFromFBConfigSGIX(Display *dpy, GLXFBConfigSGIX config); - -extern GLXFBConfigSGIX -_real_glXGetFBConfigFromVisualSGIX(Display *dpy, XVisualInfo *vis); -#endif - -#ifdef GLX_SGIX_pbuffer -extern GLXPbufferSGIX -_real_glXCreateGLXPbufferSGIX(Display *dpy, GLXFBConfigSGIX config, unsigned int width, unsigned int height, int *attrib_list); - -extern void -_real_glXDestroyGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf); - -extern int -_real_glXQueryGLXPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuf, int attribute, unsigned int *value); - -extern void -_real_glXSelectEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long mask); - -extern void -_real_glXGetSelectedEventSGIX(Display *dpy, GLXDrawable drawable, unsigned long *mask); -#endif - -#ifdef GLX_SGI_cushion -extern void -_real_glXCushionSGI(Display *dpy, Window win, 
float cushion); -#endif - -#ifdef GLX_SGIX_video_resize -extern int -_real_glXBindChannelToWindowSGIX(Display *dpy, int screen, int channel , Window window); - -extern int -_real_glXChannelRectSGIX(Display *dpy, int screen, int channel, int x, int y, int w, int h); - -extern int -_real_glXQueryChannelRectSGIX(Display *dpy, int screen, int channel, int *x, int *y, int *w, int *h); - -extern int -_real_glXQueryChannelDeltasSGIX(Display *dpy, int screen, int channel, int *dx, int *dy, int *dw, int *dh); - -extern int -_real_glXChannelRectSyncSGIX(Display *dpy, int screen, int channel, GLenum synctype); -#endif - -#if defined(_DM_BUFFER_H_) && defined(GLX_SGIX_dmbuffer) -extern Bool -_real_glXAssociateDMPbufferSGIX(Display *dpy, GLXPbufferSGIX pbuffer, DMparams *params, DMbuffer dmbuffer); -#endif - -#ifdef GLX_SGIX_swap_group -extern void -_real_glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable, GLXDrawable member); -#endif - -#ifdef GLX_SGIX_swap_barrier -extern void -_real_glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable, int barrier); - -extern Bool -_real_glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen, int *max); -#endif - -#ifdef GLX_SUN_get_transparent_index -extern Status -_real_glXGetTransparentIndexSUN(Display *dpy, Window overlay, Window underlay, long *pTransparent); -#endif - -#ifdef GLX_MESA_release_buffers -extern Bool -_real_glXReleaseBuffersMESA( Display *dpy, GLXDrawable d ); -#endif - -#ifdef GLX_MESA_set_3dfx_mode -extern Bool -_real_glXSet3DfxModeMESA( int mode ); -#endif - -#ifdef GLX_NV_vertex_array_range -extern void * -_real_glXAllocateMemoryNV(GLsizei size, GLfloat readfreq, GLfloat writefreq, GLfloat priority); -extern void -_real_glXFreeMemoryNV(GLvoid *pointer); -#endif - -#ifdef GLX_MESA_agp_offset -extern GLuint -_real_glXGetAGPOffsetMESA(const GLvoid *pointer); -#endif - -#ifdef GLX_MESA_copy_sub_buffer -extern void -_real_glXCopySubBufferMESA( Display *dpy, GLXDrawable drawable, - int x, int y, int width, int height 
); -#endif - -#endif /* REALGLX_H */ diff --git a/src/gallium/winsys/xlib/xlib.c b/src/gallium/winsys/xlib/xlib.c new file mode 100644 index 0000000000..da72228215 --- /dev/null +++ b/src/gallium/winsys/xlib/xlib.c @@ -0,0 +1,113 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/* + * Authors: + * Keith Whitwell + */ + +#include "xlib.h" +#include "xm_winsys.h" + +#include <stdlib.h> +#include <assert.h> + +/* Todo, replace all this with callback-structs provided by the + * individual implementations. 
+ */ + +enum mode { + MODE_TRACE, + MODE_BRW, + MODE_CELL, + MODE_SOFTPIPE +}; + + +/** + * Choose the xlib backend from the environment. + * + * XMESA_TRACE selects the trace driver and XMESA_BRW the brw driver + * (checked in that order). Otherwise, when built with GALLIUM_CELL, + * cell is used unless GALLIUM_NOCELL is set; the fallback is softpipe. + */ +static enum mode get_mode(void) +{ + if (getenv("XMESA_TRACE")) + return MODE_TRACE; + + if (getenv("XMESA_BRW")) + return MODE_BRW; + +#ifdef GALLIUM_CELL + if (!getenv("GALLIUM_NOCELL")) + return MODE_CELL; +#endif + + return MODE_SOFTPIPE; +} + +static void _init( void ) __attribute__((constructor)); + +/** + * Declared with __attribute__((constructor)): hands the xm_driver for the + * mode returned by get_mode() to xmesa_set_driver(). + * + * Each call is guarded by the backend's GALLIUM_* macro; when the selected + * backend was not compiled in, no driver is registered here. + */ +static void _init( void ) +{ + enum mode xlib_mode = get_mode(); + + switch (xlib_mode) { + case MODE_TRACE: +#if defined(GALLIUM_TRACE) && defined(GALLIUM_SOFTPIPE) + xmesa_set_driver( &xlib_trace_driver ); +#endif + break; + case MODE_BRW: +#if defined(GALLIUM_BRW) + xmesa_set_driver( &xlib_brw_driver ); +#endif + break; + case MODE_CELL: +#if defined(GALLIUM_CELL) + xmesa_set_driver( &xlib_cell_driver ); +#endif + break; + case MODE_SOFTPIPE: +#if defined(GALLIUM_SOFTPIPE) + xmesa_set_driver( &xlib_softpipe_driver ); +#endif + break; + default: + assert(0); + break; + } +} + + +/*********************************************************************** + * + * Butt-ugly hack to convince the linker not to throw away public GL + * symbols (they are all referenced from getprocaddress, I guess). 
+ */ +extern void (*linker_foo(const unsigned char *procName))(); +extern void (*glXGetProcAddress(const unsigned char *procName))(); + +extern void (*linker_foo(const unsigned char *procName))() +{ + return glXGetProcAddress(procName); +} diff --git a/src/gallium/winsys/xlib/xlib.h b/src/gallium/winsys/xlib/xlib.h new file mode 100644 index 0000000000..d602ab0b13 --- /dev/null +++ b/src/gallium/winsys/xlib/xlib.h @@ -0,0 +1,14 @@ + +#ifndef XLIB_H +#define XLIB_H + +#include "pipe/p_compiler.h" +#include "xm_winsys.h" + +extern struct xm_driver xlib_trace_driver; +extern struct xm_driver xlib_softpipe_driver; +extern struct xm_driver xlib_cell_driver; +extern struct xm_driver xlib_brw_driver; + + +#endif diff --git a/src/gallium/winsys/xlib/xlib_brw.h b/src/gallium/winsys/xlib/xlib_brw.h new file mode 100644 index 0000000000..be2dd147db --- /dev/null +++ b/src/gallium/winsys/xlib/xlib_brw.h @@ -0,0 +1,30 @@ +#ifndef XLIB_BRW_H +#define XLIB_BRW_H + +struct pipe_winsys; +struct pipe_buffer; +struct pipe_surface; +struct xmesa_buffer; + +unsigned xlib_brw_get_buffer_offset( struct pipe_winsys *pws, + struct pipe_buffer *buf, + unsigned access_flags ); + +void xlib_brw_buffer_subdata_typed( struct pipe_winsys *pws, + struct pipe_buffer *buf, + unsigned long offset, + unsigned long size, + const void *data, + unsigned data_type ); + + + +void xlib_brw_commands_aub(struct pipe_winsys *winsys, + unsigned *cmds, + unsigned nr_dwords); + +struct pipe_context * +xlib_create_brw_context( struct pipe_screen *screen, + void *unused ); + +#endif diff --git a/src/gallium/winsys/xlib/brw_aub.c b/src/gallium/winsys/xlib/xlib_brw_aub.c index 9e96efaa53..b6bd849ef2 100644 --- a/src/gallium/winsys/xlib/brw_aub.c +++ b/src/gallium/winsys/xlib/xlib_brw_aub.c @@ -31,11 +31,12 @@ #include <stdio.h> #include <stdlib.h> -#include "brw_aub.h" +#include "xlib_brw_aub.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_debug.h" +#include "util/u_debug.h" #include 
"util/u_memory.h" +#include "softpipe/sp_texture.h" struct brw_aubfile { @@ -322,10 +323,10 @@ void brw_aub_dump_bmp( struct brw_aubfile *aubfile, struct aub_dump_bmp db; unsigned format; - assert(surface->block.width == 1); - assert(surface->block.height == 1); + assert(surface->texture->block.width == 1); + assert(surface->texture->block.height == 1); - if (surface->block.size == 4) + if (surface->texture->block.size == 4) format = 0x7; else format = 0x3; @@ -334,8 +335,9 @@ void brw_aub_dump_bmp( struct brw_aubfile *aubfile, db.xmin = 0; db.ymin = 0; db.format = format; - db.bpp = surface->block.size * 8; - db.pitch = surface->stride/surface->block.size; + db.bpp = surface->texture->block.size * 8; + db.pitch = softpipe_texture(surface->texture)->stride[surface->level] / + surface->texture->block.size; db.xsize = surface->width; db.ysize = surface->height; db.addr = gtt_offset; diff --git a/src/gallium/winsys/xlib/brw_aub.h b/src/gallium/winsys/xlib/xlib_brw_aub.h index f5c60c7be2..f5c60c7be2 100644 --- a/src/gallium/winsys/xlib/brw_aub.h +++ b/src/gallium/winsys/xlib/xlib_brw_aub.h diff --git a/src/gallium/winsys/xlib/xlib_brw_context.c b/src/gallium/winsys/xlib/xlib_brw_context.c new file mode 100644 index 0000000000..09599507f4 --- /dev/null +++ b/src/gallium/winsys/xlib/xlib_brw_context.c @@ -0,0 +1,209 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/* + * Authors: + * Keith Whitwell + * Brian Paul + */ + + +//#include "glxheader.h" +//#include "xmesaP.h" + +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "i965simple/brw_winsys.h" +#include "xlib_brw_aub.h" +#include "xlib_brw.h" + + + + +#define XBCWS_BATCHBUFFER_SIZE 1024 + + +/* The backend to the brw driver (ie struct brw_winsys) is actually a + * per-context entity. 
+ */ +struct xlib_brw_context_winsys { + struct brw_winsys brw_context_winsys; /**< batch buffer funcs */ + struct aub_context *aub; + + struct pipe_winsys *pipe_winsys; + + unsigned batch_data[XBCWS_BATCHBUFFER_SIZE]; + unsigned batch_nr; + unsigned batch_size; + unsigned batch_alloc; +}; + + +/* Turn a brw_winsys into an xlib_brw_context_winsys: + */ +static inline struct xlib_brw_context_winsys * +xlib_brw_context_winsys( struct brw_winsys *sws ) +{ + return (struct xlib_brw_context_winsys *)sws; +} + + +/* Simple batchbuffer interface: + */ + +static unsigned *xbcws_batch_start( struct brw_winsys *sws, + unsigned dwords, + unsigned relocs ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + if (xbcws->batch_size < xbcws->batch_nr + dwords) + return NULL; + + xbcws->batch_alloc = xbcws->batch_nr + dwords; + return (void *)1; /* not a valid pointer! */ +} + +static void xbcws_batch_dword( struct brw_winsys *sws, + unsigned dword ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + assert(xbcws->batch_nr < xbcws->batch_alloc); + xbcws->batch_data[xbcws->batch_nr++] = dword; +} + +static void xbcws_batch_reloc( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags, + unsigned delta ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + assert(xbcws->batch_nr < xbcws->batch_alloc); + xbcws->batch_data[xbcws->batch_nr++] = + ( xlib_brw_get_buffer_offset( NULL, buf, access_flags ) + + delta ); +} + +static void xbcws_batch_end( struct brw_winsys *sws ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + assert(xbcws->batch_nr <= xbcws->batch_alloc); + xbcws->batch_alloc = 0; +} + +static void xbcws_batch_flush( struct brw_winsys *sws, + struct pipe_fence_handle **fence ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + assert(xbcws->batch_nr <= xbcws->batch_size); + + if (xbcws->batch_nr) { + 
xlib_brw_commands_aub( xbcws->pipe_winsys, + xbcws->batch_data, + xbcws->batch_nr ); + } + + xbcws->batch_nr = 0; +} + + + +/* Really a per-device function, just pass through: + */ +static unsigned xbcws_get_buffer_offset( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned access_flags ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + return xlib_brw_get_buffer_offset( xbcws->pipe_winsys, + buf, + access_flags ); +} + + +/* Really a per-device function, just pass through: + */ +static void xbcws_buffer_subdata_typed( struct brw_winsys *sws, + struct pipe_buffer *buf, + unsigned long offset, + unsigned long size, + const void *data, + unsigned data_type ) +{ + struct xlib_brw_context_winsys *xbcws = xlib_brw_context_winsys(sws); + + xlib_brw_buffer_subdata_typed( xbcws->pipe_winsys, + buf, + offset, + size, + data, + data_type ); +} + + +/** + * Create i965 hardware rendering context, but plugged into a + * dump-to-aubfile backend. + */ +struct pipe_context * +xlib_create_brw_context( struct pipe_screen *screen, + void *unused ) +{ + struct xlib_brw_context_winsys *xbcws = CALLOC_STRUCT( xlib_brw_context_winsys ); + + /* Fill in this struct with callbacks that i965simple will need to + * communicate with the window system, buffer manager, etc. 
+ */ + xbcws->brw_context_winsys.batch_start = xbcws_batch_start; + xbcws->brw_context_winsys.batch_dword = xbcws_batch_dword; + xbcws->brw_context_winsys.batch_reloc = xbcws_batch_reloc; + xbcws->brw_context_winsys.batch_end = xbcws_batch_end; + xbcws->brw_context_winsys.batch_flush = xbcws_batch_flush; + xbcws->brw_context_winsys.buffer_subdata_typed = xbcws_buffer_subdata_typed; + xbcws->brw_context_winsys.get_buffer_offset = xbcws_get_buffer_offset; + + xbcws->pipe_winsys = screen->winsys; /* redundant */ + + xbcws->batch_size = XBCWS_BATCHBUFFER_SIZE; + + /* Create the i965simple context: + */ +#ifdef GALLIUM_CELL + return NULL; +#else + return brw_create( screen, + &xbcws->brw_context_winsys, + 0 ); +#endif +} diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xlib_brw_screen.c index b7c10b6bca..8e1bfab2f5 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xlib_brw_screen.c @@ -33,19 +33,28 @@ */ -#include "glxheader.h" -#include "xmesaP.h" +//#include "state_trackers/xlib/glxheader.h" +//#include "state_trackers/xlib/xmesaP.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "i965simple/brw_winsys.h" #include "i965simple/brw_screen.h" -#include "brw_aub.h" -#include "xm_winsys_aub.h" +#include "i965simple/brw_context.h" +#include "xlib_brw_aub.h" +#include "xlib_brw.h" +#include "xlib.h" + +static struct pipe_buffer * +buffer_from_surface(struct pipe_surface *surface) +{ + struct brw_texture *texture = (struct brw_texture *)surface; + return texture->buffer; +} struct aub_buffer { char *data; @@ -142,29 +151,8 @@ aub_buffer_destroy(struct pipe_winsys *winsys, } -void xmesa_buffer_subdata_aub(struct pipe_winsys *winsys, - struct pipe_buffer *buf, - unsigned long offset, - unsigned long size, - const void *data, - unsigned aub_type, - unsigned aub_sub_type) -{ - struct aub_pipe_winsys *iws = 
aub_pipe_winsys(winsys); - struct aub_buffer *sbo = aub_bo(buf); - - assert(sbo->size > offset + size); - memcpy(sbo->data + offset, data, size); - - brw_aub_gtt_data( iws->aubfile, - sbo->offset + offset, - sbo->data + offset, - size, - aub_type, - aub_sub_type ); -} -void xmesa_commands_aub(struct pipe_winsys *winsys, +void xlib_brw_commands_aub(struct pipe_winsys *winsys, unsigned *cmds, unsigned nr_dwords) { @@ -182,16 +170,10 @@ void xmesa_commands_aub(struct pipe_winsys *winsys, } +/* XXX: fix me: + */ static struct aub_pipe_winsys *global_winsys = NULL; -void xmesa_display_aub( /* struct pipe_winsys *winsys, */ - struct pipe_surface *surface ) -{ -// struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); - brw_aub_dump_bmp( global_winsys->aubfile, - surface, - aub_bo(surface->buffer)->offset ); -} @@ -245,21 +227,13 @@ aub_user_buffer_create(struct pipe_winsys *winsys, void *ptr, unsigned bytes) */ static void aub_flush_frontbuffer( struct pipe_winsys *winsys, - struct pipe_surface *surf, - void *context_private) -{ - xmesa_display_aub( surf ); -} - -static struct pipe_surface * -aub_i915_surface_alloc(struct pipe_winsys *winsys) + struct pipe_surface *surface, + void *context_private) { - struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface); - if (surf) { - surf->refcount = 1; - surf->winsys = winsys; - } - return surf; +// struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); + brw_aub_dump_bmp( global_winsys->aubfile, + surface, + aub_bo(buffer_from_surface(surface))->offset ); } @@ -272,58 +246,48 @@ round_up(unsigned n, unsigned multiple) return (n + multiple - 1) & ~(multiple - 1); } -static int -aub_i915_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +aub_i915_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage) + unsigned usage, + unsigned *stride) { const unsigned alignment = 64; + struct 
pipe_format_block block; + unsigned nblocksx, nblocksy; - surf->width = width; - surf->height = height; - surf->format = format; - pf_get_block(format, &surf->block); - surf->nblocksx = pf_get_nblocksx(&surf->block, width); - surf->nblocksy = pf_get_nblocksy(&surf->block, height); - surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); - surf->usage = flags; - - assert(!surf->buffer); - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, - surf->stride * surf->nblocksy); - if(!surf->buffer) - return -1; - - return 0; -} + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, alignment); -static void -aub_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) -{ - struct pipe_surface *surf = *s; - surf->refcount--; - if (surf->refcount == 0) { - if (surf->buffer) - winsys_buffer_reference(winsys, &surf->buffer, NULL); - free(surf); - } - *s = NULL; + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); } - static const char * aub_get_name( struct pipe_winsys *winsys ) { return "Aub/xlib"; } -struct pipe_winsys * -xmesa_create_pipe_winsys_aub( void ) +static void +xlib_brw_destroy_pipe_winsys_aub( struct pipe_winsys *winsys ) + +{ + struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); + brw_aub_destroy(iws->aubfile); + free(iws->pool); + free(iws); +} + + + +static struct pipe_winsys * +xlib_create_brw_winsys( void ) { struct aub_pipe_winsys *iws = CALLOC_STRUCT( aub_pipe_winsys ); @@ -341,10 +305,9 @@ xmesa_create_pipe_winsys_aub( void ) iws->winsys.buffer_destroy = aub_buffer_destroy; iws->winsys.flush_frontbuffer = aub_flush_frontbuffer; iws->winsys.get_name = aub_get_name; + iws->winsys.destroy = xlib_brw_destroy_pipe_winsys_aub; - iws->winsys.surface_alloc = aub_i915_surface_alloc; - iws->winsys.surface_alloc_storage = aub_i915_surface_alloc_storage; - 
iws->winsys.surface_release = aub_i915_surface_release; + iws->winsys.surface_buffer_create = aub_i915_surface_buffer_create; iws->aubfile = brw_aubfile_create(); iws->size = AUB_BUF_SIZE; @@ -359,122 +322,49 @@ xmesa_create_pipe_winsys_aub( void ) } -void -xmesa_destroy_pipe_winsys_aub( struct pipe_winsys *winsys ) - +static struct pipe_screen * +xlib_create_brw_screen( void ) { - struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); - brw_aub_destroy(iws->aubfile); - free(iws->pool); - free(iws); -} - - - - - +#ifndef GALLIUM_CELL + struct pipe_winsys *winsys; + struct pipe_screen *screen; + winsys = xlib_create_brw_winsys(); + if (winsys == NULL) + return NULL; -#define IWS_BATCHBUFFER_SIZE 1024 - -struct aub_brw_winsys { - struct brw_winsys winsys; /**< batch buffer funcs */ - struct aub_context *aub; - - struct pipe_winsys *pipe_winsys; + screen = brw_create_screen(winsys, 0/* XXX pci_id */); + if (screen == NULL) + goto fail; - unsigned batch_data[IWS_BATCHBUFFER_SIZE]; - unsigned batch_nr; - unsigned batch_size; - unsigned batch_alloc; -}; + return screen; +fail: + if (winsys) + winsys->destroy( winsys ); -/* Turn a i965simple winsys into an aub/i965simple winsys: - */ -static inline struct aub_brw_winsys * -aub_brw_winsys( struct brw_winsys *sws ) -{ - return (struct aub_brw_winsys *)sws; +#endif + return NULL; } -/* Simple batchbuffer interface: +/* These per-screen functions are acually made available to the driver + * through the brw_winsys (per-context) entity. */ - -static unsigned *aub_i965_batch_start( struct brw_winsys *sws, - unsigned dwords, - unsigned relocs ) -{ - struct aub_brw_winsys *iws = aub_brw_winsys(sws); - - if (iws->batch_size < iws->batch_nr + dwords) - return NULL; - - iws->batch_alloc = iws->batch_nr + dwords; - return (void *)1; /* not a valid pointer! 
*/ -} - -static void aub_i965_batch_dword( struct brw_winsys *sws, - unsigned dword ) -{ - struct aub_brw_winsys *iws = aub_brw_winsys(sws); - - assert(iws->batch_nr < iws->batch_alloc); - iws->batch_data[iws->batch_nr++] = dword; -} - -static void aub_i965_batch_reloc( struct brw_winsys *sws, - struct pipe_buffer *buf, - unsigned access_flags, - unsigned delta ) -{ - struct aub_brw_winsys *iws = aub_brw_winsys(sws); - - assert(iws->batch_nr < iws->batch_alloc); - iws->batch_data[iws->batch_nr++] = aub_bo(buf)->offset + delta; -} - -static unsigned aub_i965_get_buffer_offset( struct brw_winsys *sws, - struct pipe_buffer *buf, - unsigned access_flags ) +unsigned xlib_brw_get_buffer_offset( struct pipe_winsys *pws, + struct pipe_buffer *buf, + unsigned access_flags ) { return aub_bo(buf)->offset; } -static void aub_i965_batch_end( struct brw_winsys *sws ) +void xlib_brw_buffer_subdata_typed( struct pipe_winsys *pws, + struct pipe_buffer *buf, + unsigned long offset, + unsigned long size, + const void *data, + unsigned data_type ) { - struct aub_brw_winsys *iws = aub_brw_winsys(sws); - - assert(iws->batch_nr <= iws->batch_alloc); - iws->batch_alloc = 0; -} - -static void aub_i965_batch_flush( struct brw_winsys *sws, - struct pipe_fence_handle **fence ) -{ - struct aub_brw_winsys *iws = aub_brw_winsys(sws); - assert(iws->batch_nr <= iws->batch_size); - - if (iws->batch_nr) { - xmesa_commands_aub( iws->pipe_winsys, - iws->batch_data, - iws->batch_nr ); - } - - iws->batch_nr = 0; -} - - - -static void aub_i965_buffer_subdata_typed(struct brw_winsys *winsys, - struct pipe_buffer *buf, - unsigned long offset, - unsigned long size, - const void *data, - unsigned data_type) -{ - struct aub_brw_winsys *iws = aub_brw_winsys(winsys); unsigned aub_type = DW_GENERAL_STATE; unsigned aub_sub_type; @@ -545,42 +435,36 @@ static void aub_i965_buffer_subdata_typed(struct brw_winsys *winsys, break; } - xmesa_buffer_subdata_aub( iws->pipe_winsys, - buf, - offset, - size, - data, - 
aub_type, - aub_sub_type ); -} - -/** - * Create i965 hardware rendering context. - */ -struct pipe_context * -xmesa_create_i965simple( struct pipe_winsys *winsys ) -{ - struct aub_brw_winsys *iws = CALLOC_STRUCT( aub_brw_winsys ); - struct pipe_screen *screen = brw_create_screen(winsys, 0/* XXX pci_id */); - - /* Fill in this struct with callbacks that i965simple will need to - * communicate with the window system, buffer manager, etc. - */ - iws->winsys.batch_start = aub_i965_batch_start; - iws->winsys.batch_dword = aub_i965_batch_dword; - iws->winsys.batch_reloc = aub_i965_batch_reloc; - iws->winsys.batch_end = aub_i965_batch_end; - iws->winsys.batch_flush = aub_i965_batch_flush; - iws->winsys.buffer_subdata_typed = aub_i965_buffer_subdata_typed; - iws->winsys.get_buffer_offset = aub_i965_get_buffer_offset; + { + struct aub_pipe_winsys *iws = aub_pipe_winsys(pws); + struct aub_buffer *sbo = aub_bo(buf); - iws->pipe_winsys = winsys; + assert(sbo->size > offset + size); + memcpy(sbo->data + offset, data, size); - iws->batch_size = IWS_BATCHBUFFER_SIZE; + brw_aub_gtt_data( iws->aubfile, + sbo->offset + offset, + sbo->data + offset, + size, + aub_type, + aub_sub_type ); + } +} + - /* Create the i965simple context: - */ - return brw_create( screen, - &iws->winsys, - 0 ); +static void +xlib_brw_display_surface(struct xmesa_buffer *b, + struct pipe_surface *surf) +{ + brw_aub_dump_bmp( global_winsys->aubfile, + surf, + aub_bo(buffer_from_surface(surf))->offset ); } + + +struct xm_driver xlib_brw_driver = +{ + .create_pipe_screen = xlib_create_brw_screen, + .create_pipe_context = xlib_create_brw_context, + .display_surface = xlib_brw_display_surface, +}; diff --git a/src/gallium/winsys/xlib/xlib_cell.c b/src/gallium/winsys/xlib/xlib_cell.c new file mode 100644 index 0000000000..c87564f4dc --- /dev/null +++ b/src/gallium/winsys/xlib/xlib_cell.c @@ -0,0 +1,437 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten 
Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/* + * Authors: + * Keith Whitwell + * Brian Paul + */ + +#include "xlib.h" + +#ifdef GALLIUM_CELL + +#include "xm_api.h" + +#undef ASSERT +#undef Elements + +#include "pipe/internal/p_winsys_screen.h" +#include "pipe/p_format.h" +#include "pipe/p_context.h" +#include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "cell/ppu/cell_context.h" +#include "cell/ppu/cell_screen.h" +#include "cell/ppu/cell_winsys.h" +#include "cell/ppu/cell_texture.h" + + +/** + * Subclass of pipe_buffer for Xlib winsys. + * Low-level OS/window system memory buffer + */ +struct xm_buffer +{ + struct pipe_buffer base; + boolean userBuffer; /** Is this a user-space buffer? 
*/ + void *data; + void *mapped; + + XImage *tempImage; + int shm; +}; + + +/** + * Subclass of pipe_winsys for Xlib winsys + */ +struct xmesa_pipe_winsys +{ + struct pipe_winsys base; +}; + + + +/** Cast wrapper */ +static INLINE struct xm_buffer * +xm_buffer( struct pipe_buffer *buf ) +{ + return (struct xm_buffer *)buf; +} + + +/* Most callbacks map direcly onto dri_bufmgr operations: + */ +static void * +xm_buffer_map(struct pipe_winsys *pws, struct pipe_buffer *buf, + unsigned flags) +{ + struct xm_buffer *xm_buf = xm_buffer(buf); + xm_buf->mapped = xm_buf->data; + return xm_buf->mapped; +} + +static void +xm_buffer_unmap(struct pipe_winsys *pws, struct pipe_buffer *buf) +{ + struct xm_buffer *xm_buf = xm_buffer(buf); + xm_buf->mapped = NULL; +} + +static void +xm_buffer_destroy(struct pipe_winsys *pws, + struct pipe_buffer *buf) +{ + struct xm_buffer *oldBuf = xm_buffer(buf); + + if (oldBuf->data) { + { + if (!oldBuf->userBuffer) { + align_free(oldBuf->data); + } + } + + oldBuf->data = NULL; + } + + free(oldBuf); +} + + +/** + * For Cell. Basically, rearrange the pixels/quads from this layout: + * +--+--+--+--+ + * |p0|p1|p2|p3|.... + * +--+--+--+--+ + * + * to this layout: + * +--+--+ + * |p0|p1|.... + * +--+--+ + * |p2|p3| + * +--+--+ + */ +static void +twiddle_tile(const uint *tileIn, uint *tileOut) +{ + int y, x; + + for (y = 0; y < TILE_SIZE; y+=2) { + for (x = 0; x < TILE_SIZE; x+=2) { + int k = 4 * (y/2 * TILE_SIZE/2 + x/2); + tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k]; + tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1]; + tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2]; + tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3]; + } + } +} + + + +/** + * Display a surface that's in a tiled configuration. That is, all the + * pixels for a TILE_SIZExTILE_SIZE block are contiguous in memory. 
+ */ +static void +xlib_cell_display_surface(struct xmesa_buffer *b, struct pipe_surface *surf) +{ + XImage *ximage; + struct xm_buffer *xm_buf = xm_buffer( + cell_texture(surf->texture)->buffer); + const uint tilesPerRow = (surf->width + TILE_SIZE - 1) / TILE_SIZE; + uint x, y; + + ximage = b->tempImage; + + /* check that the XImage has been previously initialized */ + assert(ximage->format); + assert(ximage->bitmap_unit); + + /* update XImage's fields */ + ximage->width = TILE_SIZE; + ximage->height = TILE_SIZE; + ximage->bytes_per_line = TILE_SIZE * 4; + + for (y = 0; y < surf->height; y += TILE_SIZE) { + for (x = 0; x < surf->width; x += TILE_SIZE) { + uint tmpTile[TILE_SIZE * TILE_SIZE]; + int tx = x / TILE_SIZE; + int ty = y / TILE_SIZE; + int offset = ty * tilesPerRow + tx; + int w = TILE_SIZE; + int h = TILE_SIZE; + + if (y + h > surf->height) + h = surf->height - y; + if (x + w > surf->width) + w = surf->width - x; + + /* offset in pixels */ + offset *= TILE_SIZE * TILE_SIZE; + + /* twiddle from ximage buffer to temp tile */ + twiddle_tile((uint *) xm_buf->data + offset, tmpTile); + /* display temp tile data */ + ximage->data = (char *) tmpTile; + XPutImage(b->xm_visual->display, b->drawable, b->gc, + ximage, 0, 0, x, y, w, h); + } + } +} + + + + + +static void +xm_flush_frontbuffer(struct pipe_winsys *pws, + struct pipe_surface *surf, + void *context_private) +{ + /* + * The front color buffer is actually just another XImage buffer. + * This function copies that XImage to the actual X Window. 
+ */ + XMesaContext xmctx = (XMesaContext) context_private; + xlib_cell_display_surface(xmctx->xm_buffer, surf); +} + + + +static const char * +xm_get_name(struct pipe_winsys *pws) +{ + return "Xlib/Cell"; +} + + +static struct pipe_buffer * +xm_buffer_create(struct pipe_winsys *pws, + unsigned alignment, + unsigned usage, + unsigned size) +{ + struct xm_buffer *buffer = CALLOC_STRUCT(xm_buffer); + + buffer->base.refcount = 1; + buffer->base.alignment = alignment; + buffer->base.usage = usage; + buffer->base.size = size; + + + if (buffer->data == NULL) { + buffer->shm = 0; + + /* align to 16-byte multiple for Cell */ + buffer->data = align_malloc(size, max(alignment, 16)); + } + + return &buffer->base; +} + + +/** + * Create buffer which wraps user-space data. + */ +static struct pipe_buffer * +xm_user_buffer_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) +{ + struct xm_buffer *buffer = CALLOC_STRUCT(xm_buffer); + buffer->base.refcount = 1; + buffer->base.size = bytes; + buffer->userBuffer = TRUE; + buffer->data = ptr; + buffer->shm = 0; + + return &buffer->base; +} + + + +/** + * Round n up to next multiple. + */ +static INLINE unsigned +round_up(unsigned n, unsigned multiple) +{ + return (n + multiple - 1) & ~(multiple - 1); +} + +static struct pipe_buffer * +xm_surface_buffer_create(struct pipe_winsys *winsys, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned *stride) +{ + const unsigned alignment = 64; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; + + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, alignment); + + return winsys->buffer_create(winsys, alignment, + usage, + /* XXX a bit of a hack */ + *stride * round_up(nblocksy, TILE_SIZE)); +} + + +/* + * Fence functions - basically nothing to do, as we don't create any actual + * fence objects. 
+ */ + +static void +xm_fence_reference(struct pipe_winsys *sws, struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ +} + + +static int +xm_fence_signalled(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +static int +xm_fence_finish(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + + +static struct pipe_winsys * +xlib_create_cell_winsys( void ) +{ + static struct xmesa_pipe_winsys *ws = NULL; + + if (!ws) { + ws = CALLOC_STRUCT(xmesa_pipe_winsys); + + /* Fill in this struct with callbacks that pipe will need to + * communicate with the window system, buffer manager, etc. + */ + ws->base.buffer_create = xm_buffer_create; + ws->base.user_buffer_create = xm_user_buffer_create; + ws->base.buffer_map = xm_buffer_map; + ws->base.buffer_unmap = xm_buffer_unmap; + ws->base.buffer_destroy = xm_buffer_destroy; + + ws->base.surface_buffer_create = xm_surface_buffer_create; + + ws->base.fence_reference = xm_fence_reference; + ws->base.fence_signalled = xm_fence_signalled; + ws->base.fence_finish = xm_fence_finish; + + ws->base.flush_frontbuffer = xm_flush_frontbuffer; + ws->base.get_name = xm_get_name; + } + + return &ws->base; +} + + +static struct pipe_screen * +xlib_create_cell_screen( struct pipe_winsys *pws ) +{ + struct pipe_winsys *winsys; + struct pipe_screen *screen; + + winsys = xlib_create_cell_winsys(); + if (winsys == NULL) + return NULL; + + screen = cell_create_screen(winsys); + if (screen == NULL) + goto fail; + + return screen; + +fail: + if (winsys) + winsys->destroy( winsys ); + + return NULL; +} + + +static struct pipe_context * +xlib_create_cell_context( struct pipe_screen *screen, + void *priv ) +{ + struct pipe_context *pipe; + + + /* This takes a cell_winsys pointer, but probably that should be + * created and stored at screen creation, not context creation. 
+ * + * The actual cell_winsys value isn't used for anything, so just + * passing NULL for now. + */ + pipe = cell_create_context( screen, NULL); + if (pipe == NULL) + goto fail; + + pipe->priv = priv; + + return pipe; + +fail: + return NULL; +} + +struct xm_driver xlib_cell_driver = +{ + .create_pipe_screen = xlib_create_cell_screen, + .create_pipe_context = xlib_create_cell_context, + .display_surface = xlib_cell_display_surface, +}; + +#else + +struct xm_driver xlib_cell_driver = +{ + .create_pipe_screen = NULL, + .create_pipe_context = NULL, + .display_surface = NULL, +}; + +#endif diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xlib_softpipe.c index 3334af175b..71f12b2b47 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xlib_softpipe.c @@ -33,35 +33,21 @@ */ -#include "glxheader.h" -#include "xmesaP.h" +#include "xm_api.h" #undef ASSERT #undef Elements -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" #include "softpipe/sp_winsys.h" +#include "softpipe/sp_texture.h" -#ifdef GALLIUM_CELL -#include "cell/ppu/cell_context.h" -#include "cell/ppu/cell_screen.h" -#include "cell/ppu/cell_winsys.h" -#else -#define TILE_SIZE 32 /* avoid compilation errors */ -#endif - -#ifdef GALLIUM_TRACE -#include "trace/tr_screen.h" -#include "trace/tr_context.h" -#endif - -#include "xm_winsys_aub.h" - +#include "xlib.h" /** * Subclass of pipe_buffer for Xlib winsys. @@ -73,10 +59,10 @@ struct xm_buffer boolean userBuffer; /** Is this a user-space buffer? 
*/ void *data; void *mapped; - + XImage *tempImage; +#ifdef USE_XSHM int shm; -#if defined(USE_XSHM) && !defined(XFree86Server) XShmSegmentInfo shminfo; #endif }; @@ -88,8 +74,10 @@ struct xm_buffer struct xmesa_pipe_winsys { struct pipe_winsys base; - struct xmesa_visual *xm_visual; +/* struct xmesa_visual *xm_visual; */ +#ifdef USE_XSHM int shm; +#endif }; @@ -105,9 +93,13 @@ xm_buffer( struct pipe_buffer *buf ) /** * X Shared Memory Image extension code */ -#if defined(USE_XSHM) && !defined(XFree86Server) - +#ifdef USE_XSHM #define XSHM_ENABLED(b) ((b)->shm) +#else +#define XSHM_ENABLED(b) 0 +#endif + +#ifdef USE_XSHM static volatile int mesaXErrorFlag = 0; @@ -115,7 +107,7 @@ static volatile int mesaXErrorFlag = 0; * Catches potential Xlib errors. */ static int -mesaHandleXError(XMesaDisplay *dpy, XErrorEvent *event) +mesaHandleXError(Display *dpy, XErrorEvent *event) { (void) dpy; (void) event; @@ -157,10 +149,7 @@ alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb, * errors at different points if the extension won't work. Therefore * we have to be very careful... 
*/ -#if 0 - GC gc; -#endif - int (*old_handler)(XMesaDisplay *, XErrorEvent *); + int (*old_handler)(Display *, XErrorEvent *); b->tempImage = XShmCreateImage(xmb->xm_visual->display, xmb->xm_visual->visinfo->visual, @@ -192,41 +181,13 @@ alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb, return; } - - /* Finally, try an XShmPutImage to be really sure the extension works */ -#if 0 - gc = XCreateGC(xmb->xm_visual->display, xmb->drawable, 0, NULL); - XShmPutImage(xmb->xm_visual->display, xmb->drawable, gc, - b->tempImage, 0, 0, 0, 0, 1, 1 /*one pixel*/, False); - XSync(xmb->xm_visual->display, False); - XFreeGC(xmb->xm_visual->display, gc); - (void) XSetErrorHandler(old_handler); - if (mesaXErrorFlag) { - XFlush(xmb->xm_visual->display); - mesaXErrorFlag = 0; - XDestroyImage(b->tempImage); - b->tempImage = NULL; - b->shm = 0; - return; - } -#endif + b->shm = 1; } -#else - -#define XSHM_ENABLED(b) 0 - -static void -alloc_shm_ximage(struct xm_buffer *b, struct xmesa_buffer *xmb, - unsigned width, unsigned height) -{ - b->shm = 0; -} #endif /* USE_XSHM */ - /* Most callbacks map direcly onto dri_bufmgr operations: */ static void * @@ -252,7 +213,7 @@ xm_buffer_destroy(struct pipe_winsys *pws, struct xm_buffer *oldBuf = xm_buffer(buf); if (oldBuf->data) { -#if defined(USE_XSHM) && !defined(XFree86Server) +#ifdef USE_XSHM if (oldBuf->shminfo.shmid >= 0) { shmdt(oldBuf->shminfo.shmaddr); shmctl(oldBuf->shminfo.shmid, IPC_RMID, 0); @@ -276,154 +237,47 @@ xm_buffer_destroy(struct pipe_winsys *pws, /** - * For Cell. Basically, rearrange the pixels/quads from this layout: - * +--+--+--+--+ - * |p0|p1|p2|p3|.... - * +--+--+--+--+ - * - * to this layout: - * +--+--+ - * |p0|p1|.... 
- * +--+--+ - * |p2|p3| - * +--+--+ - */ -static void -twiddle_tile(const uint *tileIn, uint *tileOut) -{ - int y, x; - - for (y = 0; y < TILE_SIZE; y+=2) { - for (x = 0; x < TILE_SIZE; x+=2) { - int k = 4 * (y/2 * TILE_SIZE/2 + x/2); - tileOut[y * TILE_SIZE + (x + 0)] = tileIn[k]; - tileOut[y * TILE_SIZE + (x + 1)] = tileIn[k+1]; - tileOut[(y + 1) * TILE_SIZE + (x + 0)] = tileIn[k+2]; - tileOut[(y + 1) * TILE_SIZE + (x + 1)] = tileIn[k+3]; - } - } -} - - - -/** - * Display a surface that's in a tiled configuration. That is, all the - * pixels for a TILE_SIZExTILE_SIZE block are contiguous in memory. - */ -static void -xmesa_display_surface_tiled(XMesaBuffer b, const struct pipe_surface *surf) -{ - XImage *ximage; - struct xm_buffer *xm_buf = xm_buffer(surf->buffer); - const uint tilesPerRow = (surf->width + TILE_SIZE - 1) / TILE_SIZE; - uint x, y; - - if (XSHM_ENABLED(xm_buf) && (xm_buf->tempImage == NULL)) { - alloc_shm_ximage(xm_buf, b, TILE_SIZE, TILE_SIZE); - } - - ximage = (XSHM_ENABLED(xm_buf)) ? 
xm_buf->tempImage : b->tempImage; - - /* check that the XImage has been previously initialized */ - assert(ximage->format); - assert(ximage->bitmap_unit); - - if (!XSHM_ENABLED(xm_buf)) { - /* update XImage's fields */ - ximage->width = TILE_SIZE; - ximage->height = TILE_SIZE; - ximage->bytes_per_line = TILE_SIZE * 4; - } - - for (y = 0; y < surf->height; y += TILE_SIZE) { - for (x = 0; x < surf->width; x += TILE_SIZE) { - uint tmpTile[TILE_SIZE * TILE_SIZE]; - int tx = x / TILE_SIZE; - int ty = y / TILE_SIZE; - int offset = ty * tilesPerRow + tx; - int w = TILE_SIZE; - int h = TILE_SIZE; - - if (y + h > surf->height) - h = surf->height - y; - if (x + w > surf->width) - w = surf->width - x; - - /* offset in pixels */ - offset *= TILE_SIZE * TILE_SIZE; - - if (XSHM_ENABLED(xm_buf)) { - ximage->data = (char *) xm_buf->data + 4 * offset; - /* make copy of tile data */ - memcpy(tmpTile, (uint *) ximage->data, sizeof(tmpTile)); - /* twiddle from temp to ximage in shared memory */ - twiddle_tile(tmpTile, (uint *) ximage->data); - /* display image in shared memory */ -#if defined(USE_XSHM) && !defined(XFree86Server) - XShmPutImage(b->xm_visual->display, b->drawable, b->gc, - ximage, 0, 0, x, y, w, h, False); -#endif - } - else { - /* twiddel from ximage buffer to temp tile */ - twiddle_tile((uint *) xm_buf->data + offset, tmpTile); - /* display temp tile data */ - ximage->data = (char *) tmpTile; - XPutImage(b->xm_visual->display, b->drawable, b->gc, - ximage, 0, 0, x, y, w, h); - } - } - } -} - - -/** * Display/copy the image in the surface into the X window specified * by the XMesaBuffer. 
*/ -void -xmesa_display_surface(XMesaBuffer b, const struct pipe_surface *surf) +static void +xlib_softpipe_display_surface(struct xmesa_buffer *b, + struct pipe_surface *surf) { XImage *ximage; - struct xm_buffer *xm_buf = xm_buffer(surf->buffer); + struct softpipe_texture *spt = softpipe_texture(surf->texture); + struct xm_buffer *xm_buf = xm_buffer(spt->buffer); static boolean no_swap = 0; static boolean firsttime = 1; - static int tileSize = 0; if (firsttime) { no_swap = getenv("SP_NO_RAST") != NULL; -#ifdef GALLIUM_CELL - if (!getenv("GALLIUM_NOCELL")) { - tileSize = 32; /** probably temporary */ - } -#endif firsttime = 0; } if (no_swap) return; - if (tileSize) { - xmesa_display_surface_tiled(b, surf); - return; - } - +#ifdef USE_XSHM if (XSHM_ENABLED(xm_buf) && (xm_buf->tempImage == NULL)) { - assert(surf->block.width == 1); - assert(surf->block.height == 1); - alloc_shm_ximage(xm_buf, b, surf->stride/surf->block.size, surf->height); + assert(surf->texture->block.width == 1); + assert(surf->texture->block.height == 1); + alloc_shm_ximage(xm_buf, b, spt->stride[surf->level] / + surf->texture->block.size, surf->height); } +#endif ximage = (XSHM_ENABLED(xm_buf)) ? 
xm_buf->tempImage : b->tempImage; ximage->data = xm_buf->data; /* display image in Window */ +#ifdef USE_XSHM if (XSHM_ENABLED(xm_buf)) { -#if defined(USE_XSHM) && !defined(XFree86Server) XShmPutImage(b->xm_visual->display, b->drawable, b->gc, ximage, 0, 0, 0, 0, surf->width, surf->height, False); + } else #endif - } else { + { /* check that the XImage has been previously initialized */ assert(ximage->format); assert(ximage->bitmap_unit); @@ -431,7 +285,7 @@ xmesa_display_surface(XMesaBuffer b, const struct pipe_surface *surf) /* update XImage's fields */ ximage->width = surf->width; ximage->height = surf->height; - ximage->bytes_per_line = surf->stride; + ximage->bytes_per_line = spt->stride[surf->level]; XPutImage(b->xm_visual->display, b->drawable, b->gc, ximage, 0, 0, 0, 0, surf->width, surf->height); @@ -449,7 +303,7 @@ xm_flush_frontbuffer(struct pipe_winsys *pws, * This function copies that XImage to the actual X Window. */ XMesaContext xmctx = (XMesaContext) context_private; - xmesa_display_surface(xmctx->xm_buffer, surf); + xlib_softpipe_display_surface(xmctx->xm_buffer, surf); } @@ -468,17 +322,9 @@ xm_buffer_create(struct pipe_winsys *pws, unsigned size) { struct xm_buffer *buffer = CALLOC_STRUCT(xm_buffer); -#if defined(USE_XSHM) && !defined(XFree86Server) +#ifdef USE_XSHM struct xmesa_pipe_winsys *xpws = (struct xmesa_pipe_winsys *) pws; -#endif - - buffer->base.refcount = 1; - buffer->base.alignment = alignment; - buffer->base.usage = usage; - buffer->base.size = size; - -#if defined(USE_XSHM) && !defined(XFree86Server) buffer->shminfo.shmid = -1; buffer->shminfo.shmaddr = (char *) -1; @@ -487,13 +333,17 @@ xm_buffer_create(struct pipe_winsys *pws, if (alloc_shm(buffer, size)) { buffer->data = buffer->shminfo.shmaddr; + buffer->shm = 1; } } #endif - if (buffer->data == NULL) { - buffer->shm = 0; + buffer->base.refcount = 1; + buffer->base.alignment = alignment; + buffer->base.usage = usage; + buffer->base.size = size; + if (buffer->data == NULL) { /* 
align to 16-byte multiple for Cell */ buffer->data = align_malloc(size, max(alignment, 16)); } @@ -513,87 +363,33 @@ xm_user_buffer_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) buffer->base.size = bytes; buffer->userBuffer = TRUE; buffer->data = ptr; +#ifdef USE_XSHM buffer->shm = 0; +#endif return &buffer->base; } - -/** - * Round n up to next multiple. - */ -static INLINE unsigned -round_up(unsigned n, unsigned multiple) -{ - return (n + multiple - 1) & ~(multiple - 1); -} - -static int -xm_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +xm_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, - enum pipe_format format, - unsigned flags, - unsigned tex_usage) + enum pipe_format format, + unsigned usage, + unsigned *stride) { const unsigned alignment = 64; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; - surf->width = width; - surf->height = height; - surf->format = format; - pf_get_block(format, &surf->block); - surf->nblocksx = pf_get_nblocksx(&surf->block, width); - surf->nblocksy = pf_get_nblocksy(&surf->block, height); - surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); - surf->usage = flags; - - assert(!surf->buffer); - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, -#ifdef GALLIUM_CELL /* XXX a bit of a hack */ - surf->stride * round_up(surf->nblocksy, TILE_SIZE)); -#else - surf->stride * surf->nblocksy); -#endif - - if(!surf->buffer) - return -1; - - return 0; -} - - -/** - * Called via winsys->surface_alloc() to create new surfaces. 
- */ -static struct pipe_surface * -xm_surface_alloc(struct pipe_winsys *ws) -{ - struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - - assert(ws); - - surface->refcount = 1; - surface->winsys = ws; - - return surface; -} - - + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = align(nblocksx * block.size, alignment); -static void -xm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) -{ - struct pipe_surface *surf = *s; - assert(!surf->texture); - surf->refcount--; - if (surf->refcount == 0) { - if (surf->buffer) - winsys_buffer_reference(winsys, &surf->buffer, NULL); - free(surf); - } - *s = NULL; + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); } @@ -625,34 +421,15 @@ xm_fence_finish(struct pipe_winsys *sws, struct pipe_fence_handle *fence, } -/** - * Return pointer to a pipe_winsys object. - * For Xlib, this is a singleton object. - * Nothing special for the Xlib driver so no subclassing or anything. - */ -struct pipe_winsys * -xmesa_get_pipe_winsys_aub(struct xmesa_visual *xm_vis) -{ - static struct xmesa_pipe_winsys *ws = NULL; - - if (!ws) { - ws = (struct xmesa_pipe_winsys *) xmesa_create_pipe_winsys_aub(); - } - return &ws->base; -} - static struct pipe_winsys * -xmesa_get_pipe_winsys(struct xmesa_visual *xm_vis) +xlib_create_softpipe_winsys( void ) { static struct xmesa_pipe_winsys *ws = NULL; if (!ws) { ws = CALLOC_STRUCT(xmesa_pipe_winsys); - ws->xm_visual = xm_vis; - ws->shm = xmesa_check_for_xshm(xm_vis->display); - /* Fill in this struct with callbacks that pipe will need to * communicate with the window system, buffer manager, etc. 
*/ @@ -662,9 +439,7 @@ xmesa_get_pipe_winsys(struct xmesa_visual *xm_vis) ws->base.buffer_unmap = xm_buffer_unmap; ws->base.buffer_destroy = xm_buffer_destroy; - ws->base.surface_alloc = xm_surface_alloc; - ws->base.surface_alloc_storage = xm_surface_alloc_storage; - ws->base.surface_release = xm_surface_release; + ws->base.surface_buffer_create = xm_surface_buffer_create; ws->base.fence_reference = xm_fence_reference; ws->base.fence_signalled = xm_fence_signalled; @@ -678,42 +453,54 @@ xmesa_get_pipe_winsys(struct xmesa_visual *xm_vis) } -struct pipe_context * -xmesa_create_pipe_context(XMesaContext xmesa, uint pixelformat) +static struct pipe_screen * +xlib_create_softpipe_screen( void ) { - struct pipe_winsys *pws; - struct pipe_context *pipe; - - if (getenv("XM_AUB")) { - pws = xmesa_get_pipe_winsys_aub(xmesa->xm_visual); - } - else { - pws = xmesa_get_pipe_winsys(xmesa->xm_visual); - } + struct pipe_winsys *winsys; + struct pipe_screen *screen; -#ifdef GALLIUM_CELL - if (!getenv("GALLIUM_NOCELL")) { - struct cell_winsys *cws = cell_get_winsys(pixelformat); - struct pipe_screen *screen = cell_create_screen(pws); + winsys = xlib_create_softpipe_winsys(); + if (winsys == NULL) + return NULL; - pipe = cell_create_context(screen, cws); - } - else -#endif - { - struct pipe_screen *screen = softpipe_create_screen(pws); + screen = softpipe_create_screen(winsys); + if (screen == NULL) + goto fail; - pipe = softpipe_create(screen, pws, NULL); + return screen; -#ifdef GALLIUM_TRACE - screen = trace_screen_create(screen); - - pipe = trace_context_create(screen, pipe); -#endif - } +fail: + if (winsys) + winsys->destroy( winsys ); + + return NULL; +} - if (pipe) - pipe->priv = xmesa; +static struct pipe_context * +xlib_create_softpipe_context( struct pipe_screen *screen, + void *context_private ) +{ + struct pipe_context *pipe; + + pipe = softpipe_create(screen, screen->winsys, NULL); + if (pipe == NULL) + goto fail; + + pipe->priv = context_private; return pipe; + +fail: + 
/* Free stuff here */ + return NULL; } + +struct xm_driver xlib_softpipe_driver = +{ + .create_pipe_screen = xlib_create_softpipe_screen, + .create_pipe_context = xlib_create_softpipe_context, + .display_surface = xlib_softpipe_display_surface +}; + + + diff --git a/src/gallium/winsys/xlib/xlib_trace.c b/src/gallium/winsys/xlib/xlib_trace.c new file mode 100644 index 0000000000..37095c5d8e --- /dev/null +++ b/src/gallium/winsys/xlib/xlib_trace.c @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * + **************************************************************************/ + +/* + * Authors: + * Keith Whitwell + * Brian Paul + */ + + +#include "xlib.h" + +#include "trace/tr_screen.h" +#include "trace/tr_context.h" + +#include "pipe/p_screen.h" + + + +static struct pipe_screen * +xlib_create_trace_screen( void ) +{ + struct pipe_screen *screen, *trace_screen; + + screen = xlib_softpipe_driver.create_pipe_screen(); + if (screen == NULL) + goto fail; + + /* Wrap it: + */ + trace_screen = trace_screen_create(screen); + if (trace_screen == NULL) + goto fail; + + return trace_screen; + +fail: + if (screen) + screen->destroy( screen ); + return NULL; +} + +static struct pipe_context * +xlib_create_trace_context( struct pipe_screen *screen, + void *priv ) +{ + struct pipe_context *pipe, *trace_pipe; + + pipe = xlib_softpipe_driver.create_pipe_context( screen, priv ); + if (pipe == NULL) + goto fail; + + /* Wrap it: + */ + trace_pipe = trace_context_create(screen, pipe); + if (trace_pipe == NULL) + goto fail; + + trace_pipe->priv = priv; + + return trace_pipe; + +fail: + return NULL; +} + +static void +xlib_trace_display_surface( struct xmesa_buffer *buffer, + struct pipe_surface *surf ) +{ + /* ?? + */ + xlib_softpipe_driver.display_surface( buffer, surf ); +} + + +struct xm_driver xlib_trace_driver = +{ + .create_pipe_screen = xlib_create_trace_screen, + .create_pipe_context = xlib_create_trace_context, + .display_surface = xlib_trace_display_surface, +}; diff --git a/src/gallium/winsys/xlib/xm_image.c b/src/gallium/winsys/xlib/xm_image.c deleted file mode 100644 index 087b4e4c3a..0000000000 --- a/src/gallium/winsys/xlib/xm_image.c +++ /dev/null @@ -1,133 +0,0 @@ -/************************************************************************** - -Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. -All Rights Reserved. 
- -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sub license, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial portions -of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. -IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR -ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Kevin E. 
Martin <kevin@precisioninsight.com> - * Brian Paul <brian@precisioninsight.com> - */ - -#include <stdlib.h> -#include <X11/Xmd.h> - -#include "glxheader.h" -#include "xmesaP.h" - -#ifdef XFree86Server - -#ifdef ROUNDUP -#undef ROUNDUP -#endif - -#define ROUNDUP(nbytes, pad) ((((nbytes) + ((pad)-1)) / (pad)) * ((pad)>>3)) - -XMesaImage *XMesaCreateImage(int bitsPerPixel, int width, int height, char *data) -{ - XMesaImage *image; - - image = (XMesaImage *)xalloc(sizeof(XMesaImage)); - - if (image) { - image->width = width; - image->height = height; - image->data = data; - /* Always pad to 32 bits */ - image->bytes_per_line = ROUNDUP((bitsPerPixel * width), 32); - image->bits_per_pixel = bitsPerPixel; - } - - return image; -} - -void XMesaDestroyImage(XMesaImage *image) -{ - if (image->data) - free(image->data); - xfree(image); -} - -unsigned long XMesaGetPixel(XMesaImage *image, int x, int y) -{ - CARD8 *row = (CARD8 *)(image->data + y*image->bytes_per_line); - CARD8 *i8; - CARD16 *i16; - CARD32 *i32; - switch (image->bits_per_pixel) { - case 8: - i8 = (CARD8 *)row; - return i8[x]; - break; - case 15: - case 16: - i16 = (CARD16 *)row; - return i16[x]; - break; - case 24: /* WARNING: architecture specific code */ - i8 = (CARD8 *)row; - return (((CARD32)i8[x*3]) | - (((CARD32)i8[x*3+1])<<8) | - (((CARD32)i8[x*3+2])<<16)); - break; - case 32: - i32 = (CARD32 *)row; - return i32[x]; - break; - } - return 0; -} - -#ifndef XMESA_USE_PUTPIXEL_MACRO -void XMesaPutPixel(XMesaImage *image, int x, int y, unsigned long pixel) -{ - CARD8 *row = (CARD8 *)(image->data + y*image->bytes_per_line); - CARD8 *i8; - CARD16 *i16; - CARD32 *i32; - switch (image->bits_per_pixel) { - case 8: - i8 = (CARD8 *)row; - i8[x] = (CARD8)pixel; - break; - case 15: - case 16: - i16 = (CARD16 *)row; - i16[x] = (CARD16)pixel; - break; - case 24: /* WARNING: architecture specific code */ - i8 = (CARD8 *)__row; - i8[x*3] = (CARD8)(p); - i8[x*3+1] = (CARD8)(p>>8); - i8[x*3+2] = (CARD8)(p>>16); - case 32: - 
i32 = (CARD32 *)row; - i32[x] = (CARD32)pixel; - break; - } -} -#endif - -#endif /* XFree86Server */ diff --git a/src/gallium/winsys/xlib/xm_image.h b/src/gallium/winsys/xlib/xm_image.h deleted file mode 100644 index 2a5e0f3777..0000000000 --- a/src/gallium/winsys/xlib/xm_image.h +++ /dev/null @@ -1,77 +0,0 @@ -/************************************************************************** - -Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. -All Rights Reserved. - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sub license, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice (including the -next paragraph) shall be included in all copies or substantial portions -of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. -IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR -ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - -**************************************************************************/ - -/* - * Authors: - * Kevin E. 
Martin <kevin@precisioninsight.com> - * Brian Paul <brian@precisioninsight.com> - */ - -#ifndef _XM_IMAGE_H_ -#define _XM_IMAGE_H_ - -#define XMESA_USE_PUTPIXEL_MACRO - -extern XMesaImage *XMesaCreateImage(int bitsPerPixel, int width, int height, - char *data); -extern void XMesaDestroyImage(XMesaImage *image); -extern unsigned long XMesaGetPixel(XMesaImage *image, int x, int y); -#ifdef XMESA_USE_PUTPIXEL_MACRO -#define XMesaPutPixel(__i,__x,__y,__p) \ -{ \ - CARD8 *__row = (CARD8 *)(__i->data + __y*__i->bytes_per_line); \ - CARD8 *__i8; \ - CARD16 *__i16; \ - CARD32 *__i32; \ - switch (__i->bits_per_pixel) { \ - case 8: \ - __i8 = (CARD8 *)__row; \ - __i8[__x] = (CARD8)__p; \ - break; \ - case 15: \ - case 16: \ - __i16 = (CARD16 *)__row; \ - __i16[__x] = (CARD16)__p; \ - break; \ - case 24: /* WARNING: architecture specific code */ \ - __i8 = (CARD8 *)__row; \ - __i8[__x*3] = (CARD8)(__p); \ - __i8[__x*3+1] = (CARD8)(__p>>8); \ - __i8[__x*3+2] = (CARD8)(__p>>16); \ - break; \ - case 32: \ - __i32 = (CARD32 *)__row; \ - __i32[__x] = (CARD32)__p; \ - break; \ - } \ -} -#else -extern void XMesaPutPixel(XMesaImage *image, int x, int y, - unsigned long pixel); -#endif - -#endif /* _XM_IMAGE_H_ */ diff --git a/src/gallium/winsys/xlib/xmesa.h b/src/gallium/winsys/xlib/xmesa.h new file mode 100644 index 0000000000..98139af833 --- /dev/null +++ b/src/gallium/winsys/xlib/xmesa.h @@ -0,0 +1,424 @@ +/* + * Mesa 3-D graphics library + * Version: 7.1 + * + * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/* + * Mesa/X11 interface. This header file serves as the documentation for + * the Mesa/X11 interface functions. + * + * Note: this interface isn't intended for user programs. It's primarily + * just for implementing the pseudo-GLX interface. + */ + + +/* Sample Usage: + +In addition to the usual X calls to select a visual, create a colormap +and create a window, you must do the following to use the X/Mesa interface: + +1. Call XMesaCreateVisual() to make an XMesaVisual from an XVisualInfo. + +2. Call XMesaCreateContext() to create an X/Mesa rendering context, given + the XMesaVisual. + +3. Call XMesaCreateWindowBuffer() to create an XMesaBuffer from an X window + and XMesaVisual. + +4. Call XMesaMakeCurrent() to bind the XMesaBuffer to an XMesaContext and + to make the context the current one. + +5. Make gl* calls to render your graphics. + +6. 
Use XMesaSwapBuffers() when double buffering to swap front/back buffers. + +7. Before the X window is destroyed, call XMesaDestroyBuffer(). + +8. Before exiting, call XMesaDestroyVisual and XMesaDestroyContext. + +*/ + + + + +#ifndef XMESA_H +#define XMESA_H + +#ifdef __VMS +#include <GL/vms_x_fix.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef XFree86Server +#include "xmesa_xf86.h" +#else +#include <X11/Xlib.h> +#include <X11/Xutil.h> +#include "xmesa_x.h" +#endif +#include "GL/gl.h" + +#ifdef AMIWIN +#include <pragmas/xlib_pragmas.h> +extern struct Library *XLibBase; +#endif + + +#define XMESA_MAJOR_VERSION 6 +#define XMESA_MINOR_VERSION 3 + + + +/* + * Values passed to XMesaGetString: + */ +#define XMESA_VERSION 1 +#define XMESA_EXTENSIONS 2 + + +/* + * Values passed to XMesaSetFXmode: + */ +#define XMESA_FX_WINDOW 1 +#define XMESA_FX_FULLSCREEN 2 + + + +typedef struct xmesa_context *XMesaContext; + +typedef struct xmesa_visual *XMesaVisual; + +typedef struct xmesa_buffer *XMesaBuffer; + + + +/* + * Create a new X/Mesa visual. + * Input: display - X11 display + * visinfo - an XVisualInfo pointer + * rgb_flag - GL_TRUE = RGB mode, + * GL_FALSE = color index mode + * alpha_flag - alpha buffer requested? + * db_flag - GL_TRUE = double-buffered, + * GL_FALSE = single buffered + * stereo_flag - stereo visual? 
+ * ximage_flag - GL_TRUE = use an XImage for back buffer, + * GL_FALSE = use an off-screen pixmap for back buffer + * depth_size - requested bits/depth values, or zero + * stencil_size - requested bits/stencil values, or zero + * accum_red_size - requested bits/red accum values, or zero + * accum_green_size - requested bits/green accum values, or zero + * accum_blue_size - requested bits/blue accum values, or zero + * accum_alpha_size - requested bits/alpha accum values, or zero + * num_samples - number of samples/pixel if multisampling, or zero + * level - visual level, usually 0 + * visualCaveat - ala the GLX extension, usually GLX_NONE_EXT + * Return; a new XMesaVisual or 0 if error. + */ +extern XMesaVisual XMesaCreateVisual( XMesaDisplay *display, + XMesaVisualInfo visinfo, + GLboolean rgb_flag, + GLboolean alpha_flag, + GLboolean db_flag, + GLboolean stereo_flag, + GLboolean ximage_flag, + GLint depth_size, + GLint stencil_size, + GLint accum_red_size, + GLint accum_green_size, + GLint accum_blue_size, + GLint accum_alpha_size, + GLint num_samples, + GLint level, + GLint visualCaveat ); + +/* + * Destroy an XMesaVisual, but not the associated XVisualInfo. + */ +extern void XMesaDestroyVisual( XMesaVisual v ); + + + +/* + * Create a new XMesaContext for rendering into an X11 window. + * + * Input: visual - an XMesaVisual + * share_list - another XMesaContext with which to share display + * lists or NULL if no sharing is wanted. + * Return: an XMesaContext or NULL if error. + */ +extern XMesaContext XMesaCreateContext( XMesaVisual v, + XMesaContext share_list ); + + +/* + * Destroy a rendering context as returned by XMesaCreateContext() + */ +extern void XMesaDestroyContext( XMesaContext c ); + + +#ifdef XFree86Server +/* + * These are the extra routines required for integration with XFree86. + * None of these routines should be user visible. 
-KEM + */ +extern GLboolean XMesaForceCurrent( XMesaContext c ); + +extern GLboolean XMesaLoseCurrent( XMesaContext c ); + +extern GLboolean XMesaCopyContext( XMesaContext src, + XMesaContext dst, + GLuint mask ); +#endif /* XFree86Server */ + + +/* + * Create an XMesaBuffer from an X window. + */ +extern XMesaBuffer XMesaCreateWindowBuffer( XMesaVisual v, XMesaWindow w ); + + +/* + * Create an XMesaBuffer from an X pixmap. + */ +extern XMesaBuffer XMesaCreatePixmapBuffer( XMesaVisual v, + XMesaPixmap p, + XMesaColormap cmap ); + + +/* + * Destroy an XMesaBuffer, but not the corresponding window or pixmap. + */ +extern void XMesaDestroyBuffer( XMesaBuffer b ); + + +/* + * Return the XMesaBuffer handle which corresponds to an X drawable, if any. + * + * New in Mesa 2.3. + */ +extern XMesaBuffer XMesaFindBuffer( XMesaDisplay *dpy, + XMesaDrawable d ); + + + +/* + * Bind a buffer to a context and make the context the current one. + */ +extern GLboolean XMesaMakeCurrent( XMesaContext c, + XMesaBuffer b ); + + +/* + * Bind two buffers (read and draw) to a context and make the + * context the current one. + * New in Mesa 3.3 + */ +extern GLboolean XMesaMakeCurrent2( XMesaContext c, + XMesaBuffer drawBuffer, + XMesaBuffer readBuffer ); + + +/* + * Unbind the current context from its buffer. + */ +extern GLboolean XMesaUnbindContext( XMesaContext c ); + + +/* + * Return a handle to the current context. + */ +extern XMesaContext XMesaGetCurrentContext( void ); + + +/* + * Return handle to the current (draw) buffer. + */ +extern XMesaBuffer XMesaGetCurrentBuffer( void ); + + +/* + * Return handle to the current read buffer. + * New in Mesa 3.3 + */ +extern XMesaBuffer XMesaGetCurrentReadBuffer( void ); + + +/* + * Swap the front and back buffers for the given buffer. No action is + * taken if the buffer is not double buffered. + */ +extern void XMesaSwapBuffers( XMesaBuffer b ); + + +/* + * Copy a sub-region of the back buffer to the front buffer. 
+ * + * New in Mesa 2.6 + */ +extern void XMesaCopySubBuffer( XMesaBuffer b, + int x, + int y, + int width, + int height ); + + +/* + * Return a pointer to the the Pixmap or XImage being used as the back + * color buffer of an XMesaBuffer. This function is a way to get "under + * the hood" of X/Mesa so one can manipulate the back buffer directly. + * Input: b - the XMesaBuffer + * Output: pixmap - pointer to back buffer's Pixmap, or 0 + * ximage - pointer to back buffer's XImage, or NULL + * Return: GL_TRUE = context is double buffered + * GL_FALSE = context is single buffered + */ +extern GLboolean XMesaGetBackBuffer( XMesaBuffer b, + XMesaPixmap *pixmap, + XMesaImage **ximage ); + + + +/* + * Return the depth buffer associated with an XMesaBuffer. + * Input: b - the XMesa buffer handle + * Output: width, height - size of buffer in pixels + * bytesPerValue - bytes per depth value (2 or 4) + * buffer - pointer to depth buffer values + * Return: GL_TRUE or GL_FALSE to indicate success or failure. + * + * New in Mesa 2.4. + */ +extern GLboolean XMesaGetDepthBuffer( XMesaBuffer b, + GLint *width, + GLint *height, + GLint *bytesPerValue, + void **buffer ); + + + +/* + * Flush/sync a context + */ +extern void XMesaFlush( XMesaContext c ); + + + +/* + * Get an X/Mesa-specific string. + * Input: name - either XMESA_VERSION or XMESA_EXTENSIONS + */ +extern const char *XMesaGetString( XMesaContext c, int name ); + + + +/* + * Scan for XMesaBuffers whose window/pixmap has been destroyed, then free + * any memory used by that buffer. + * + * New in Mesa 2.3. + */ +extern void XMesaGarbageCollect( void ); + + + +/* + * Return a dithered pixel value. + * Input: c - XMesaContext + * x, y - window coordinate + * red, green, blue, alpha - color components in [0,1] + * Return: pixel value + * + * New in Mesa 2.3. 
+ */ +extern unsigned long XMesaDitherColor( XMesaContext xmesa, + GLint x, + GLint y, + GLfloat red, + GLfloat green, + GLfloat blue, + GLfloat alpha ); + + + +/* + * 3Dfx Glide driver only! + * Set 3Dfx/Glide full-screen or window rendering mode. + * Input: mode - either XMESA_FX_WINDOW (window rendering mode) or + * XMESA_FX_FULLSCREEN (full-screen rendering mode) + * Return: GL_TRUE if success + * GL_FALSE if invalid mode or if not using 3Dfx driver + * + * New in Mesa 2.6. + */ +extern GLboolean XMesaSetFXmode( GLint mode ); + + + +/* + * Reallocate the back/depth/stencil/accum/etc/ buffers associated with + * buffer <b> if its size has changed. + * + * New in Mesa 4.0.2 + */ +extern void XMesaResizeBuffers( XMesaBuffer b ); + + + +/* + * Create a pbuffer. + * New in Mesa 4.1 + */ +extern XMesaBuffer XMesaCreatePBuffer(XMesaVisual v, XMesaColormap cmap, + unsigned int width, unsigned int height); + + + +/* + * Texture from Pixmap + * New in Mesa 7.1 + */ +extern void +XMesaBindTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer, + const int *attrib_list); + +extern void +XMesaReleaseTexImage(XMesaDisplay *dpy, XMesaBuffer drawable, int buffer); + + +extern XMesaBuffer +XMesaCreatePixmapTextureBuffer(XMesaVisual v, XMesaPixmap p, + XMesaColormap cmap, + int format, int target, int mipmap); + + + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/src/gallium/winsys/xlib/xmesaP.h b/src/gallium/winsys/xlib/xmesaP.h deleted file mode 100644 index fcaeee52bc..0000000000 --- a/src/gallium/winsys/xlib/xmesaP.h +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Mesa 3-D graphics library - * Version: 7.1 - * - * Copyright (C) 1999-2007 Brian Paul All Rights Reserved. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -#ifndef XMESAP_H -#define XMESAP_H - - -#include "GL/xmesa.h" -#include "mtypes.h" -#ifdef XFree86Server -#include "xm_image.h" -#endif - -#include "state_tracker/st_context.h" -#include "state_tracker/st_public.h" -#include "pipe/p_thread.h" - - -extern pipe_mutex _xmesa_lock; - -extern XMesaBuffer XMesaBufferList; - -/* - */ -#define XMESA_SOFTPIPE 1 -#define XMESA_AUB 2 -extern int xmesa_mode; - - -/** - * Visual inforation, derived from GLvisual. - * Basically corresponds to an XVisualInfo. 
- */ -struct xmesa_visual { - GLvisual mesa_visual; /* Device independent visual parameters */ - XMesaDisplay *display; /* The X11 display */ -#ifdef XFree86Server - GLint ColormapEntries; - GLint nplanes; -#else - XMesaVisualInfo visinfo; /* X's visual info (pointer to private copy) */ - XVisualInfo *vishandle; /* Only used in fakeglx.c */ -#endif - GLint BitsPerPixel; /* True bits per pixel for XImages */ - - GLboolean ximage_flag; /* Use XImage for back buffer (not pixmap)? */ -}; - - -/** - * Context info, derived from st_context. - * Basically corresponds to a GLXContext. - */ -struct xmesa_context { - struct st_context *st; - XMesaVisual xm_visual; /** pixel format info */ - XMesaBuffer xm_buffer; /** current drawbuffer */ -}; - - -/** - * Types of X/GLX drawables we might render into. - */ -typedef enum { - WINDOW, /* An X window */ - GLXWINDOW, /* GLX window */ - PIXMAP, /* GLX pixmap */ - PBUFFER /* GLX Pbuffer */ -} BufferType; - - -/** - * Framebuffer information, derived from. - * Basically corresponds to a GLXDrawable. - */ -struct xmesa_buffer { - struct st_framebuffer *stfb; - - GLboolean wasCurrent; /* was ever the current buffer? 
*/ - XMesaVisual xm_visual; /* the X/Mesa visual */ - XMesaDrawable drawable; /* Usually the X window ID */ - XMesaColormap cmap; /* the X colormap */ - BufferType type; /* window, pixmap, pbuffer or glxwindow */ - - XMesaImage *tempImage; - unsigned long selectedEvents;/* for pbuffers only */ - - GLuint shm; /* X Shared Memory extension status: */ - /* 0 = not available */ - /* 1 = XImage support available */ - /* 2 = Pixmap support available too */ -#if defined(USE_XSHM) && !defined(XFree86Server) - XShmSegmentInfo shminfo; -#endif - - XMesaGC gc; /* scratch GC for span, line, tri drawing */ - - /* GLX_EXT_texture_from_pixmap */ - GLint TextureTarget; /** GLX_TEXTURE_1D_EXT, for example */ - GLint TextureFormat; /** GLX_TEXTURE_FORMAT_RGB_EXT, for example */ - GLint TextureMipmap; /** 0 or 1 */ - - struct xmesa_buffer *Next; /* Linked list pointer: */ -}; - - - -/** cast wrapper */ -static INLINE XMesaContext -xmesa_context(GLcontext *ctx) -{ - return (XMesaContext) ctx->DriverCtx; -} - - -/** cast wrapper */ -static INLINE XMesaBuffer -xmesa_buffer(GLframebuffer *fb) -{ - struct st_framebuffer *stfb = (struct st_framebuffer *) fb; - return (XMesaBuffer) st_framebuffer_private(stfb); -} - - -extern void -xmesa_delete_framebuffer(struct gl_framebuffer *fb); - -extern XMesaBuffer -xmesa_find_buffer(XMesaDisplay *dpy, XMesaColormap cmap, XMesaBuffer notThis); - -extern void -xmesa_check_and_update_buffer_size(XMesaContext xmctx, XMesaBuffer drawBuffer); - -extern void -xmesa_destroy_buffers_on_display(XMesaDisplay *dpy); - -extern struct pipe_context * -xmesa_create_pipe_context(XMesaContext xm, uint pixelformat); - -static INLINE GLuint -xmesa_buffer_width(XMesaBuffer b) -{ - return b->stfb->Base.Width; -} - -static INLINE GLuint -xmesa_buffer_height(XMesaBuffer b) -{ - return b->stfb->Base.Height; -} - -extern void -xmesa_display_surface(XMesaBuffer b, const struct pipe_surface *surf); - -extern int -xmesa_check_for_xshm(XMesaDisplay *display); - -#endif diff 
--git a/src/gallium/winsys/xlib/xmesa_x.h b/src/gallium/winsys/xlib/xmesa_x.h new file mode 100644 index 0000000000..865bab4313 --- /dev/null +++ b/src/gallium/winsys/xlib/xmesa_x.h @@ -0,0 +1,86 @@ + +/************************************************************************** + +Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: + * Kevin E. Martin <kevin@precisioninsight.com> + * + * When we're building the XMesa driver for stand-alone Mesa we + * include this file when building the xm_*.c files. + * We need to define some types and macros differently when building + * in the Xserver vs. stand-alone Mesa. 
+ */ + +#ifndef _XMESA_X_H_ +#define _XMESA_X_H_ + +typedef Display XMesaDisplay; +typedef Pixmap XMesaPixmap; +typedef Colormap XMesaColormap; +typedef Drawable XMesaDrawable; +typedef Window XMesaWindow; +typedef GC XMesaGC; +typedef XVisualInfo *XMesaVisualInfo; +typedef XImage XMesaImage; +typedef XPoint XMesaPoint; +typedef XColor XMesaColor; + +#define XMesaDestroyImage XDestroyImage + +#define XMesaPutPixel XPutPixel +#define XMesaGetPixel XGetPixel + +#define XMesaSetForeground XSetForeground +#define XMesaSetBackground XSetBackground +#define XMesaSetPlaneMask XSetPlaneMask +#define XMesaSetFunction XSetFunction +#define XMesaSetFillStyle XSetFillStyle +#define XMesaSetTile XSetTile + +#define XMesaDrawPoint XDrawPoint +#define XMesaDrawPoints XDrawPoints +#define XMesaDrawLine XDrawLine +#define XMesaFillRectangle XFillRectangle +#define XMesaGetImage XGetImage +#define XMesaPutImage XPutImage +#define XMesaCopyArea XCopyArea + +#define XMesaCreatePixmap XCreatePixmap +#define XMesaFreePixmap XFreePixmap +#define XMesaFreeGC XFreeGC + +#define GET_COLORMAP_SIZE(__v) __v->visinfo->colormap_size +#define GET_REDMASK(__v) __v->mesa_visual.redMask +#define GET_GREENMASK(__v) __v->mesa_visual.greenMask +#define GET_BLUEMASK(__v) __v->mesa_visual.blueMask +#define GET_VISUAL_DEPTH(__v) __v->visinfo->depth +#define GET_BLACK_PIXEL(__v) BlackPixel(__v->display, __v->mesa_visual.screen) +#define CHECK_BYTE_ORDER(__v) host_byte_order()==ImageByteOrder(__v->display) +#define CHECK_FOR_HPCR(__v) XInternAtom(__v->display, "_HP_RGB_SMOOTH_MAP_LIST", True) + +#endif |